In [1]:
import numpy as np
import pandas as pd

# Data Structures

**Series**

In [8]:
#1. From Python dict
d = {"b": 1, "a": 0, "c": 2}
ser = pd.Series(d)
print(ser)
print("-"*10)
ser = pd.Series(d,index=[1,2,3])  #changing the index
print(ser)

b    1
a    0
c    2
dtype: int64
----------
1   NaN
2   NaN
3   NaN
dtype: float64


In [22]:
#2. From a ndarray
ser = pd.Series(np.random.rand(3))
print(ser)
print("-"*20)
ser = pd.Series(np.random.randint(3,6),index=['a','b','c'])  #changing the index
print(ser)
print("-"*20)
ser = pd.Series(np.arange(1,4),index=['i','ii','iii'])  #changing the index
print(ser)
print("-"*20)
ser = pd.Series(np.linspace(10,30,3,dtype=np.int64),index=['a','b','c'])  #changing the index
print(ser)

0    0.384911
1    0.253597
2    0.005478
dtype: float64
--------------------
a    5
b    5
c    5
dtype: int64
--------------------
i      1
ii     2
iii    3
dtype: int32
--------------------
a    10
b    20
c    30
dtype: int64


In [23]:
#3. From scalar value
ser = pd.Series(5,index=['a','b','c'])
print(ser)

a    5
b    5
c    5
dtype: int64


**DataFrame**

In [37]:
#1. From dict of Series or dicts
data =  {

    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),

    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),

}
df = pd.DataFrame(data)
print(df)



   one  two
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0


In [38]:
pd.DataFrame(data,index=['d','b']) 

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0


In [39]:
pd.DataFrame(data,index=[1,2])  #index not in series, so NaN

Unnamed: 0,one,two
1,,
2,,


In [40]:
#2. From dict of ndarrays / lists
data2 = { "one" : [1,2,3,4,5,6], "two" : [7,8,9,10,15,85]}
df = pd.DataFrame(data2)
df

Unnamed: 0,one,two
0,1,7
1,2,8
2,3,9
3,4,10
4,5,15
5,6,85


In [41]:
df = pd.DataFrame(data2,index=['A','B','C','D','E','F'])
df

Unnamed: 0,one,two
A,1,7
B,2,8
C,3,9
D,4,10
E,5,15
F,6,85


In [42]:
#3. From a list of dict
data3 = [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]
df = pd.DataFrame(data3)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [43]:
df = pd.DataFrame(data3,index = ['first','second'])
df

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [51]:
#. creating a dataframe from numpy array
df = pd.DataFrame(np.random.rand(334,5))
df

Unnamed: 0,0,1,2,3,4
0,0.820743,0.536271,0.489139,0.529237,0.537090
1,0.549386,0.192695,0.037779,0.419713,0.265816
2,0.743429,0.441398,0.225539,0.375857,0.216105
3,0.936987,0.891443,0.334712,0.513098,0.543696
4,0.938679,0.889394,0.751702,0.670769,0.897326
...,...,...,...,...,...
329,0.933062,0.648270,0.264935,0.418503,0.010165
330,0.623991,0.165480,0.404062,0.047682,0.101788
331,0.343985,0.696523,0.734061,0.948698,0.147410
332,0.230865,0.776196,0.383726,0.906194,0.786889


**Column selection, addition**

In [55]:
df = pd.DataFrame(np.random.rand(334,5))
df

Unnamed: 0,0,1,2,3,4
0,0.104250,0.144081,0.198300,0.005364,0.330420
1,0.411225,0.573126,0.051102,0.915293,0.871803
2,0.751415,0.809976,0.608244,0.340157,0.402730
3,0.672693,0.735839,0.652392,0.856054,0.308826
4,0.122839,0.182127,0.768112,0.997602,0.084120
...,...,...,...,...,...
329,0.109809,0.177133,0.750626,0.997248,0.672627
330,0.807626,0.582918,0.915587,0.181208,0.294604
331,0.509588,0.845987,0.281521,0.870310,0.992032
332,0.688623,0.828162,0.102077,0.546514,0.698228


In [62]:
#1. selecting columns
df[1]

0      0.144081
1      0.573126
2      0.809976
3      0.735839
4      0.182127
         ...   
329    0.177133
330    0.582918
331    0.845987
332    0.828162
333    0.621144
Name: 1, Length: 334, dtype: float64

In [63]:
df[2]

0      0.198300
1      0.051102
2      0.608244
3      0.652392
4      0.768112
         ...   
329    0.750626
330    0.915587
331    0.281521
332    0.102077
333    0.198406
Name: 2, Length: 334, dtype: float64

In [64]:
df[0]

0      0.104250
1      0.411225
2      0.751415
3      0.672693
4      0.122839
         ...   
329    0.109809
330    0.807626
331    0.509588
332    0.688623
333    0.708810
Name: 0, Length: 334, dtype: float64

In [None]:
#2. deleting or popping a cloumn
del df[3]
df

In [67]:
df

Unnamed: 0,0,1,2,4
0,0.104250,0.144081,0.198300,0.330420
1,0.411225,0.573126,0.051102,0.871803
2,0.751415,0.809976,0.608244,0.402730
3,0.672693,0.735839,0.652392,0.308826
4,0.122839,0.182127,0.768112,0.084120
...,...,...,...,...
329,0.109809,0.177133,0.750626,0.672627
330,0.807626,0.582918,0.915587,0.294604
331,0.509588,0.845987,0.281521,0.992032
332,0.688623,0.828162,0.102077,0.698228


In [68]:
#3. addition of columns
df["new"] = 10

In [69]:
df

Unnamed: 0,0,1,2,4,new
0,0.104250,0.144081,0.198300,0.330420,10
1,0.411225,0.573126,0.051102,0.871803,10
2,0.751415,0.809976,0.608244,0.402730,10
3,0.672693,0.735839,0.652392,0.308826,10
4,0.122839,0.182127,0.768112,0.084120,10
...,...,...,...,...,...
329,0.109809,0.177133,0.750626,0.672627,10
330,0.807626,0.582918,0.915587,0.294604,10
331,0.509588,0.845987,0.281521,0.992032,10
332,0.688623,0.828162,0.102077,0.698228,10


In [70]:
'''for more, you can refer to the pandas doc
https://pandas.pydata.org/docs/user_guide/dsintro.html'''

'for more, you can refer to the pandas doc\nhttps://pandas.pydata.org/docs/user_guide/dsintro.html'