In [1]:
import pandas as pd
import numpy as np
# A DataFrame is a 2-D labeled data structure, similar to a SQL table, with both a row index and named columns.
# The columns can hold different data types.

In [3]:
# Build a DataFrame from a dict of Series.
# The row index of the result is the union of the indexes of all the Series;
# a label missing from one Series ('e' in 'two') becomes NaN in that column.
d=dict(
    one=pd.Series([1.,2.,3.,4.,5.],index=list('abcde')),
    two=pd.Series([1.,2.,3.,4.],index=list('abcd')),
)
df=pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,4.0,4.0
e,5.0,


In [4]:
# Restrict the frame to an explicit set of row and column labels.
# Labels that are absent from the data (row 'j', column 'three') are kept
# in the result and filled with NaN.
df=pd.DataFrame(data=d,index=list('abj'),columns=['two','three','one'])
df

Unnamed: 0,two,three,one
a,1.0,,1.0
b,2.0,,2.0
j,,,


In [5]:
# The index attribute holds the row labels of the DataFrame.
df.index

Index(['a', 'b', 'j'], dtype='object')

In [6]:
# The columns attribute holds the column labels of the DataFrame.
df.columns

Index(['two', 'three', 'one'], dtype='object')

In [7]:
# Build a DataFrame from a list of dicts: each dict becomes one row and
# its keys become the column names; a key missing from a dict gives NaN.
# When no index is passed, the row index is range(len(a)).
a=[dict(a=1.,b=2.),dict(a=7.,b=5.,c=8.)]
df=pd.DataFrame(a)
df

Unnamed: 0,a,b,c
0,1.0,2.0,
1,7.0,5.0,8.0


In [8]:
# fillna returns a new DataFrame with every NaN replaced by the given value (0 here).
# Rebinding df instead of passing inplace=True keeps the cell chainable and
# avoids mutating a frame that an earlier cell has already displayed.
df=df.fillna(0)
df

Unnamed: 0,a,b,c
0,1.0,2.0,0.0
1,7.0,5.0,8.0


In [13]:
# An integer slice on a DataFrame selects rows by position, like slicing an array;
# here it keeps every row from position 1 onwards.
df[1:]

Unnamed: 0,a,b,c
1,7.0,5.0,8.0


In [14]:
# Broadcasting: assigning a scalar to a new column name creates the column
# and repeats the scalar in every row.
df['d']=9.
df

Unnamed: 0,a,b,c,d
0,1.0,2.0,0.0,9.0
1,7.0,5.0,8.0,9.0


In [9]:
# Derive a new boolean column from an existing one: 'flag' is True in the
# rows where column 'c' is greater than 7.
df['flag']=df['c'].gt(7)
df

Unnamed: 0,a,b,c,flag
0,1.0,2.0,0.0,False
1,7.0,5.0,8.0,True


In [10]:
# pop removes a column from the DataFrame in place and returns it as a Series.
c=df.pop('c')
c

0    0.0
1    8.0
Name: c, dtype: float64

In [13]:
# assign adds a new column (not a row): here 'mul' is the product of 'a' and 'b'.
# It returns a new DataFrame and leaves df itself untouched.
df.assign(mul=lambda frame:frame['a']*frame['b']).head()

Unnamed: 0,a,b,flag,mul
0,1.0,2.0,False,2.0
1,7.0,5.0,True,35.0


In [14]:
# df still has no 'mul' column, because assign returned a new DataFrame.
df

Unnamed: 0,a,b,flag
0,1.0,2.0,False
1,7.0,5.0,True


# Indexing and selecting

In [19]:
# Selecting a single column by name returns a Series.
df['b']

0    2.0
1    5.0
Name: b, dtype: float64

In [23]:
# Select a single row.
# iloc selects by integer position (0 is the first row); the row is returned as a Series.
df.iloc[0]

a           1
b           2
flag    False
Name: 0, dtype: object


In [26]:
# Select a single row.
# loc selects by row label rather than by position; here the label 0 is also the first row.
df.loc[0]


a           1
b           2
flag    False
Name: 0, dtype: object

## Data Alignment 

In [None]:
# When an operation combines two DataFrames, they are aligned on both columns and index: the result has the union of the row labels and the union of the column labels of both DataFrames.

In [30]:
# Adding two DataFrames aligns them on both index and columns, then adds cell by cell.
# Any cell present in only one frame (column 'D', rows 6-9) becomes NaN in the result.
# Seed the generator so the random frames are the same on every run.
np.random.seed(0)
df1=pd.DataFrame(np.random.randn(10,4),columns=['A','B','C','D'])
df2=pd.DataFrame(np.random.randn(6,3),columns=['A','B','C'])
result_df=df1+df2
# fillna is used only to display the missing cells; result_df keeps its NaN values.
result_df.fillna("No val")

Unnamed: 0,A,B,C,D
0,1.37692,-0.730318,-1.27273,No val
1,0.33791,-0.166968,-0.990298,No val
2,0.745035,-0.610629,-1.07949,No val
3,0.156646,0.865527,1.62929,No val
4,0.250117,0.664221,-1.10676,No val
5,0.367447,-0.98952,-0.509197,No val
6,No val,No val,No val,No val
7,No val,No val,No val,No val
8,No val,No val,No val,No val
9,No val,No val,No val,No val


In [33]:
# Operation between a DataFrame and a Series:
# the Series index is matched against the DataFrame columns and the operation is
# applied to every row, so here the first row is subtracted from each row.
df1-df1.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,1.015746,0.84376,0.352826,0.131092
2,2.52045,-0.204231,-0.366043,0.76155
3,0.759447,0.9109,1.219377,-0.847369
4,2.223851,0.7775,-1.257892,-0.577762
5,-0.82632,0.377144,-0.816762,1.895662
6,1.82529,0.291107,-0.359088,1.322551
7,0.6532,0.678282,0.296194,0.280151
8,1.611732,-0.670359,1.177141,0.125914
9,-0.589415,0.581705,-0.43872,-0.102474


# Scalar Operations on DataFrame


In [35]:
# A scalar operation is applied element-wise: every cell is squared and NaN stays NaN.
result_df**2

Unnamed: 0,A,B,C,D
0,1.895899,0.533365,1.619833,
1,0.114184,0.027878,0.980689,
2,0.555077,0.372868,1.165304,
3,0.024538,0.749136,2.654588,
4,0.062559,0.441189,1.224918,
5,0.135017,0.979149,0.259282,
6,,,,
7,,,,
8,,,,
9,,,,


In [37]:
# result_df itself is unchanged: the squaring above returned a new DataFrame.
result_df

Unnamed: 0,A,B,C,D
0,1.376916,-0.730318,-1.272727,
1,0.33791,-0.166968,-0.990298,
2,0.745035,-0.610629,-1.079493,
3,0.156646,0.865527,1.629291,
4,0.250117,0.664221,-1.10676,
5,0.367447,-0.98952,-0.509197,
6,,,,
7,,,,
8,,,,
9,,,,


In [38]:
# Transpose of a DataFrame: rows become columns and columns become rows.
result_df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
A,1.376916,0.33791,0.745035,0.156646,0.250117,0.367447,,,,
B,-0.730318,-0.166968,-0.610629,0.865527,0.664221,-0.98952,,,,
C,-1.272727,-0.990298,-1.079493,1.629291,-1.10676,-0.509197,,,,
D,,,,,,,,,,
