## Pandas - Data Frame

In [43]:
import numpy as np
from numpy.random import randn

import pandas as pd

In [44]:
np.random.seed(101) # fix the global NumPy RNG seed so the random draws below are the same on every run

In [45]:
# Draw 10 samples from the standard normal distribution (this advances the seeded RNG).
randn(10)

array([ 2.70684984,  0.62813271,  0.90796945,  0.50382575,  0.65111795,
       -0.31931804, -0.84807698,  0.60596535, -2.01816824,  0.74012206])

In [46]:
# Build a 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
B,-0.933237,0.955057,0.190794,1.978757
C,2.605967,0.683509,0.302665,1.693723
D,-1.706086,-1.159119,-0.134841,0.390528
E,0.166905,0.184502,0.807706,0.07296


In [47]:
# Keep each cell whose value times 100 exceeds 40; every other cell becomes NaN.
df.where(df.mul(100).gt(40))

Unnamed: 0,W,X,Y,Z
A,0.528813,,,
B,,0.955057,,1.978757
C,2.605967,0.683509,,1.693723
D,,,,
E,,,0.807706,


In [48]:
# Re-display df: the boolean-mask selection above returned a new frame and left df unchanged.
df

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
B,-0.933237,0.955057,0.190794,1.978757
C,2.605967,0.683509,0.302665,1.693723
D,-1.706086,-1.159119,-0.134841,0.390528
E,0.166905,0.184502,0.807706,0.07296


In [49]:
# Row filter: keep only the rows whose value in column W is positive.
df.loc[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
C,2.605967,0.683509,0.302665,1.693723
E,0.166905,0.184502,0.807706,0.07296


In [50]:
# Rows where W is positive, restricted to columns X and Z. A single .loc call
# selects rows and columns together instead of chaining two [] lookups, which
# would first materialise the full-width filtered frame.
df.loc[df['W'] > 0, ['X', 'Z']]

Unnamed: 0,X,Z
A,-0.589001,-0.758872
C,0.683509,1.693723
E,0.184502,0.07296


In [51]:
# Element-wise AND of two boolean masks: keep rows where 0 < W < 1.
# Each comparison needs its own parentheses because & binds tighter than > and <.
df[(df['W']>0) & (df['W']<1)]

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
E,0.166905,0.184502,0.807706,0.07296


In [52]:
# Element-wise OR of two boolean masks: keep rows where W > 0 or W < 1.
# NOTE(review): every non-NaN number satisfies at least one of these conditions, so
# this returns all rows; use non-overlapping conditions to actually demonstrate OR filtering.
df[(df['W']>0) | (df['W']<1)]

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
B,-0.933237,0.955057,0.190794,1.978757
C,2.605967,0.683509,0.302665,1.693723
D,-1.706086,-1.159119,-0.134841,0.390528
E,0.166905,0.184502,0.807706,0.07296


In [53]:
# Move the A-E labels into a regular 'index' column and number the rows from 0.
# The result is not assigned, so df itself is left unchanged.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,0.528813,-0.589001,0.188695,-0.758872
1,B,-0.933237,0.955057,0.190794,1.978757
2,C,2.605967,0.683509,0.302665,1.693723
3,D,-1.706086,-1.159119,-0.134841,0.390528
4,E,0.166905,0.184502,0.807706,0.07296


In [54]:
# Attach one state code per row as a new 'States' column (one entry per row of df).
states = ['BH', 'TS', 'MH', 'DL', 'TN']
df['States'] = states
df

Unnamed: 0,W,X,Y,Z,States
A,0.528813,-0.589001,0.188695,-0.758872,BH
B,-0.933237,0.955057,0.190794,1.978757,TS
C,2.605967,0.683509,0.302665,1.693723,MH
D,-1.706086,-1.159119,-0.134841,0.390528,DL
E,0.166905,0.184502,0.807706,0.07296,TN


In [55]:
# Use the 'States' column as the row index.
# The result is not assigned, so df itself is left unchanged.
df.set_index('States')

States,W,X,Y,Z
BH,0.528813,-0.589001,0.188695,-0.758872
TS,-0.933237,0.955057,0.190794,1.978757
MH,2.605967,0.683509,0.302665,1.693723
DL,-1.706086,-1.159119,-0.134841,0.390528
TN,0.166905,0.184502,0.807706,0.07296
