In [108]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [109]:
# Fix the global NumPy RNG so the random frame built below is reproducible
SEED = 101
np.random.seed(SEED)

In [110]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [111]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [112]:
# Select a single column by its label using bracket notation
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [113]:
# Confirm that selecting one column yields a pandas Series
type(df['W'])

pandas.core.series.Series

In [114]:
# The table object itself is a DataFrame
type(df)

pandas.core.frame.DataFrame

In [115]:
# Attribute-style access; returns the same column as df['W']
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [116]:
# A list of labels selects several columns and returns a DataFrame
df.loc[:, ['W', 'Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
C,-2.018168,0.528813
D,0.188695,-0.933237
E,0.190794,2.605967


In [117]:
# Create a derived column: element-wise sum of columns W and Y
df['W + Y'] = df['W'].add(df['Y'])

In [118]:
# df now carries the derived 'W + Y' column as its last column
df

Unnamed: 0,W,X,Y,Z,W + Y
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [119]:
# Display df without the derived column. drop() returns a new frame, so df
# itself is not modified by this cell. `axis` is passed by keyword because
# the positional form was deprecated in pandas 1.3 and raises TypeError
# from pandas 2.0 onward (axis=1 means "drop a column").
df.drop('W + Y', axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [120]:
# df still has 'W + Y': the preceding drop() was not performed in place
df

Unnamed: 0,W,X,Y,Z,W + Y
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [121]:
# Keep the result of a non-inplace drop under its own name; df is left as is.
# `axis` must be a keyword argument: pandas 2.0 rejects the positional form
# that pandas 1.3 had deprecated (axis=1 targets columns).
dropped_new = df.drop('W + Y', axis=1)

In [122]:
# The frame returned by drop(): same data as df but without 'W + Y'
dropped_new

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [123]:
# df still contains 'W + Y' at this point (the earlier drops returned copies),
# so this assignment simply overwrites the column with the same W + Y sums.
df['W + Y'] = df['W'] + df['Y']

In [124]:
# Remove the derived column from df itself: inplace=True mutates df and the
# call returns None. `axis` is given by keyword because the positional form
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onward.
df.drop('W + Y', axis=1, inplace=True)

In [125]:
# 'W + Y' is gone from df itself after the in-place drop
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [126]:
# Remove row 'E' from df in place (axis=0 targets rows). `axis` is given by
# keyword because the positional form was deprecated in pandas 1.3 and
# raises TypeError from pandas 2.0 onward.
df.drop('E', axis=0, inplace=True)

In [127]:
# Row 'E' has been removed from df; rows A-D remain
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [128]:
# (rows, columns) tuple: 4 rows are left after dropping 'E', with 4 columns
df.shape

(4, 4)

In [129]:
# Label-based row selection: the row comes back as a Series named 'D'
df.loc['D']

W    0.188695
X   -0.758872
Y   -0.933237
Z    0.955057
Name: D, dtype: float64

In [130]:
df.iloc[2] # position-based selection: integer position 2 is the row labelled 'C'


W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [131]:
df.iloc[2,1:3] # [row position, column slice]: row 'C', columns X and Y (slice stop 3 is excluded)

X    0.740122
Y    0.528813
Name: C, dtype: float64

In [132]:
# Label-based subset: rows A and B crossed with columns Y and Z
df.loc[['A','B'],['Y','Z']]

Unnamed: 0,Y,Z
A,0.907969,0.503826
B,-0.848077,0.605965


In [133]:
# Positional subset: rows at positions 1-2 (B, C) and the first two columns (W, X)
df.iloc[1:3, :2]

Unnamed: 0,W,X
B,0.651118,-0.319318
C,-2.018168,0.740122


In [134]:
# conditional selection

In [135]:
# Element-wise comparison: a frame of booleans, True where the value is > 0
df.gt(0)

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True


In [136]:
# Keep the "value is positive" boolean frame for use as a mask
booldf = df.gt(0)

In [137]:
# Keep values where the mask is True; positions where it is False become NaN
df.where(booldf)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [138]:
# Boolean Series, one entry per row: True where column W is positive
df['W'].gt(0)

A     True
B     True
C    False
D     True
Name: W, dtype: bool

In [139]:
# Row filter: keep only the rows whose W value is positive
df.loc[df['W'].gt(0)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [140]:
# Row filter: keep only the rows whose Y value is positive
df.loc[df['Y'].gt(0)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001


In [141]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [142]:
# Rows where W > 2 AND Z < 1; `&` combines the two boolean Series element-wise
df.loc[df['W'].gt(2) & df['Z'].lt(1)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826


In [143]:
# Rows where W > 2 OR Z < 0.5; `|` (pipe) combines the two boolean Series element-wise
df.loc[df['W'].gt(2) | df['Z'].lt(0.5)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001


In [144]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [145]:
# Returns a new frame: the A-D labels move into an 'index' column and the
# rows are renumbered 0..3. df itself is not modified by this call.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057


In [146]:
# df keeps its A-D labels: the reset_index() call above returned a new frame
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [147]:
# Mutates df: the A-D labels become an 'index' column and the rows are
# renumbered 0..3.
# NOTE(review): not idempotent - re-running this cell would reset the index
# again on the already-reset frame; restart and run all to reproduce.
df.reset_index(inplace=True)

In [148]:
df

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057


In [149]:
# One label per remaining row, built by splitting a space-separated string
_city_labels = 'JAIPUR MUMBAI DELHI BANGLORE'
newind = _city_labels.split()

In [150]:
# Attach the labels as a new 'CITIES' column; values are assigned in row order
df['CITIES'] = newind

In [151]:
df

Unnamed: 0,index,W,X,Y,Z,CITIES
0,A,2.70685,0.628133,0.907969,0.503826,JAIPUR
1,B,0.651118,-0.319318,-0.848077,0.605965,MUMBAI
2,C,-2.018168,0.740122,0.528813,-0.589001,DELHI
3,D,0.188695,-0.758872,-0.933237,0.955057,BANGLORE


In [152]:
# Returns a new frame that uses the CITIES column as its row labels;
# df itself is not modified by this call.
df.set_index('CITIES')

Unnamed: 0_level_0,index,W,X,Y,Z
CITIES,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
JAIPUR,A,2.70685,0.628133,0.907969,0.503826
MUMBAI,B,0.651118,-0.319318,-0.848077,0.605965
DELHI,C,-2.018168,0.740122,0.528813,-0.589001
BANGLORE,D,0.188695,-0.758872,-0.933237,0.955057


In [153]:
# df still has its integer index and a CITIES column: set_index() returned a new frame
df

Unnamed: 0,index,W,X,Y,Z,CITIES
0,A,2.70685,0.628133,0.907969,0.503826,JAIPUR
1,B,0.651118,-0.319318,-0.848077,0.605965,MUMBAI
2,C,-2.018168,0.740122,0.528813,-0.589001,DELHI
3,D,0.188695,-0.758872,-0.933237,0.955057,BANGLORE
