In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global random generator so the random values drawn below
# are the same on every run.
np.random.seed(101)

In [4]:
# 5x4 frame of random normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [5]:
# Display the whole frame (5 rows x 4 columns).
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [6]:
# Bracket selection with a single column label returns that column as a Series.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [7]:
# Confirm that a single selected column is a pandas Series, not a DataFrame.
type(df['W'])

pandas.core.series.Series

In [8]:
# Attribute-style access gives the same Series as df['W']. It only works when
# the column name is a valid Python identifier that does not collide with an
# existing DataFrame attribute or method, so the bracket form is the safer one.
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [9]:
# Same bracket selection, this time for column X.
df['X']

A    0.628133
B   -0.319318
C    0.740122
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [10]:
# Passing a list of labels selects several columns and returns a DataFrame.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [11]:
# Create column 'new' on df (this modifies df in place): the element-wise
# sum of columns X and Z.
df['new'] = df['X'] + df['Z']

In [12]:
# Inspect the freshly added column.
df['new']

A    1.131958
B    0.286647
C    0.151122
D    0.196184
E    2.662266
Name: new, dtype: float64

In [13]:
# Returns a copy of df without the 'new' column. The result is not assigned,
# so df itself still contains 'new' afterwards.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [14]:
# Returns a copy of df without row 'E'. The result is not assigned,
# so df itself keeps all five rows.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.131958
B,0.651118,-0.319318,-0.848077,0.605965,0.286647
C,-2.018168,0.740122,0.528813,-0.589001,0.151122
D,0.188695,-0.758872,-0.933237,0.955057,0.196184


In [15]:
# Label-based row lookup: returns row 'A' as a Series indexed by the column names.
df.loc['A']

W      2.706850
X      0.628133
Y      0.907969
Z      0.503826
new    1.131958
Name: A, dtype: float64

In [16]:
# Position-based row lookup: position 2 is the third row, which carries label 'C'.
df.iloc[2]

W     -2.018168
X      0.740122
Y      0.528813
Z     -0.589001
new    0.151122
Name: C, dtype: float64

In [17]:
# A row label plus a column label selects a single cell and returns a scalar.
df.loc['B','Y']

-0.8480769834036315

In [18]:
# Lists of row labels and column labels select a rectangular sub-frame.
df.loc[['A','B'], ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077


In [19]:
# Element-wise comparison: yields a boolean DataFrame with the same shape as df.
df > 0

Unnamed: 0,W,X,Y,Z,new
A,True,True,True,True,True
B,True,False,False,True,True
C,False,True,True,False,True
D,True,False,False,True,True
E,True,True,True,True,True


In [20]:
# Boolean mask with the same shape as df, True where the value exceeds 0.1.
# NOTE(review): the preceding cell compares against 0, this one against 0.1.
# For the values shown in this notebook both thresholds produce the same mask;
# confirm which threshold is intended.
booldf = df > 0.1

In [21]:
# Indexing with a boolean DataFrame keeps the original shape; cells where the
# mask is False come back as NaN.
df[booldf]

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.131958
B,0.651118,,,0.605965,0.286647
C,,0.740122,0.528813,,0.151122
D,0.188695,,,0.955057,0.196184
E,0.190794,1.978757,2.605967,0.683509,2.662266


In [22]:
# Comparing a single column gives a boolean Series: one True/False flag per row.
df['W']>0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [24]:
# Indexing with a boolean Series filters rows: only rows whose flag is True are
# kept (row C, where W is negative, is dropped).
df[df['W']>0]

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,1.131958
B,0.651118,-0.319318,-0.848077,0.605965,0.286647
D,0.188695,-0.758872,-0.933237,0.955057,0.196184
E,0.190794,1.978757,2.605967,0.683509,2.662266


In [25]:
# Keep only the rows where Z is negative (just row C for this data).
df[df['Z'] < 0]

Unnamed: 0,W,X,Y,Z,new
C,-2.018168,0.740122,0.528813,-0.589001,0.151122


In [26]:
# Column X for the rows where W is positive, selected in a single .loc call
# (row mask, column label). This avoids chained indexing (df[mask]['X']),
# which builds an intermediate frame first and is unreliable if the same
# pattern is later used for assignment.
df.loc[df['W'] > 0, 'X']

A    0.628133
B   -0.319318
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [28]:
# Combine two row conditions with the element-wise AND operator `&`.
# Python's `and` cannot be used on Series, and each comparison needs its own
# parentheses because `&` binds more tightly than `>`.
df[(df['W']> 0) & (df['Y']> 1)]

Unnamed: 0,W,X,Y,Z,new
E,0.190794,1.978757,2.605967,0.683509,2.662266


In [29]:
# Returns a new frame in which the row labels are moved into a column named
# 'index' and the rows are renumbered from 0. The result is not assigned, so
# df keeps its A-E labels.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z,new
0,A,2.70685,0.628133,0.907969,0.503826,1.131958
1,B,0.651118,-0.319318,-0.848077,0.605965,0.286647
2,C,-2.018168,0.740122,0.528813,-0.589001,0.151122
3,D,0.188695,-0.758872,-0.933237,0.955057,0.196184
4,E,0.190794,1.978757,2.605967,0.683509,2.662266


In [30]:
# One label per row of df, in row order.
newind = ['CA', 'CD', 'FG', 'DF', 'RG']

In [31]:
# Check the resulting list of five labels.
newind

['CA', 'CD', 'FG', 'DF', 'RG']

In [32]:
# Attach the labels as a new column 'states' (modifies df in place). The list
# is assigned positionally, so its length must equal the number of rows.
df['states'] = newind

In [33]:
# Display df, which now includes the 'states' column.
df

Unnamed: 0,W,X,Y,Z,new,states
A,2.70685,0.628133,0.907969,0.503826,1.131958,CA
B,0.651118,-0.319318,-0.848077,0.605965,0.286647,CD
C,-2.018168,0.740122,0.528813,-0.589001,0.151122,FG
D,0.188695,-0.758872,-0.933237,0.955057,0.196184,DF
E,0.190794,1.978757,2.605967,0.683509,2.662266,RG


In [34]:
# Returns a copy of df without the 'new' column. The result is not assigned,
# so df itself is left untouched.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z,states
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,CD
C,-2.018168,0.740122,0.528813,-0.589001,FG
D,0.188695,-0.758872,-0.933237,0.955057,DF
E,0.190794,1.978757,2.605967,0.683509,RG


In [35]:
# Verify that the drop in the previous cell did not change df: 'new' is still present.
df

Unnamed: 0,W,X,Y,Z,new,states
A,2.70685,0.628133,0.907969,0.503826,1.131958,CA
B,0.651118,-0.319318,-0.848077,0.605965,0.286647,CD
C,-2.018168,0.740122,0.528813,-0.589001,0.151122,FG
D,0.188695,-0.758872,-0.933237,0.955057,0.196184,DF
E,0.190794,1.978757,2.605967,0.683509,2.662266,RG


In [36]:
# Returns a new frame that uses the 'states' column as its row index; the
# previous A-E labels do not appear in the result. The result is not assigned,
# so df itself is unchanged.
df.set_index('states')

Unnamed: 0_level_0,W,X,Y,Z,new
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA,2.70685,0.628133,0.907969,0.503826,1.131958
CD,0.651118,-0.319318,-0.848077,0.605965,0.286647
FG,-2.018168,0.740122,0.528813,-0.589001,0.151122
DF,0.188695,-0.758872,-0.933237,0.955057,0.196184
RG,0.190794,1.978757,2.605967,0.683509,2.662266


In [37]:
# Verify that set_index in the previous cell did not change df: it still has
# the A-E index and a 'states' column.
df

Unnamed: 0,W,X,Y,Z,new,states
A,2.70685,0.628133,0.907969,0.503826,1.131958,CA
B,0.651118,-0.319318,-0.848077,0.605965,0.286647,CD
C,-2.018168,0.740122,0.528813,-0.589001,0.151122,FG
D,0.188695,-0.758872,-0.933237,0.955057,0.196184,DF
E,0.190794,1.978757,2.605967,0.683509,2.662266,RG


In [39]:
# Index Levels
# Two-level (hierarchical) index: an outer group label paired with an inner
# integer, giving (G1, 1) ... (G2, 3).
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the levels element by element and build the MultiIndex in one step.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [41]:
# Inspect the MultiIndex: six (group, number) tuples.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )