In [2]:
import numpy as np
import pandas as pd

In [3]:
# Seed NumPy's global RNG so the sampled values are identical on every run.
np.random.seed(0)

# Labels for the 3 rows and 5 columns of the demo frame.
row_labels = ['X', 'Y', 'Z']
col_labels = ['aa', 'bb', 'cc', 'dd', 'ee']

# One normally distributed draw per (row, column) cell.
samples = np.random.normal(size=(len(row_labels), len(col_labels)))
df = pd.DataFrame(samples, index=row_labels, columns=col_labels)

In [4]:
# Display the whole frame: rows labelled X, Y, Z and columns aa..ee.
df

Unnamed: 0,aa,bb,cc,dd,ee
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [5]:
# Access a single column by its label. The result is a Series indexed by the
# row labels (X, Y, Z) and named after the column.
df['aa']

X    1.764052
Y   -0.977278
Z    0.144044
Name: aa, dtype: float64

In [6]:
# Same bracket lookup with a different column label.
df['cc']

X    0.978738
Y   -0.151357
Z    0.761038
Name: cc, dtype: float64

In [7]:
# Pass a list of column labels to select several columns at once; unlike a
# single label, this returns a DataFrame rather than a Series.
df[['aa','cc']]

Unnamed: 0,aa,cc
X,1.764052,0.978738
Y,-0.977278,-0.151357
Z,0.144044,0.761038


In [8]:
# Access a row by its index label with .loc (label-based lookup). The row comes
# back as a Series indexed by the column names.
df.loc['X']

aa    1.764052
bb    0.400157
cc    0.978738
dd    2.240893
ee    1.867558
Name: X, dtype: float64

In [9]:
# Access a row by integer position with .iloc. Position 0 is the first row,
# which here is the same row as label 'X'.
df.iloc[0]

aa    1.764052
bb    0.400157
cc    0.978738
dd    2.240893
ee    1.867558
Name: X, dtype: float64

In [10]:
# Pass a list of row labels to .loc to get several rows back as a DataFrame.
# .loc selects by label; a single indexer like this one addresses rows.
df.loc[['X','Y','Z']]

Unnamed: 0,aa,bb,cc,dd,ee
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [11]:
# The following four lookups are equivalent: each returns the scalar at row 'Z', column 'aa'.

In [12]:
# Column first, then row label: two chained [] lookups (Series, then scalar).
df['aa']['Z']

0.144043571160878

In [13]:
# Column first, then an explicit label lookup (.loc) on the resulting Series.
df['aa'].loc['Z']

0.144043571160878

In [14]:
# Row first via .loc, then the column label on the resulting row Series.
df.loc['Z']['aa']

0.144043571160878

In [15]:
# Row and column labels in a single .loc call, so no intermediate Series is
# created on the way to the scalar.
df.loc['Z','aa']

0.144043571160878

In [16]:
# Inspect the index's level names; [None] means the single level is unnamed.
df.index.names

FrozenList([None])

In [17]:
# Give the index a name. `names` holds one entry per index level, so a
# single-level index takes a one-element list.
df.index.names = ['group']

In [18]:
# Display the frame again: the index name 'group' now heads the row labels.
df

Unnamed: 0_level_0,aa,bb,cc,dd,ee
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [19]:
# Reset the index to the default integer positions 0..n-1

In [20]:
# Returns a new frame with a fresh 0..n-1 index; the old index is kept as a
# regular column named 'group'. `df` itself is not modified by this call.
df.reset_index()

Unnamed: 0,group,aa,bb,cc,dd,ee
0,X,1.764052,0.400157,0.978738,2.240893,1.867558
1,Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [21]:
# Replace the index with the default 0..n-1 range. drop=True discards the old
# index labels instead of inserting them as a column.
# reset_index() returns a new frame, so rebind `df` rather than passing
# inplace=True: the resulting frame is the same, but the call stays chainable
# and does not rely on a method that returns None.
df = df.reset_index(drop=True)

In [22]:
# Display the frame: the index is now 0, 1, 2 and the old labels are gone.
df

Unnamed: 0,aa,bb,cc,dd,ee
0,1.764052,0.400157,0.978738,2.240893,1.867558
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,0.144044,1.454274,0.761038,0.121675,0.443863


In [23]:
# Add a new column named 'new' by assigning a list with one value per row.
df['new']=['P','Q','R']

In [24]:
# Display the frame with the added 'new' column on the right.
df

Unnamed: 0,aa,bb,cc,dd,ee,new
0,1.764052,0.400157,0.978738,2.240893,1.867558,P
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599,Q
2,0.144044,1.454274,0.761038,0.121675,0.443863,R


In [25]:
# Promote the 'new' column to be the row index; it stops being a data column
# and its values (P, Q, R) become the row labels.
# set_index() returns a new frame, so rebind `df` rather than passing
# inplace=True: the resulting frame is the same, but the call stays chainable.
df = df.set_index('new')

In [26]:
# Display the frame: 'new' is now the index rather than a data column.
df

Unnamed: 0_level_0,aa,bb,cc,dd,ee
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
P,1.764052,0.400157,0.978738,2.240893,1.867558
Q,-0.977278,0.950088,-0.151357,-0.103219,0.410599
R,0.144044,1.454274,0.761038,0.121675,0.443863


In [34]:
# Relabel rows and columns in one rename() call: `index` maps old row labels
# to new ones (P -> XX, Q -> YY, R -> ZZ) and `columns` maps old column names
# to new ones (aa -> AA, bb -> BB). Labels that are not keys in a mapping
# (cc, dd, ee) are left as they are.
# rename() returns a new frame, so rebind `df` rather than passing
# inplace=True: the resulting frame is the same, but the call stays chainable.
df = df.rename(index={'P':'XX','Q':'YY','R':'ZZ'},columns={'aa':'AA','bb':'BB'})

In [35]:
# Display the frame with the renamed row labels (XX, YY, ZZ) and columns (AA, BB).
df

Unnamed: 0_level_0,AA,BB,cc,dd,ee
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XX,1.764052,0.400157,0.978738,2.240893,1.867558
YY,-0.977278,0.950088,-0.151357,-0.103219,0.410599
ZZ,0.144044,1.454274,0.761038,0.121675,0.443863


In [None]:
# Hierarchical indices (MultiIndex)


In [40]:
# Build a DataFrame whose rows carry a two-level (hierarchical) index.
np.random.seed(0)  # fixed seed so the integers below are reproducible

# Cartesian product of the two label lists: (X, 1), (X, 2), (Y, 1), (Y, 2).
row_index = pd.MultiIndex.from_product([['X', 'Y'], [1, 2]])
col_labels = list('ABCDE')

# Integers drawn from [-100, 100); the upper bound is exclusive.
values = np.random.randint(-100, 100, (len(row_index), len(col_labels)))
df = pd.DataFrame(values, index=row_index, columns=col_labels)

df

Unnamed: 0,Unnamed: 1,A,B,C,D,E
X,1,72,-53,17,92,-33
X,2,95,3,-91,-79,-64
Y,1,-13,-30,-12,40,-42
Y,2,93,-61,-13,74,-12


In [46]:
# Select by the outer-level label only: returns the sub-frame for 'X',
# indexed by the remaining inner level (1, 2).
df.loc['X']

Unnamed: 0,A,B,C,D,E
1,72,-53,17,92,-33
2,95,3,-91,-79,-64


In [44]:
# Index into the multi-index frame step by step: take the 'X' sub-frame, then
# its 'A' column. The result is a Series indexed by the inner level (1, 2).
# Equivalent spelling with explicit grouping: (df.loc['X'])['A']

df.loc['X']['A']

1    72
2    95
Name: A, dtype: int64

In [45]:
# Drill down to a single scalar: outer label 'X' -> column 'A' -> inner-level
# label 1.
df.loc['X']['A'].loc[1]

72

In [49]:
# Equivalent to the above, but selecting the row before the column (see the next two cells)

In [50]:
# Outer label, then inner label: the full row for ('X', 1) as a Series indexed
# by column name.
df.loc['X'].loc[1]

A    72
B   -53
C    17
D    92
E   -33
Name: 1, dtype: int64

In [48]:
# Outer label, inner label, then column: the single value at row ('X', 1),
# column 'A'.
df.loc['X'].loc[1]['A']

72

In [51]:
# Inspect the index level names before renaming them; both levels start out
# unnamed (None).
df.index.names

FrozenList([None, None])

In [52]:
# Name the two index levels, outer level first.
df.index.names = ['Groups','Number']

In [53]:
# Confirm the new level names.
df.index.names

FrozenList(['Groups', 'Number'])

In [54]:
# Display the frame: the level names now head the two index columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D,E
Groups,Number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
X,1,72,-53,17,92,-33
X,2,95,3,-91,-79,-64
Y,1,-13,-30,-12,40,-42
Y,2,93,-61,-13,74,-12


In [55]:
# Cross-section indexing: select the rows that match a label on a chosen index level

In [56]:
# Rows whose 'Groups' level equals 'X'. The matched level is dropped from the
# result, leaving 'Number' as the index.
df.xs('X',level='Groups')

Unnamed: 0_level_0,A,B,C,D,E
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,72,-53,17,92,-33
2,95,3,-91,-79,-64


In [57]:
# Rows whose 'Number' level equals 1, taken across both groups; the result is
# indexed by 'Groups'.
df.xs(1,level='Number')

Unnamed: 0_level_0,A,B,C,D,E
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,72,-53,17,92,-33
Y,-13,-30,-12,40,-42
