In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global RNG so the random draws below are reproducible.
np.random.seed(0)

# 3 rows x 5 columns of normally distributed values, with string labels
# on both axes so that label-based indexing can be demonstrated.
df = pd.DataFrame(
    np.random.normal(size=(3, 5)),
    index=['X', 'Y', 'Z'],
    columns=['aa', 'bb', 'cc', 'dd', 'ee'],
)

In [3]:
# Display the frame: rows X/Y/Z, columns aa..ee.
df

Unnamed: 0,aa,bb,cc,dd,ee
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [7]:
# How to access a column in a DataFrame: index with the column label.
# The result is a Series indexed by the row labels.
df['aa']

X    1.764052
Y   -0.977278
Z    0.144044
Name: aa, dtype: float64

In [5]:
# Select column 'cc' by its label; the result is a Series.
df['cc']

X    0.978738
Y   -0.151357
Z    0.761038
Name: cc, dtype: float64

In [6]:
# Pass a list of column labels to select several columns at once.
# The result is a DataFrame, not a Series.
df[['aa','cc']]

Unnamed: 0,aa,cc
X,1.764052,0.978738
Y,-0.977278,-0.151357
Z,0.144044,0.761038


In [8]:
# How to access a row of data (explicit): .loc looks the row up by its
# index label and returns it as a Series keyed by column name.
df.loc['X']

aa    1.764052
bb    0.400157
cc    0.978738
dd    2.240893
ee    1.867558
Name: X, dtype: float64

In [9]:
# How to access a row of data (implicit/positional): .iloc uses the integer
# position, so 0 is the first row (the one labelled 'X').
df.iloc[0]

aa    1.764052
bb    0.400157
cc    0.978738
dd    2.240893
ee    1.867558
Name: X, dtype: float64

In [12]:
# Select several rows in one query by passing a list of index labels.
# .loc is label-based; given a single indexer it selects rows, and a list
# of labels returns a DataFrame.
df.loc[['X','Y','Z']]

Unnamed: 0,aa,bb,cc,dd,ee
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [14]:
#The following are equivalent ways to read the value at row 'Z', column 'aa'

In [15]:
# Chained lookup: take column 'aa' as a Series, then index it by row label 'Z'.
# Fine for reading a value; avoid chained indexing when assigning.
df['aa']['Z']

0.144043571160878

In [16]:
# Column 'aa' as a Series, then an explicit label lookup on it with .loc.
df['aa'].loc['Z']

0.144043571160878

In [17]:
# Row 'Z' as a Series via .loc, then pick its 'aa' entry.
df.loc['Z']['aa']

0.144043571160878

In [18]:
# Single .loc call with (row label, column label): one lookup with no
# intermediate Series, and the form to use when assigning a value.
df.loc['Z','aa']

0.144043571160878

In [19]:
# Inspect the names of the index levels; an unnamed index reports None.
df.index.names

FrozenList([None])

In [20]:
# Add a name to the index. .names takes a list with one entry per index
# level; this index has a single level.
df.index.names = ['group']

In [21]:
# Display df; the index header now shows the name 'group'.
df

Unnamed: 0_level_0,aa,bb,cc,dd,ee
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,1.764052,0.400157,0.978738,2.240893,1.867558
Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [22]:
#reset the index to the default integer index 0..n-1

In [23]:
# reset_index() returns a new frame with a default 0..n-1 index and moves the
# old index into a column named after it ('group'). Without inplace=True,
# df itself is not modified.
df.reset_index()

Unnamed: 0,group,aa,bb,cc,dd,ee
0,X,1.764052,0.400157,0.978738,2.240893,1.867558
1,Y,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,Z,0.144044,1.454274,0.761038,0.121675,0.443863


In [24]:
# Replace the index in place. drop=True discards the old labels instead of
# keeping them as a column; inplace=True mutates df directly and returns None,
# which is why this cell shows no output.
df.reset_index(drop=True,inplace=True)

In [25]:
# Display df; the row labels are now the default integers 0, 1, 2.
df

Unnamed: 0,aa,bb,cc,dd,ee
0,1.764052,0.400157,0.978738,2.240893,1.867558
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599
2,0.144044,1.454274,0.761038,0.121675,0.443863


In [26]:
# Define a new column and name it 'new'. A plain list is assigned by
# position, so it must contain exactly one value per row.
df['new']=['P','Q','R']

In [27]:
# Display df with the added 'new' column.
df

Unnamed: 0,aa,bb,cc,dd,ee,new
0,1.764052,0.400157,0.978738,2.240893,1.867558,P
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599,Q
2,0.144044,1.454274,0.761038,0.121675,0.443863,R


In [29]:
# Promote the 'new' column to be the row index. inplace=True mutates df and the
# column is removed from the data, so running this cell a second time raises
# KeyError because 'new' is no longer a column.
df.set_index('new',inplace=True)

In [30]:
# Display df; rows are now labelled P, Q, R and the index is named 'new'.
df

Unnamed: 0_level_0,aa,bb,cc,dd,ee
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
P,1.764052,0.400157,0.978738,2.240893,1.867558
Q,-0.977278,0.950088,-0.151357,-0.103219,0.410599
R,0.144044,1.454274,0.761038,0.121675,0.443863


In [None]:
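# 14:33  (leftover note, presumably a tutorial timestamp; not valid Python, so kept as a comment. Confirm or delete this cell.)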