In [35]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [36]:
# Seed NumPy's global random generator so the randn() draws below are repeatable.
np.random.seed(101)

In [37]:
# 5x4 frame of standard-normal samples, rows labelled a-e and columns w-z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('abcde'),
    columns=list('wxyz'),
)

# Indexing

In [38]:
df
# Further details are covered in module 3.

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


## Index Details

### Resetting an Index

In [39]:
# Resetting the index: the old labels (a-e) move into an 'index' column and
# the rows get a default 0..n-1 index. The result is displayed only; it is not
# assigned back to df.
df.reset_index()

Unnamed: 0,index,w,x,y,z
0,a,2.70685,0.628133,0.907969,0.503826
1,b,0.651118,-0.319318,-0.848077,0.605965
2,c,-2.018168,0.740122,0.528813,-0.589001
3,d,0.188695,-0.758872,-0.933237,0.955057
4,e,0.190794,1.978757,2.605967,0.683509


### Setting an Index

In [40]:
# State abbreviations that will become the new row labels (one per row of df).
newind = ['CA', 'NY', 'WY', 'OR', 'CO']

In [41]:
# Attach the labels as a regular column; this modifies df in place.
df['States'] = newind

In [42]:
# Show df with the newly added 'States' column.
df

Unnamed: 0,w,x,y,z,States
a,2.70685,0.628133,0.907969,0.503826,CA
b,0.651118,-0.319318,-0.848077,0.605965,NY
c,-2.018168,0.740122,0.528813,-0.589001,WY
d,0.188695,-0.758872,-0.933237,0.955057,OR
e,0.190794,1.978757,2.605967,0.683509,CO


In [43]:
# Promote the 'States' column to the row index. The result is displayed only;
# it is not assigned back to df.
df.set_index('States')

Unnamed: 0_level_0,w,x,y,z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


## Multi-Index and Index Hierarchy

In [45]:
# Index levels: build a two-level (hierarchical) row index.
# `outside` holds the outer group label and `inside` the inner number for each row.
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
# zip() pairs the two lists element-wise, giving a list of (outer, inner) tuples
# such as ('G1', 1); from_tuples() turns each tuple into one MultiIndex entry.
# Built in a single assignment so hier_index is only ever a MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [46]:
# Inspect the MultiIndex built above.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [47]:
# 6x2 frame of random draws, indexed by the two-level hier_index.
# NOTE: this rebinds `df`; the earlier a-e frame is no longer reachable under that name.
df = pd.DataFrame(
    np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


### Selecting data from a MultiIndex

In [48]:
# Select every row under the outer-level label 'G1'; the result is indexed by
# the inner level only.
df.loc['G1']

Unnamed: 0,A,B
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [51]:
# Chain the lookups from the outer level to the inner level:
# first take group 'G1', then the row labelled 2 within it (a label, not a position).

df.loc['G1'].loc[2]

A   -1.706086
B   -1.159119
Name: 2, dtype: float64

In [52]:
# Name the two index levels: outer level -> 'Group', inner level -> 'Num'.

df.index.names = ['Group','Num']

In [55]:
# Return a single value: row ('G2', 2), column 'B'.
# One .loc call with a (row-tuple, column) key replaces the chained
# df.loc['G2'].loc[2]['B'] lookups and avoids building intermediate objects.
df.loc[('G2', 2), 'B']

0.07295967531703869

In [56]:
# Show df again; the index levels are now labelled Group and Num.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


#### Cross-section method (`xs`): a more direct way to select by an inner level

In [57]:
# Cross-section: every row whose 'Num' level equals 1, across all groups.
df.xs(key=1, level='Num')

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502
