## Hierarchical indexing

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Track the population of each state for two different years.
# The index is given as a plain list of (state, year) tuples; it is turned
# into a pandas MultiIndex in the next cell.

index = [
    ('California', 2000),
    ('California', 2010),
    ('New York', 2000),
    ('New York', 2010),
    ('Texas', 2000),
    ('Texas', 2010),
]
populations = [
    33871648, 37253956,  # California: 2000, 2010
    18976457, 19378102,  # New York:   2000, 2010
    20851820, 25145561,  # Texas:      2000, 2010
]
pop = pd.Series(data=populations, index=index)


In [3]:
# Convert the list of (state, year) tuples into a pandas MultiIndex.
index = pd.MultiIndex.from_tuples(index)

# Re-align the population Series onto that MultiIndex.
pop = pop.reindex(index=index)

# Select every state (first level) for the year 2000 (second level).
pop.loc[:, 2000]

California    33871648
New York      18976457
Texas         20851820
dtype: int64

In [24]:
# unstack() converts the multi-indexed Series into a conventional pandas
# DataFrame.
pop_unstacked = pop.unstack()
# NOTE(review): only the last expression of a cell is rendered, so this bare
# name displays nothing from here; move it to its own cell to see the table.
pop_unstacked

# stack() turns the unstacked DataFrame back into a multi-indexed Series.
# That Series becomes the 'total' column of a new DataFrame, next to an
# 'under_18' column given as a plain list and their ratio in 'divide'.
pop_stacked = pd.DataFrame({
    'total': pop_unstacked.stack(),
    'under_18': [
        9267089, 9284094,
        4687374, 4318033,
        5906301, 6879014,
    ],
}).assign(divide=lambda frame: frame['under_18'] / frame['total'])
# NOTE(review): not the last expression of the cell either, so nothing is shown.
pop_stacked

# When a dict keyed by (state, year) tuples is passed to pd.Series,
# pandas builds a MultiIndex from the keys by default.
data = dict([
    (('California', 2000), 33871648),
    (('California', 2010), 37253956),
    (('Texas', 2000), 20851820),
    (('Texas', 2010), 25145561),
    (('New York', 2000), 18976457),
    (('New York', 2010), 19378102),
])
data_series = pd.Series(data)

In [9]:
# Further hierarchical-indexing example: a DataFrame whose rows AND columns
# both carry a two-level index. The level names are supplied directly to the
# MultiIndex constructors.
# NOTE(review): the row key ('a', 1) occurs twice and the row index is not
# sorted -- confirm this is intentional.

examp = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays(
        [['a', 'a', 'a', 'b'], [1, 3, 1, 3]],
        names=['key1', 'key2'],
    ),
    columns=pd.MultiIndex.from_arrays(
        [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
        names=['State', 'Color'],
    ),
)

examp

Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,3,3,4,5
a,1,6,7,8
b,3,9,10,11
