# Pandas

## 2. DataFrames - Part III

In [10]:
import numpy as np
import pandas as pd
from numpy.random import randn

## Multilevel-indexed DataFrames

In [4]:
# Build the two index levels: the outer group label and the inner row number
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the levels element-wise and turn the (outer, inner) tuples into a MultiIndex
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [8]:
# zip pairs up corresponding elements of the two lists, so every resulting
# tuple holds one outer label together with its matching inner label
[(group, num) for group, num in zip(outside, inside)]

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [9]:
# inspect the MultiIndex built from the (outer, inner) tuples
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [11]:
# Build a 6x2 frame of random values whose rows are labelled by the two-level
# index; randn is not seeded here, so the numbers differ from run to run
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=list('AB'))
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [13]:
# Selection works from the outside in: pick the outer label first, then
# the inner label (here: row 1 of group G1, all columns)
df.loc['G1'].loc[1, :]

A    0.302665
B    1.693723
Name: 1, dtype: float64

In [15]:
# Name the two index levels so they can be referred to by name later on
df.index.set_names(['Groups', 'Num'], inplace=True)

In [16]:
# redisplay the frame: the index levels are now labelled 'Groups' and 'Num'
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [17]:
# Single .loc lookup instead of three chained ones: the row key is the
# (outer, inner) tuple, followed by the column label
df.loc[('G2', 2), 'B']

0.07295967531703869

In [20]:
# Single .loc lookup instead of three chained ones: the row key is the
# (outer, inner) tuple, followed by the column label
df.loc[('G1', 3), 'A']

-0.13484072060601238

## Cross-section

In [24]:
# xs (cross-section) is meant for multi-level indexes: it selects rows by a
# label on one chosen level, across every value of the other levels.
# Here: take the rows whose 'Num' level equals 1 from all groups (G1 and G2);
# the 'Num' level is dropped from the result.
df.xs(key=1, level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502
