In [1]:
# 002-Groupby-with-MultiIndex-001

# With heirarchically-indexed data, it's quite natural to group by one of the levels
# of the hierachy

In [4]:
# Create a series with a two-level multi index
import pandas as pd
import numpy as np

arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], 
          ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
index = pd.MultiIndex.from_arrays(arrays, names=['first','second'])
s = pd.Series(np.random.randn(8), index = index)
print(s)

first  second
bar    one      -0.825880
       two       0.963470
baz    one       0.234465
       two      -0.260631
foo    one      -0.581900
       two      -0.017743
qux    one      -0.312676
       two       2.551730
dtype: float64


In [5]:
# we can group by one of the levels in s
grouped = s.groupby(level=0)
print(grouped.sum())

first
bar    0.137590
baz   -0.026166
foo   -0.599643
qux    2.239054
dtype: float64


In [6]:
# if the MultiIndex has names specified, these can be passed instead of the level number:
print(s.groupby(level = 'second').sum())

second
one   -1.485990
two    3.236825
dtype: float64


In [7]:
# The aggregation functions such as sum will take the level parameter directly.
# Additionally, the resulting index will be named according to the chosen level:
print(s.sum(level='second'))

second
one   -1.485990
two    3.236825
dtype: float64


In [8]:
# Grouping with multiple levels is supported
print(s.groupby(level=['first','second']).sum())

first  second
bar    one      -0.825880
       two       0.963470
baz    one       0.234465
       two      -0.260631
foo    one      -0.581900
       two      -0.017743
qux    one      -0.312676
       two       2.551730
dtype: float64


In [10]:
# index level names can also be supplied as keys
print(s.groupby(['first','second']).sum())

first  second
bar    one      -0.825880
       two       0.963470
baz    one       0.234465
       two      -0.260631
foo    one      -0.581900
       two      -0.017743
qux    one      -0.312676
       two       2.551730
dtype: float64
