In [2]:
import pandas as pd
import numpy as np

In [3]:
tuples = [('A', 1), ('A', 2), ('B', 1), ('B', 2)]
index = pd.MultiIndex.from_tuples(tuples, names=['letters', 'numbers'])
data = {'values': [10, 20, 30, 40]}
df = pd.DataFrame(data, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,values
letters,numbers,Unnamed: 2_level_1
A,1,10
A,2,20
B,1,30
B,2,40


In [4]:
df.loc['A']

Unnamed: 0_level_0,values
numbers,Unnamed: 1_level_1
1,10
2,20


In [5]:
df.loc['A', 1]

values    10
Name: (A, 1), dtype: int64

In [6]:
tuples = [('A', 1), ('A', 2), ('B', 1), ('B', 2)]
index = pd.MultiIndex.from_tuples(tuples, names=['letters', 'numbers'])
data = {'Value1': [1, 2, 3, 4],
        'Value2': [5, 6, 7, 8],
        'Value3': [9, 10, 11, 12]}
df = pd.DataFrame(data, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value1,Value2,Value3
letters,numbers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,1,5,9
A,2,2,6,10
B,1,3,7,11
B,2,4,8,12


In [7]:
df.loc[('A', 1), 'Value3']

9

In [8]:
tuples = [('A', 1, 'yes'), ('A', 1, 'no'), ('A', 2, 'yes'), ('A', 2, 'no'), ('B', 1, 'yes'), ('B', 1, 'no'), ('B', 2, 'yes'), ('B', 2, 'no')]
index = pd.MultiIndex.from_tuples(tuples, names=['letters', 'numbers','types'])
data = {'Value1': [1, 2, 3, 4, 5, 6, 7, 8],
        'Value2': [9, 10, 11, 12, 13, 14, 15, 16],
        'Value3': [17, 18, 19, 20, 21, 22, 23, 24]}
df = pd.DataFrame(data, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Value1,Value2,Value3
letters,numbers,types,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,1,yes,1,9,17
A,1,no,2,10,18
A,2,yes,3,11,19
A,2,no,4,12,20
B,1,yes,5,13,21
B,1,no,6,14,22
B,2,yes,7,15,23
B,2,no,8,16,24


In [9]:
df.loc[('A', 1, 'yes'), 'Value3']

17

In [10]:
tuples = [('A', 1), ('A', 2), ('B', 1), ('B', 2)]
index = pd.MultiIndex.from_tuples(tuples, names=['letters', 'numbers'])
data = {'Value1': [1, 2, 3, 4],
        'Value2': [5, 6, 7, 8],
        'Value3': [9, 10, 11, 12]}
df = pd.DataFrame(data, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value1,Value2,Value3
letters,numbers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,1,5,9
A,2,2,6,10
B,1,3,7,11
B,2,4,8,12


In [11]:
arrays = [
    ['A', 'B']*4,
    [1, 2]*4,
    ['yes', 'no']*4
]
index = pd.MultiIndex.from_arrays(arrays, names=('Letters', 'Numbers', 'types'))
data = [10, 20, 30, 40, 10, 20, 30, 40]
df = pd.DataFrame(data, index=index, columns=['Values'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Values
Letters,Numbers,types,Unnamed: 3_level_1
A,1,yes,10
B,2,no,20
A,1,yes,30
B,2,no,40
A,1,yes,10
B,2,no,20
A,1,yes,30
B,2,no,40


In [12]:
arrays = [
    ['A', 'B']*4,
    [1, 2]*4,
    ['yes', 'no']*4
]
index = pd.MultiIndex.from_arrays(arrays, names=('Letters', 'Numbers', 'types'))
data = {'Values1': [10, 20, 30, 40, 10, 20, 30, 40],
        'Values2': [1, 2 , 3, 4, 5, 6, 7, 8]}
df = pd.DataFrame(data, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Values1,Values2
Letters,Numbers,types,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,yes,10,1
B,2,no,20,2
A,1,yes,30,3
B,2,no,40,4
A,1,yes,10,5
B,2,no,20,6
A,1,yes,30,7
B,2,no,40,8


In [13]:
df.loc[('A', 1, 'yes'), 'Values1']

  df.loc[('A', 1, 'yes'), 'Values1']


Letters  Numbers  types
A        1        yes      10
                  yes      30
                  yes      10
                  yes      30
Name: Values1, dtype: int64

In [14]:
df.loc['A'].loc[1].loc['yes']['Values1']

types
yes    10
yes    30
yes    10
yes    30
Name: Values1, dtype: int64

In [15]:
# Creating a Series with a MultiIndex
arrays = [['A', 'A', 'B', 'B'], [1, 2, 1, 2]]
index = pd.MultiIndex.from_arrays(arrays, names=('Letters', 'Numbers'))

data = [10, 20, 30, 40]
series = pd.Series(data, index=index)
series

Letters  Numbers
A        1          10
         2          20
B        1          30
         2          40
dtype: int64

In [16]:
series_unstacked = series.unstack()
series_unstacked

Numbers,1,2
Letters,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,20
B,30,40


In [17]:
series_stacked = series_unstacked.stack()
series_stacked

Letters  Numbers
A        1          10
         2          20
B        1          30
         2          40
dtype: int64

In [18]:
series_swapped = series.swaplevel('Letters', 'Numbers')
series_swapped

Numbers  Letters
1        A          10
2        A          20
1        B          30
2        B          40
dtype: int64

In [19]:
arrays = [['a','b','c','d'],[1,2,3,4],]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['first','second'])
# values = list(range(1,5))
values = np.random.randint(1,5, len(index))
s = pd.Series(values, index=index)
s

first  second
a      1         1
b      2         2
c      3         3
d      4         1
dtype: int32

In [20]:
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
s = pd.Series(np.random.randn(8), index=index)
s

first  second
bar    one       0.481361
       two      -0.010762
baz    one       0.131366
       two      -0.537333
foo    one      -0.763344
       two       0.723441
qux    one      -2.096495
       two      -0.251435
dtype: float64

In [21]:
import pandas as pd

arrays = [['bar', 'bar', 'baz', 'baz'],
          ['one', 'two', 'one', 'two']]
tuples = list(zip(*arrays))

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
df = pd.DataFrame({'A': [1, 2, 3, 4]}, index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
bar,one,1
bar,two,2
baz,one,3
baz,two,4


Slicing with MultiIndex

In [22]:
# Slicing inner levels
df.xs('one', level='second')

Unnamed: 0_level_0,A
first,Unnamed: 1_level_1
bar,1
baz,3


Stacking and Unstacking

In [23]:
stacked = df.stack()
stacked

first  second   
bar    one     A    1
       two     A    2
baz    one     A    3
       two     A    4
dtype: int64

In [24]:
unstacked = stacked.unstack()
unstacked

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
bar,one,1
bar,two,2
baz,one,3
baz,two,4


Aggregation on Levels

In [25]:
df.groupby(level=0).sum()

Unnamed: 0_level_0,A
first,Unnamed: 1_level_1
bar,3
baz,7


In [26]:
df.groupby(level=1).sum()

Unnamed: 0_level_0,A
second,Unnamed: 1_level_1
one,4
two,6


Resetting and Setting Index

In [27]:
df_reset  = df.reset_index()
df_reset 

Unnamed: 0,first,second,A
0,bar,one,1
1,bar,two,2
2,baz,one,3
3,baz,two,4


In [28]:
df_reset.index

RangeIndex(start=0, stop=4, step=1)

In [29]:
df_set = df_reset.set_index(['first', 'second'])
df_set

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
bar,one,1
bar,two,2
baz,one,3
baz,two,4


In [30]:
df_set.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two')],
           names=['first', 'second'])

Difference from_product, from_arrays, from_tuples

In [31]:
years = [2020, 2021]
months = ['Jan', 'Feb']
index = pd.MultiIndex.from_product([years, months], names=['year', 'month'])
index

MultiIndex([(2020, 'Jan'),
            (2020, 'Feb'),
            (2021, 'Jan'),
            (2021, 'Feb')],
           names=['year', 'month'])

In [32]:
dates = [20200101, 20200102]
cities = ['New York', 'Los Angeles']
index = pd.MultiIndex.from_arrays([dates, cities], names=['date', 'city'])
index

MultiIndex([(20200101,    'New York'),
            (20200102, 'Los Angeles')],
           names=['date', 'city'])

In [33]:
data = [(20200101, 'New York'), (20200102, 'Los Angeles')]
index = pd.MultiIndex.from_tuples(data, names=['date', 'city'])
index

MultiIndex([(20200101,    'New York'),
            (20200102, 'Los Angeles')],
           names=['date', 'city'])