## MultiIndex / advanced indexing
https://pandas.pydata.org/docs/user_guide/advanced.html

In [1]:
# %load command1.py
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

%config InlineBackend.figure_format='svg'
plt.rcParams['figure.dpi']=120

pd.options.display.float_format='{:,.2f}'.format
pd.set_option('display.max_colwidth', None)


In [2]:
# Two parallel label lists: position i of the first list is the outer
# ("first") label and position i of the second list is the inner ("second") label.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

# Pair the labels position by position into (first, second) tuples.
tuples = list(zip(arrays[0], arrays[1]))
tuples
print()  # blank line between the displayed results

# Build the two-level index from the tuples and name both levels.
index = pd.MultiIndex.from_tuples(tuples=tuples, names=["first", "second"])
index
print()  # blank line between the displayed results

# Eight random values labelled by the hierarchical index.
s = pd.Series(data=np.random.randn(8), index=index)
s

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]




MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])




first  second
bar    one      -0.36
       two       1.75
baz    one      -0.34
       two       0.08
foo    one      -0.14
       two       1.81
qux    one       0.44
       two       1.83
dtype: float64

In [3]:
# One list of labels per level; from_product pairs every "first" label with
# every "second" label (Cartesian product) to form the index.
iterables = [
    ["bar", "baz", "foo", "qux"],
    ["one", "two"],
]
pd.MultiIndex.from_product(iterables=iterables, names=["first", "second"])


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [4]:
# A plain two-column frame; each row holds one (first, second) label pair.
df = pd.DataFrame(
    {
        "first": ["bar", "bar", "foo", "foo"],
        "second": ["one", "two", "one", "two"],
    }
)

# from_frame turns every row into one index entry and takes the level names
# from the column names.
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [5]:
# Two label arrays of length 8: each outer label appears twice in a row,
# while the inner labels "one"/"two" alternate.
arrays = [
    np.repeat(["bar", "baz", "foo", "qux"], 2),
    np.tile(["one", "two"], 4),
]

# Passing a list of arrays as `index` yields a two-level index directly
# (the levels are unnamed here).
s = pd.Series(data=np.random.randn(8), index=arrays)
s
print()  # blank line between the displayed results
df = pd.DataFrame(data=np.random.randn(8, 4), index=arrays)
df

bar  one   -0.52
     two    1.10
baz  one    0.77
     two    0.58
foo  one    0.33
     two   -2.00
qux  one    0.25
     two    1.27
dtype: float64




Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,-2.11,-0.59,0.26,0.68
bar,two,0.15,1.24,0.91,0.04
baz,one,-0.21,-0.46,-0.19,-0.38
baz,two,-1.12,0.22,0.03,0.93
foo,one,0.12,0.56,-1.53,0.21
foo,two,1.18,-0.33,1.17,0.01
qux,one,0.48,1.14,0.08,-1.07
qux,two,1.09,-1.37,0.05,0.83


In [6]:
# Re-display the two-level MultiIndex (levels "first" / "second") that was
# built with MultiIndex.from_tuples in the earlier cell.
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [7]:
# Use the MultiIndex on the column axis: 3 rows labelled A, B, C and
# 8 hierarchical columns.
df = pd.DataFrame(
    data=np.random.randn(3, 8),
    index=list("ABC"),
    columns=index,
)
df
print()  # blank line between the displayed results

# Both axes can carry a MultiIndex; the first six entries of `index` are
# reused here for the rows and for the columns.
pd.DataFrame(
    data=np.random.randn(6, 6),
    index=index[:6],
    columns=index[:6],
)

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-1.8,0.57,0.25,0.56,0.44,0.29,0.81,-0.52
B,-0.94,0.14,0.13,0.96,0.08,-0.55,-1.21,0.83
C,-1.42,1.47,0.38,2.85,-0.49,-1.22,-0.47,0.78





Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,1.04,-1.08,-2.21,-0.49,2.0,-0.64
bar,two,-1.16,-0.34,1.34,-1.29,0.33,-0.09
baz,one,0.13,-0.34,0.42,0.61,-0.58,0.16
baz,two,0.51,0.07,-0.22,0.71,0.05,-0.17
foo,one,0.91,0.31,0.12,-0.35,0.6,1.07
foo,two,-0.79,1.23,-0.23,0.62,0.98,1.32


**`get_level_values()` returns the labels at every position for one level of the index; the level can be selected either by its integer position or by its name:**

In [8]:
# Select a level by integer position: 0 is the outer level, 1 the inner one.
index.get_level_values(level=0)
print()  # blank line between the displayed results
index.get_level_values(level=1)
print()  # blank line between the displayed results
# Select a level by its name instead of its position.
index.get_level_values(level="second")

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')




Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')




Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')