In [2]:
import pandas as pd
import numpy as np

### Working with MultiIndex

In [7]:
# Two parallel label lists: position i of each list supplies one column's
# (first, second) label pair.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

# zip(*arrays) pairs the two lists element-wise:
# [("bar", "one"), ("bar", "two"), ("baz", "one"), ...]
tuples = list(zip(*arrays))

# Each tuple becomes one label of a two-level index with named levels.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

# Draw the sample values from a seeded generator so the frame shown below is
# identical on every "Restart Kernel -> Run All" (the unseeded np.random.randn
# produced different numbers on each run).
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((3, 8)),
    index=["A", "B", "C"],
    columns=index,
)

In [50]:
# Show df: rows A-C under the two-level ("first", "second") column index.
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.012646,-0.403925,2.385549,1.675968,-0.434669,-0.993215,0.704825,-1.445481
B,1.180018,-1.137566,0.409687,-1.697923,0.824046,1.118547,1.673185,0.838698
C,1.460556,0.151133,-2.152322,1.515066,0.527733,-0.811192,-1.755927,-0.545846


In [26]:
# Inspect the (first, second) label pairs produced by zipping the two lists.
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [None]:
# Same pairing as `tuples`: unpack the two label lists into zip and
# materialise the resulting pairs as a list.
[*zip(*arrays)]

In [25]:
# Display df again to look at its two header rows ("first" and "second").
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.012646,-0.403925,2.385549,1.675968,-0.434669,-0.993215,0.704825,-1.445481
B,1.180018,-1.137566,0.409687,-1.697923,0.824046,1.118547,1.673185,0.838698
C,1.460556,0.151133,-2.152322,1.515066,0.527733,-0.811192,-1.755927,-0.545846


In [79]:
# Recipe for a hierarchical column index:
#   1. build, per group, a list of outer labels and a list of inner labels
#   2. zip the unpacked lists into (outer, inner) pairs
#   3. hand the combined pairs to MultiIndex.from_tuples

# Outer label repeated once per variable, next to the variable names x1..x4.
arrays_1 = [["male"] * 4, [f"x{i}" for i in range(1, 5)]]
arrays_2 = [["female"] * 4, [f"x{i}" for i in range(1, 5)]]

# Pair outer and inner labels position by position.
tuple_1 = [*zip(*arrays_1)]
tuple_2 = [*zip(*arrays_2)]

# All "male" pairs first, then all "female" pairs.
tuples = [*tuple_1, *tuple_2]

# Two-level column index: level "sex" over level "variable".
columns = pd.MultiIndex.from_tuples(tuples, names=["sex", "variable"])


In [86]:
# Load the raw table. The first 4 lines of the file are skipped, and with
# header=None no line is treated as column names, so the columns get
# integer labels.
df_example = pd.read_csv(
    "./listas/data_1.1.6.csv",
    skiprows=4,
    header=None,
)

In [90]:
# Underlying values of df_example as a plain NumPy array (labels dropped).
df_example.to_numpy()

array([[ 0.34,  3.71,  2.87, 30.87,  0.29,  5.04,  1.93, 33.85],
       [ 0.39,  5.08,  3.38, 43.85,  0.28,  3.95,  2.51, 35.82],
       [ 0.48,  5.13,  4.13, 44.51,  0.31,  4.88,  2.31, 36.4 ],
       [ 0.31,  3.95,  3.6 , 46.  ,  0.3 ,  5.97,  1.9 , 37.87],
       [ 0.36,  5.51,  3.11, 47.02,  0.28,  4.57,  2.32, 38.3 ],
       [ 0.33,  4.07,  3.95, 48.5 ,  0.11,  1.74,  2.49, 39.19],
       [ 0.43,  4.77,  4.39, 48.75,  0.25,  4.66,  2.12, 39.21],
       [ 0.48,  6.69,  3.5 , 48.86,  0.26,  5.28,  1.98, 39.94],
       [ 0.21,  3.71,  2.82, 48.92,  0.39,  7.32,  2.25, 42.41],
       [ 0.32,  4.35,  3.59, 48.38,  0.37,  6.22,  1.71, 28.97],
       [ 0.54,  7.89,  3.47, 50.56,  0.31,  4.2 ,  2.76, 37.8 ],
       [ 0.32,  5.37,  3.07, 51.15,  0.35,  5.1 ,  2.1 , 31.1 ],
       [ 0.4 ,  4.95,  4.43, 55.34,  0.29,  4.46,  2.5 , 38.3 ],
       [ 0.31,  4.97,  3.56, 56.67,  0.33,  5.6 ,  3.06, 51.8 ],
       [ 0.44,  6.68,  3.86, 58.49,  0.18,  2.8 ,  2.4 , 37.6 ],
       [ 0.32,  4.8 ,  3.

In [91]:
# Re-wrap the raw values under the ("sex", "variable") column MultiIndex;
# the row index is the default one.
df_final = pd.DataFrame(
    data=df_example.to_numpy(),
    columns=columns,
)

In [92]:
# Show the relabelled frame: values from df_example under the sex/variable header.
df_final

sex,male,male,male,male,female,female,female,female
variable,x1,x2,x3,x4,x1,x2,x3,x4
0,0.34,3.71,2.87,30.87,0.29,5.04,1.93,33.85
1,0.39,5.08,3.38,43.85,0.28,3.95,2.51,35.82
2,0.48,5.13,4.13,44.51,0.31,4.88,2.31,36.4
3,0.31,3.95,3.6,46.0,0.3,5.97,1.9,37.87
4,0.36,5.51,3.11,47.02,0.28,4.57,2.32,38.3
5,0.33,4.07,3.95,48.5,0.11,1.74,2.49,39.19
6,0.43,4.77,4.39,48.75,0.25,4.66,2.12,39.21
7,0.48,6.69,3.5,48.86,0.26,5.28,1.98,39.94
8,0.21,3.71,2.82,48.92,0.39,7.32,2.25,42.41
9,0.32,4.35,3.59,48.38,0.37,6.22,1.71,28.97


## Basic indexing with MultiIndex data

In [99]:
# Frame used for the indexing examples: columns are (first, second) label pairs.
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.012646,-0.403925,2.385549,1.675968,-0.434669,-0.993215,0.704825,-1.445481
B,1.180018,-1.137566,0.409687,-1.697923,0.824046,1.118547,1.673185,0.838698
C,1.460556,0.151133,-2.152322,1.515066,0.527733,-0.811192,-1.755927,-0.545846


In [100]:
# A full (first, second) key selects exactly one column, returned as a Series.
df[("bar", "one")]

A   -0.012646
B    1.180018
C    1.460556
Name: (bar, one), dtype: float64