## Multi Index in Pandas
A MultiIndex in pandas is a multi-level (hierarchical) index object that lets you label each row or column with more than one key. It enables sophisticated data analysis and manipulation, especially when working with higher-dimensional data.
!pip install gapminder

In [7]:
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from gapminder import gapminder

In [9]:
# Work on a copy so that in-place operations on `df` cannot modify the
# DataFrame object exported by the gapminder package.
df = gapminder.copy()
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [10]:
# Build a three-level row index: continent > country > year.
# The result is derived from `gapminder` with a plain assignment (no
# inplace=True), so the source frame is left untouched and this cell can be
# re-run without a KeyError for columns that were already moved into the index.
df = gapminder.set_index(['continent', 'country', 'year'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lifeExp,pop,gdpPercap
continent,country,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Asia,Afghanistan,1952,28.801,8425333,779.445314
Asia,Afghanistan,1957,30.332,9240934,820.853030
Asia,Afghanistan,1962,31.997,10267083,853.100710
Asia,Afghanistan,1967,34.020,11537966,836.197138
Asia,Afghanistan,1972,36.088,13079460,739.981106
...,...,...,...,...,...
Africa,Zimbabwe,1987,62.351,9216418,706.157306
Africa,Zimbabwe,1992,60.377,10704340,693.420786
Africa,Zimbabwe,1997,46.809,11404948,792.449960
Africa,Zimbabwe,2002,39.989,11926563,672.038623


In [11]:
# Selecting a label of the outermost level (continent) returns every row under
# it; the result is indexed by the remaining levels (country, year).
df.loc['Europe']

Unnamed: 0_level_0,Unnamed: 1_level_0,lifeExp,pop,gdpPercap
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Albania,1952,55.230,1282697,1601.056136
Albania,1957,59.280,1476505,1942.284244
Albania,1962,64.820,1728137,2312.888958
Albania,1967,66.220,1984060,2760.196931
Albania,1972,67.690,2263554,3313.422188
...,...,...,...,...
United Kingdom,1987,75.007,56981620,21664.787670
United Kingdom,1992,76.420,57866349,22705.092540
United Kingdom,1997,77.218,58808266,26074.531360
United Kingdom,2002,78.471,59912431,29478.999190


In [12]:
# Pivot the innermost row level (year) into the columns: one row per
# (continent, country) and one column per (measure, year) pair.
df_pivot = df.unstack(level='year')
df_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,lifeExp,...,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap,gdpPercap
Unnamed: 0_level_1,year,1952,1957,1962,1967,1972,1977,1982,1987,1992,1997,...,1962,1967,1972,1977,1982,1987,1992,1997,2002,2007
continent,country,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Africa,Algeria,43.077,45.685,48.303,51.407,54.518,58.014,61.368,65.799,67.744,69.152,...,2550.816880,3246.991771,4182.663766,4910.416756,5745.160213,5681.358539,5023.216647,4797.295051,5288.040382,6223.367465
Africa,Angola,30.015,31.999,34.000,35.985,37.928,39.483,39.942,39.906,40.647,40.963,...,4269.276742,5522.776375,5473.288005,3008.647355,2756.953672,2430.208311,2627.845685,2277.140884,2773.287312,4797.231267
Africa,Benin,38.223,40.358,42.618,44.885,47.014,49.190,50.904,52.337,53.919,54.777,...,949.499064,1035.831411,1085.796879,1029.161251,1277.897616,1225.856010,1191.207681,1232.975292,1372.877931,1441.284873
Africa,Botswana,47.622,49.618,51.520,53.298,56.024,59.319,61.484,63.622,62.745,52.556,...,983.653976,1214.709294,2263.611114,3214.857818,4551.142150,6205.883850,7954.111645,8647.142313,11003.605080,12569.851770
Africa,Burkina Faso,31.975,34.906,37.814,40.697,43.591,46.137,48.122,49.557,50.260,50.324,...,722.512021,794.826560,854.735976,743.387037,807.198586,912.063142,931.752773,946.294962,1037.645221,1217.032994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Europe,Switzerland,69.620,70.560,71.320,72.770,73.780,75.390,76.210,77.410,78.030,79.370,...,20431.092700,22966.144320,27195.113040,26982.290520,28397.715120,30281.704590,31871.530300,32135.323010,34480.957710,37506.419070
Europe,Turkey,43.585,48.079,52.098,54.336,57.005,59.507,61.036,63.108,66.146,68.835,...,2322.869908,2826.356387,3450.696380,4269.122326,4241.356344,5089.043686,5678.348271,6601.429915,6508.085718,8458.276384
Europe,United Kingdom,69.180,70.420,70.760,71.360,72.010,72.760,74.040,75.007,76.420,77.218,...,12477.177070,14142.850890,15895.116410,17428.748460,18232.424520,21664.787670,22705.092540,26074.531360,29478.999190,33203.261280
Oceania,Australia,69.120,70.330,70.930,71.100,71.930,73.490,74.740,76.320,77.560,78.830,...,12217.226860,14526.124650,16788.629480,18334.197510,19477.009280,21888.889030,23424.766830,26997.936570,30687.754730,34435.367440


In [17]:
# Reverse the pivot: move the year column level back into the row index.
df_unpivot = df_pivot.stack(level='year')
df_unpivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lifeExp,pop,gdpPercap
continent,country,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Africa,Algeria,1952,43.077,9279525,2449.008185
Africa,Algeria,1957,45.685,10270856,3013.976023
Africa,Algeria,1962,48.303,11000948,2550.816880
Africa,Algeria,1967,51.407,12760499,3246.991771
Africa,Algeria,1972,54.518,14760787,4182.663766
...,...,...,...,...,...
Oceania,New Zealand,1987,74.320,3317166,19007.191290
Oceania,New Zealand,1992,76.330,3437674,18363.324940
Oceania,New Zealand,1997,77.550,3676187,21050.413770
Oceania,New Zealand,2002,79.110,3908037,23189.801350


In [18]:
# Flatten the three index levels back into ordinary columns.
# The frame is rebuilt from `df_pivot` with a plain assignment instead of
# inplace=True, so re-running this cell does not insert additional index columns.
df_unpivot = df_pivot.stack().reset_index()
# Same columns and values as the original data; note that continent now comes
# before country and the rows are ordered by continent and country.
df_unpivot

Unnamed: 0,continent,country,year,lifeExp,pop,gdpPercap
0,Africa,Algeria,1952,43.077,9279525,2449.008185
1,Africa,Algeria,1957,45.685,10270856,3013.976023
2,Africa,Algeria,1962,48.303,11000948,2550.816880
3,Africa,Algeria,1967,51.407,12760499,3246.991771
4,Africa,Algeria,1972,54.518,14760787,4182.663766
...,...,...,...,...,...,...
1699,Oceania,New Zealand,1987,74.320,3317166,19007.191290
1700,Oceania,New Zealand,1992,76.330,3437674,18363.324940
1701,Oceania,New Zealand,1997,77.550,3676187,21050.413770
1702,Oceania,New Zealand,2002,79.110,3908037,23189.801350


The name pandas is derived from "panel data". The library is well suited to panel data, a specific type of time series data recorded for multiple categories, and for such data a hierarchical index really makes sense. It works especially well when each row is uniquely identified by its index keys and you want to look up several columns at once.
As for performance, the index may not be necessary if you are only merging, but we weren't merging here; we were accessing, indexing, and reshaping the data. Either way, coding efficiency is definitely a big benefit.

MultiIndex is an array of tuples where each tuple is unique.
You can create a MultiIndex from a list of arrays, an array of tuples, a DataFrame, etc.
The Index constructor will attempt to return a MultiIndex when it is passed a list of tuples.
You can have multiple levels for both the row index and the column labels.
Multi-level columns are used when you want to group columns together.
https://sparkbyexamples.com/pandas/pandas-multiindex-dataframe-examples/

In [25]:
# Two-level row index with one (Discipline, Level) pair per row.
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here.
multi_index = pd.MultiIndex.from_tuples(
    [("Engineering", "UG"), ("Management", "PG")],
    names=['Discipline', 'Level'],
)

In [26]:
# Two-level column index: every year group (outer level) crossed with the
# M/F label (inner level), giving Year1-M, Year1-F, Year2-M, Year2-F in order.
cols = pd.MultiIndex.from_product([["Year1", "Year2"], ["M", "F"]])

In [27]:
# One row of values per (Discipline, Level) entry, in the column order
# Year1-M, Year1-F, Year2-M, Year2-F.
data = [
    [100, 300, 900, 400],
    [200, 500, 300, 600],
]

# NOTE: this rebinds `df`, which held the gapminder data in the cells above.
df = pd.DataFrame(data=data, index=multi_index, columns=cols)
print(df)

                  Year1      Year2     
                      M    F     M    F
Discipline  Level                      
Engineering UG      100  300   900  400
Management  PG      200  500   300  600


In [29]:
# Inspect the row index: a two-level MultiIndex named Discipline / Level.
df.index

MultiIndex([('Engineering', 'UG'),
            ( 'Management', 'PG')],
           names=['Discipline', 'Level'])

In [31]:
# set_names returns a new MultiIndex with the renamed levels. The result is not
# assigned here, so df's own index keeps the names Discipline and Level.
# Renaming the levels is a way to avoid duplicate column names; to apply it,
# assign the result back to df.index.
df.index.set_names(['discp','level'])

MultiIndex([('Engineering', 'UG'),
            ( 'Management', 'PG')],
           names=['discp', 'level'])

In [28]:
# Move both index levels into regular columns (drop=False is the default);
# the result is indexed by a fresh RangeIndex.
df2A = df.reset_index()
df2A

Unnamed: 0_level_0,Discipline,Level,Year1,Year1,Year2,Year2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,M,F,M,F
0,Engineering,UG,100,300,900,400
1,Management,PG,200,500,300,600


In [30]:
# After reset_index the row labels are a plain RangeIndex.
df2A.index

RangeIndex(start=0, stop=2, step=1)

In [33]:
# Starting from the original frame, move only the inner level (Level) out of
# the index into a column; Discipline remains the row index.
df.reset_index(level='Level')

Unnamed: 0_level_0,Level,Year1,Year1,Year2,Year2
Unnamed: 0_level_1,Unnamed: 1_level_1,M,F,M,F
Discipline,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Engineering,UG,100,300,900,400
Management,PG,200,500,300,600


In [34]:
# Discard the Level index level entirely instead of turning it into a column.
df.reset_index(level='Level', drop=True)

Unnamed: 0_level_0,Year1,Year1,Year2,Year2
Unnamed: 0_level_1,M,F,M,F
Discipline,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Engineering,100,300,900,400
Management,200,500,300,600


In [44]:
# Per-column labels of each column level: level 0 holds the year group,
# level 1 the M/F label.
CL0 = df.columns.get_level_values(0)
CL1 = df.columns.get_level_values(1)

In [45]:
# Show the outer (CL0) and inner (CL1) column labels side by side.
print(CL0, CL1)

Index(['Year1', 'Year1', 'Year2', 'Year2'], dtype='object') Index(['M', 'F', 'M', 'F'], dtype='object')


In [49]:
# Build one comma-terminated "<level0>_<level1>," token per column and
# concatenate the tokens into a single string.
newCols = ''.join(f"{outer!s}_{inner!s}," for outer, inner in zip(CL0, CL1))
print(newCols)

Year1_M,Year1_F,Year2_M,Year2_F,


In [50]:
# Remove the outer row level (Discipline); rows are then labelled by Level only.
df.droplevel('Discipline', axis='index')

Unnamed: 0_level_0,Year1,Year1,Year2,Year2
Unnamed: 0_level_1,M,F,M,F
Level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
UG,100,300,900,400
PG,200,500,300,600


In [53]:
# Remove the inner row level (Level); rows are then labelled by Discipline only.
df.droplevel('Level', axis='index')

Unnamed: 0_level_0,Year1,Year1,Year2,Year2
Unnamed: 0_level_1,M,F,M,F
Discipline,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Engineering,100,300,900,400
Management,200,500,300,600


In [51]:
# Remove the outer column level (the year group), leaving only the M/F labels.
df.droplevel(level=0, axis='columns')

Unnamed: 0_level_0,Unnamed: 1_level_0,M,F,M,F
Discipline,Level,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Engineering,UG,100,300,900,400
Management,PG,200,500,300,600


In [52]:
# Remove the inner column level (the M/F label), leaving only the year group.
df.droplevel(level=1, axis='columns')

Unnamed: 0_level_0,Unnamed: 1_level_0,Year1,Year1,Year2,Year2
Discipline,Level,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Engineering,UG,100,300,900,400
Management,PG,200,500,300,600


In [54]:
# Keep only the outer column level, then move the row index levels into columns.
df.droplevel(level=1, axis='columns').reset_index(drop=False)

Unnamed: 0,Discipline,Level,Year1,Year1.1,Year2,Year2.1
0,Engineering,UG,100,300,900,400
1,Management,PG,200,500,300,600


In [72]:
# Flatten every two-level column tuple into a single "<level0>_<level1>" label.
# map(str, ...) keeps this working when a level holds non-string labels.
newCols1 = ['_'.join(map(str, col)) for col in df.columns]
print(newCols1)
# Prepend the row-index level names, read from the frame itself rather than
# hard-coded, so the list lines up with the columns that reset_index() produces.
newCols1 = [*df.index.names, *newCols1]
print(newCols1)

['Year1_M', 'Year1_F', 'Year2_M', 'Year2_F']
['Discipline', 'Level', 'Year1_M', 'Year1_F', 'Year2_M', 'Year2_F']


In [73]:
# Single-level columns (year group only) with the row index levels moved into
# regular columns; the duplicated year labels are renamed in a later cell.
df2C = df.droplevel(level=1, axis='columns').reset_index()
df2C

Unnamed: 0,Discipline,Level,Year1,Year1.1,Year2,Year2.1
0,Engineering,UG,100,300,900,400
1,Management,PG,200,500,300,600


In [74]:
# Replace the column labels of df2C with the flattened names in newCols1
# (index level names followed by the joined column labels).
df2C.columns = newCols1
df2C

Unnamed: 0,Discipline,Level,Year1_M,Year1_F,Year2_M,Year2_F
0,Engineering,UG,100,300,900,400
1,Management,PG,200,500,300,600


https://dev.to/ra1nbow1/8-ways-to-add-an-element-to-the-beginning-of-a-list-and-string-in-python-925