# Flatten MultiIndex in Columns and Rows

* https://stackoverflow.com/questions/14507794/pandas-how-to-flatten-a-hierarchical-index-in-columns
* https://www.pauldesalvo.com/how-to-flatten-multiindex-columns-into-a-single-index-dataframe-in-pandas/
* https://www.statology.org/pandas-flatten-multiindex/

#### Version of packages used in this Notebook

In [25]:
import numpy as np
import pandas as pd

# The versions printed below are the ones this notebook was run with;
# use the same or newer versions when reproducing it.
for label, module in (('numpy: ', np), ('pandas: ', pd)):
    print(label, module.__version__)

numpy:  1.18.1
pandas:  1.1.4


## 1. Columns

In [27]:
# Toy exam records, one row per student.
df = pd.DataFrame(
    dict(
        name=['Tom', 'James', 'Allan', 'Chris'],
        year=['2000', '2000', '2001', '2001'],  # years are stored as strings
        math=[67, 80, 75, 50],
        star=[1, 2, 3, 4],
    )
)

df

Unnamed: 0,name,year,math,star
0,Tom,2000,67,1
1,James,2000,80,2
2,Allan,2001,75,3
3,Chris,2001,50,4


In [29]:
# Aggregate per year: mean and sum of `math`, sum of `star`.
# The result has two-level (MultiIndex) columns such as ('math', 'mean').
aggregations = {'math': ['mean', 'sum'], 'star': 'sum'}
df_grouped = df.groupby('year').agg(aggregations)
df_grouped

Unnamed: 0_level_0,math,math,star
Unnamed: 0_level_1,mean,sum,sum
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2000,73.5,147,3
2001,62.5,125,7


In [30]:
# Inspect the column labels: each label is a (column, aggregation) tuple.
df_grouped.columns

MultiIndex([('math', 'mean'),
            ('math',  'sum'),
            ('star',  'sum')],
           )

### 1.1 Use `get_level_values()`

In [42]:
# Select one level of the column MultiIndex by position; level 0 is the
# first element of each column tuple. The result is a flat Index.
df_grouped.columns.get_level_values(0)

Index(['math', 'math', 'star'], dtype='object')

In [33]:
# Keep only the top level of each column label.
# Note: ('math', 'mean') and ('math', 'sum') both collapse to 'math',
# so the flattened frame ends up with two columns named 'math'.
top_level_labels = df_grouped.columns.get_level_values(0)
df_grouped.columns = top_level_labels

df_grouped

Unnamed: 0_level_0,math,math,star
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,73.5,147,3
2001,62.5,125,7


### 1.2 Join MultiIndex columns

In [39]:
# Rebuild the grouped frame so the columns are a two-level MultiIndex again.
aggregations = {'math': ['mean', 'sum'], 'star': 'sum'}
df_grouped = df.groupby('year').agg(aggregations)
df_grouped

Unnamed: 0_level_0,math,math,star
Unnamed: 0_level_1,mean,sum,sum
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2000,73.5,147,3
2001,62.5,125,7


In [41]:
# Get the column labels as a NumPy array of tuples
df_grouped.columns.values

array([('math', 'mean'), ('math', 'sum'), ('star', 'sum')], dtype=object)

In [36]:
# Join the levels of each column tuple with `_`,
# e.g. ('math', 'mean') -> 'math_mean'
['_'.join(levels) for levels in df_grouped.columns.values]

['math_mean', 'math_sum', 'star_sum']

In [37]:
# Replace the two-level column labels with their `_`-joined flat names.
flat_names = ['_'.join(levels) for levels in df_grouped.columns.values]
df_grouped.columns = flat_names

df_grouped

Unnamed: 0_level_0,math_mean,math_sum,star_sum
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,73.5,147,3
2001,62.5,125,7


## 2. Rows

In [71]:
# Three-level row index: one (Location, Store, Date) tuple per row.
row_labels = [
    ('Oxford', 'Store A', '01-01-2022'),
    ('Oxford', 'Store B', '01-01-2022'),
    ('Oxford', 'Store A', '02-01-2022'),
    ('Oxford', 'Store B', '02-01-2022'),
    ('London', 'Store C', '01-01-2022'),
    ('London', 'Store D', '01-01-2022'),
    ('London', 'Store C', '02-01-2022'),
    ('London', 'Store D', '02-01-2022'),
]
index = pd.MultiIndex.from_tuples(
    row_labels,
    names=['Location', 'Store', 'Date'],
)

# Two value columns, aligned positionally with the tuples above.
data = dict(
    Num_employee=[1, 2, 3, 4, 5, 6, 7, 8],
    Sales=[11, 22, 33, 44, 55, 66, 77, 88],
)

df = pd.DataFrame(data, index=index)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Num_employee,Sales
Location,Store,Date,Unnamed: 3_level_1,Unnamed: 4_level_1
Oxford,Store A,01-01-2022,1,11
Oxford,Store B,01-01-2022,2,22
Oxford,Store A,02-01-2022,3,33
Oxford,Store B,02-01-2022,4,44
London,Store C,01-01-2022,5,55
London,Store D,01-01-2022,6,66
London,Store C,02-01-2022,7,77
London,Store D,02-01-2022,8,88


### 2.1 Flatten every level of MultiIndex

In [64]:
# Flatten every level of the MultiIndex: each level becomes a regular column.
# The result is only displayed, not assigned, so `df` keeps its MultiIndex.
df.reset_index()

Unnamed: 0,Location,Store,Date,Num_employee,Sales
0,Oxford,Store A,01-01-2022,1,11
1,Oxford,Store B,01-01-2022,2,22
2,Oxford,Store A,02-01-2022,3,33
3,Oxford,Store B,02-01-2022,4,44
4,London,Store C,01-01-2022,5,55
5,London,Store D,01-01-2022,6,66
6,London,Store C,02-01-2022,7,77
7,London,Store D,02-01-2022,8,88


### 2.2 Flatten 'Date' level only

In [65]:
# Flatten only the 'Date' level; 'Location' and 'Store' stay in the index.
df.reset_index(level=['Date'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Num_employee,Sales
Location,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Oxford,Store A,01-01-2022,1,11
Oxford,Store B,01-01-2022,2,22
Oxford,Store A,02-01-2022,3,33
Oxford,Store B,02-01-2022,4,44
London,Store C,01-01-2022,5,55
London,Store D,01-01-2022,6,66
London,Store C,02-01-2022,7,77
London,Store D,02-01-2022,8,88


### 2.3 Join Index labels

In [66]:
# Display the row index (a three-level MultiIndex: Location, Store, Date)
df.index

MultiIndex([('Oxford', 'Store A', '01-01-2022'),
            ('Oxford', 'Store B', '01-01-2022'),
            ('Oxford', 'Store A', '02-01-2022'),
            ('Oxford', 'Store B', '02-01-2022'),
            ('London', 'Store C', '01-01-2022'),
            ('London', 'Store D', '01-01-2022'),
            ('London', 'Store C', '02-01-2022'),
            ('London', 'Store D', '02-01-2022')],
           names=['Location', 'Store', 'Date'])

In [67]:
# Get the index labels as a NumPy array of tuples
df.index.values

array([('Oxford', 'Store A', '01-01-2022'),
       ('Oxford', 'Store B', '01-01-2022'),
       ('Oxford', 'Store A', '02-01-2022'),
       ('Oxford', 'Store B', '02-01-2022'),
       ('London', 'Store C', '01-01-2022'),
       ('London', 'Store D', '01-01-2022'),
       ('London', 'Store C', '02-01-2022'),
       ('London', 'Store D', '02-01-2022')], dtype=object)

In [68]:
# Join the levels of each index tuple with `_`
['_'.join(labels) for labels in df.index.values]

['Oxford_Store A_01-01-2022',
 'Oxford_Store B_01-01-2022',
 'Oxford_Store A_02-01-2022',
 'Oxford_Store B_02-01-2022',
 'London_Store C_01-01-2022',
 'London_Store D_01-01-2022',
 'London_Store C_02-01-2022',
 'London_Store D_02-01-2022']

In [69]:
df.index = ['_'.join(col) for col in df.index.values]
df

Unnamed: 0,Num_employee,Sales
Oxford_Store A_01-01-2022,1,11
Oxford_Store B_01-01-2022,2,22
Oxford_Store A_02-01-2022,3,33
Oxford_Store B_02-01-2022,4,44
London_Store C_01-01-2022,5,55
London_Store D_01-01-2022,6,66
London_Store C_02-01-2022,7,77
London_Store D_02-01-2022,8,88


## Thanks for reading

This is a notebook for the Medium article [Delete rows and columns from a DataFrame using Pandas drop()](https://bindichen.medium.com/delete-rows-and-columns-from-a-dataframe-using-pandas-drop-d2533cf7b4bd)

Please check out the article for instructions.

**License**: [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause)