# Flatten MultiIndex columns and rows in Pandas

This is a notebook for the Medium article [How to flatten MultiIndex Columns and Rows in Pandas](https://bindichen.medium.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569)

Please check out the article for instructions.

**License**: [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause)


#### Version of packages used in this Notebook

In [1]:
import numpy as np
import pandas as pd

# Print the installed versions; use these versions or newer to reproduce the notebook
for label, module in (('numpy: ', np), ('pandas: ', pd)):
    print(label, module.__version__)

numpy:  1.18.1
pandas:  1.1.4


## 1. Columns

In [2]:
# Sample data: one row per student -> (name, year, math score, star count)
records = [
    ('Tom', '2000', 67, 1),
    ('James', '2000', 80, 2),
    ('Allan', '2001', 75, 3),
    ('Chris', '2001', 50, 4),
]
df = pd.DataFrame(records, columns=['name', 'year', 'math', 'star'])

df

Unnamed: 0,name,year,math,star
0,Tom,2000,67,1
1,James,2000,80,2
2,Allan,2001,75,3
3,Chris,2001,50,4


In [3]:
# Per year: mean and sum of `math`, sum of `star`.
# Giving a list of functions for a column is what produces two-level columns.
aggregations = {'math': ['mean', 'sum'], 'star': 'sum'}
df_grouped = df.groupby('year').agg(aggregations)

df_grouped

Unnamed: 0_level_0,math,math,star
Unnamed: 0_level_1,mean,sum,sum
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2000,73.5,147,3
2001,62.5,125,7


In [4]:
# Inspect the column labels of the grouped frame (a two-level MultiIndex)
df_grouped.columns

MultiIndex([('math', 'mean'),
            ('math',  'sum'),
            ('star',  'sum')],
           )

### 1.1 Use `get_level_values()`

In [5]:
# Select the labels of a single level; level 0 holds the original column names
df_grouped.columns.get_level_values(0)

Index(['math', 'math', 'star'], dtype='object')

In [6]:
# Keep only the level-0 labels as the new column names.
# NOTE: both `math` aggregations end up with the same label 'math',
# so the resulting column names are not unique.
df_grouped.columns = df_grouped.columns.get_level_values(0)

df_grouped

Unnamed: 0_level_0,math,math,star
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,73.5,147,3
2001,62.5,125,7


### 1.2 Use `to_flat_index()`

In [7]:
# Rebuild the grouped frame so that its columns are a MultiIndex again
aggregations = {'math': ['mean', 'sum'], 'star': 'sum'}
df_grouped = df.groupby('year').agg(aggregations)
df_grouped

Unnamed: 0_level_0,math,math,star
Unnamed: 0_level_1,mean,sum,sum
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2000,73.5,147,3
2001,62.5,125,7


In [8]:
# Convert the MultiIndex into a flat Index whose labels are (level 0, level 1) tuples
df_grouped.columns.to_flat_index()

Index([('math', 'mean'), ('math', 'sum'), ('star', 'sum')], dtype='object')

In [9]:
# Use the flattened tuples as the column labels
df_grouped.columns = df_grouped.columns.to_flat_index()

df_grouped

Unnamed: 0_level_0,"(math, mean)","(math, sum)","(star, sum)"
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,73.5,147,3
2001,62.5,125,7


### 1.3 Join MultiIndex columns

In [10]:
# Rebuild the grouped frame so that its columns are a MultiIndex again
aggregations = {'math': ['mean', 'sum'], 'star': 'sum'}
df_grouped = df.groupby('year').agg(aggregations)
df_grouped

Unnamed: 0_level_0,math,math,star
Unnamed: 0_level_1,mean,sum,sum
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2000,73.5,147,3
2001,62.5,125,7


In [11]:
# Get the column labels as an array of (level 0, level 1) tuples
df_grouped.columns.values

array([('math', 'mean'), ('math', 'sum'), ('star', 'sum')], dtype=object)

In [12]:
# Join the level labels of each column with `_`.
# Labels are passed through str() first, because str.join raises TypeError
# when a label is not a string (e.g. integer column labels).
['_'.join(map(str, col)) for col in df_grouped.columns.values]

['math_mean', 'math_sum', 'star_sum']

In [13]:
# Replace the MultiIndex columns with single-level names of the form "<level 0>_<level 1>".
# Labels are passed through str() first, because str.join raises TypeError
# when a label is not a string (e.g. integer column labels).
df_grouped.columns = ['_'.join(map(str, col)) for col in df_grouped.columns.values]

df_grouped

Unnamed: 0_level_0,math_mean,math_sum,star_sum
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,73.5,147,3
2001,62.5,125,7


## 2. Rows

In [14]:
# Row labels: one (Location, Store, Date) tuple per row
index_tuples = [
    ('Oxford', 'A', '01-01-2022'),
    ('Oxford', 'B', '01-01-2022'),
    ('Oxford', 'A', '02-01-2022'),
    ('Oxford', 'B', '02-01-2022'),
    ('London', 'C', '01-01-2022'),
    ('London', 'D', '01-01-2022'),
    ('London', 'C', '02-01-2022'),
    ('London', 'D', '02-01-2022'),
]
multi_index = pd.MultiIndex.from_tuples(
    index_tuples,
    names=['Location', 'Store', 'Date'],
)

# One value per row, in the same order as the index tuples
data = {
    'Num_employee': [1, 2, 3, 4, 5, 6, 7, 8],
    'Sales': [11, 22, 33, 44, 55, 66, 77, 88],
}

df = pd.DataFrame(data, index=multi_index)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Num_employee,Sales
Location,Store,Date,Unnamed: 3_level_1,Unnamed: 4_level_1
Oxford,A,01-01-2022,1,11
Oxford,B,01-01-2022,2,22
Oxford,A,02-01-2022,3,33
Oxford,B,02-01-2022,4,44
London,C,01-01-2022,5,55
London,D,01-01-2022,6,66
London,C,02-01-2022,7,77
London,D,02-01-2022,8,88


### 2.1 Flatten all levels

In [15]:
# Flatten every level of the MultiIndex: each index level becomes a regular column
df.reset_index()

Unnamed: 0,Location,Store,Date,Num_employee,Sales
0,Oxford,A,01-01-2022,1,11
1,Oxford,B,01-01-2022,2,22
2,Oxford,A,02-01-2022,3,33
3,Oxford,B,02-01-2022,4,44
4,London,C,01-01-2022,5,55
5,London,D,01-01-2022,6,66
6,London,C,02-01-2022,7,77
7,London,D,02-01-2022,8,88


### 2.2 Flatten specific level(s)

In [16]:
# Flatten the 'Date' level only, selected by position

df.reset_index(level=2)
# df.reset_index(level='Date')  # same level, selected by name

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Num_employee,Sales
Location,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Oxford,A,01-01-2022,1,11
Oxford,B,01-01-2022,2,22
Oxford,A,02-01-2022,3,33
Oxford,B,02-01-2022,4,44
London,C,01-01-2022,5,55
London,D,01-01-2022,6,66
London,C,02-01-2022,7,77
London,D,02-01-2022,8,88


In [17]:
# Flatten the 'Store' and 'Date' levels, selected by position

df.reset_index(level=[1, 2])
# df.reset_index(level=['Store', 'Date'])  # same levels, selected by name

Unnamed: 0_level_0,Store,Date,Num_employee,Sales
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Oxford,A,01-01-2022,1,11
Oxford,B,01-01-2022,2,22
Oxford,A,02-01-2022,3,33
Oxford,B,02-01-2022,4,44
London,C,01-01-2022,5,55
London,D,01-01-2022,6,66
London,C,02-01-2022,7,77
London,D,02-01-2022,8,88


### 2.3 Join Index labels

In [18]:
# Display the row index (a three-level MultiIndex: Location, Store, Date)
df.index

MultiIndex([('Oxford', 'A', '01-01-2022'),
            ('Oxford', 'B', '01-01-2022'),
            ('Oxford', 'A', '02-01-2022'),
            ('Oxford', 'B', '02-01-2022'),
            ('London', 'C', '01-01-2022'),
            ('London', 'D', '01-01-2022'),
            ('London', 'C', '02-01-2022'),
            ('London', 'D', '02-01-2022')],
           names=['Location', 'Store', 'Date'])

In [19]:
# Get the index labels as an array of (Location, Store, Date) tuples
df.index.values

array([('Oxford', 'A', '01-01-2022'), ('Oxford', 'B', '01-01-2022'),
       ('Oxford', 'A', '02-01-2022'), ('Oxford', 'B', '02-01-2022'),
       ('London', 'C', '01-01-2022'), ('London', 'D', '01-01-2022'),
       ('London', 'C', '02-01-2022'), ('London', 'D', '02-01-2022')],
      dtype=object)

In [20]:
# Join the level labels of each row with `_`.
# Labels are passed through str() first, because str.join raises TypeError
# when a label is not a string (e.g. integers or timestamps).
['_'.join(map(str, row)) for row in df.index.values]

['Oxford_A_01-01-2022',
 'Oxford_B_01-01-2022',
 'Oxford_A_02-01-2022',
 'Oxford_B_02-01-2022',
 'London_C_01-01-2022',
 'London_D_01-01-2022',
 'London_C_02-01-2022',
 'London_D_02-01-2022']

In [21]:
# Replace the MultiIndex with single-level labels of the form "<Location>_<Store>_<Date>".
# Labels are passed through str() first, because str.join raises TypeError
# when a label is not a string (e.g. integers or timestamps).
df.index = ['_'.join(map(str, row)) for row in df.index.values]
df

Unnamed: 0,Num_employee,Sales
Oxford_A_01-01-2022,1,11
Oxford_B_01-01-2022,2,22
Oxford_A_02-01-2022,3,33
Oxford_B_02-01-2022,4,44
London_C_01-01-2022,5,55
London_D_01-01-2022,6,66
London_C_02-01-2022,7,77
London_D_02-01-2022,8,88


## Thanks for reading

This is a notebook for the Medium article [How to flatten MultiIndex Columns and Rows in Pandas](https://bindichen.medium.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569)

Please check out the article for instructions.

**License**: [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause)