## Multi-Index Sorting in Pandas

In [1]:
# https://stackoverflow.com/questions/17242970/multi-index-sorting-in-pandas
import pandas as pd
# Sample sales records: the same four products appear on two dates, so the
# per-product columns simply repeat and only 'Sales' differs between the days.
data = {
    'Date': ['2013-01-01'] * 4 + ['2013-01-02'] * 4,
    'Manufacturer': ['Apple', 'Apple', 'Samsung', 'Samsung'] * 2,
    'Product Name': ['iPod', 'iPad', 'Galaxy', 'Galaxy Tab'] * 2,
    'Product Launch Date': ['2001-10-23', '2010-04-03', '2009-04-27', '2010-09-02'] * 2,
    'Sales': [12, 13, 14, 15, 22, 17, 10, 7],
}
# Passing the dict keys as `columns` pins the column order to the insertion order above.
df = pd.DataFrame(data, columns=list(data))

df

Unnamed: 0,Date,Manufacturer,Product Name,Product Launch Date,Sales
0,2013-01-01,Apple,iPod,2001-10-23,12
1,2013-01-01,Apple,iPad,2010-04-03,13
2,2013-01-01,Samsung,Galaxy,2009-04-27,14
3,2013-01-01,Samsung,Galaxy Tab,2010-09-02,15
4,2013-01-02,Apple,iPod,2001-10-23,22
5,2013-01-02,Apple,iPad,2010-04-03,17
6,2013-01-02,Samsung,Galaxy,2009-04-27,10
7,2013-01-02,Samsung,Galaxy Tab,2010-09-02,7


In [2]:
# Total sales per product, ordered by manufacturer and then by launch date.
# 'Sales' is selected explicitly before summing: the frame also contains the
# string column 'Date', and pandas >= 2.0 no longer drops non-numeric columns
# from groupby sums (it would concatenate the date strings into the result).
(
    df.groupby(['Manufacturer', 'Product Name', 'Product Launch Date'])[['Sales']]
    .sum()
    .sort_index(level=['Manufacturer', 'Product Launch Date'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sales
Manufacturer,Product Name,Product Launch Date,Unnamed: 3_level_1
Apple,iPod,2001-10-23,34
Apple,iPad,2010-04-03,30
Samsung,Galaxy,2009-04-27,24
Samsung,Galaxy Tab,2010-09-02,22


In [3]:
# Total sales per product, sorted in descending order by manufacturer and
# then by launch date.
# 'Sales' is selected explicitly before summing: the frame also contains the
# string column 'Date', and pandas >= 2.0 no longer drops non-numeric columns
# from groupby sums (it would concatenate the date strings into the result).
(
    df.groupby(['Manufacturer', 'Product Name', 'Product Launch Date'])[['Sales']]
    .sum()
    .sort_index(level=['Manufacturer', 'Product Launch Date'], ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sales
Manufacturer,Product Name,Product Launch Date,Unnamed: 3_level_1
Samsung,Galaxy Tab,2010-09-02,22
Samsung,Galaxy,2009-04-27,24
Apple,iPad,2010-04-03,30
Apple,iPod,2001-10-23,34


In [4]:
# Build the GroupBy object once and keep it under a name so it can be
# aggregated separately from its construction.
product_keys = ['Manufacturer', 'Product Name', 'Product Launch Date']
grouped = df.groupby(product_keys)

In [5]:
# Sum only 'Sales' from the stored grouping, then sort descending by
# manufacturer and launch date. Selecting the column keeps the string column
# 'Date' out of the sum, which pandas >= 2.0 would otherwise concatenate.
(
    grouped[['Sales']]
    .sum()
    .sort_index(level=['Manufacturer', 'Product Launch Date'], ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sales
Manufacturer,Product Name,Product Launch Date,Unnamed: 3_level_1
Samsung,Galaxy Tab,2010-09-02,22
Samsung,Galaxy,2009-04-27,24
Apple,iPad,2010-04-03,30
Apple,iPod,2001-10-23,34


In [6]:
# Sample records, one tuple per row: (magnitude type, depth, magnitude).
# Note: this rebinds `df`, replacing the sales frame built earlier.
magnitude_rows = [
    ('MB', 100.0, 5.6),
    ('MWC', 10.0, 5.5),
    ('MWW', 21.0, 6.0),
    ('MWC', 35.0, 5.5),
    ('MWB', 45.0, 5.6),
]
df = pd.DataFrame(magnitude_rows, columns=['Magnitude Type', 'Depth', 'Magnitude'])
df

Unnamed: 0,Magnitude Type,Depth,Magnitude
0,MB,100.0,5.6
1,MWC,10.0,5.5
2,MWW,21.0,6.0
3,MWC,35.0,5.5
4,MWB,45.0,5.6


In [7]:
# Per magnitude type, summarise depth and magnitude with mean, count and sum.
# The result carries two-level column labels: (source column, aggregation name).
aggregations = ['mean', 'count', 'sum']
by_magnitude_type = df.groupby(['Magnitude Type'])
df_multi = by_magnitude_type[['Depth', 'Magnitude']].agg(aggregations)
df_multi

Unnamed: 0_level_0,Depth,Depth,Depth,Magnitude,Magnitude,Magnitude
Unnamed: 0_level_1,mean,count,sum,mean,count,sum
Magnitude Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MB,100.0,1,100.0,5.6,1,5.6
MWB,45.0,1,45.0,5.6,1,5.6
MWC,22.5,2,45.0,5.5,2,11.0
MWW,21.0,1,21.0,6.0,1,6.0


In [8]:
# Inspect the column labels of the aggregated frame: a MultiIndex whose
# entries are (source column, aggregation name) tuples.
df_multi.columns

MultiIndex([(    'Depth',  'mean'),
            (    'Depth', 'count'),
            (    'Depth',   'sum'),
            ('Magnitude',  'mean'),
            ('Magnitude', 'count'),
            ('Magnitude',   'sum')],
           )

In [9]:
# Print the labels stored at each column level in turn: level 0 holds the
# source column names, level 1 the aggregation names.
for level in (0, 1):
    print(df_multi.columns.get_level_values(level))

Index(['Depth', 'Depth', 'Depth', 'Magnitude', 'Magnitude', 'Magnitude'], dtype='object')
Index(['mean', 'count', 'sum', 'mean', 'count', 'sum'], dtype='object')


In [10]:
# Sort the rows by one aggregated column, addressed by its full
# (source column, aggregation) tuple. Ascending order is the default.
depth_mean = ('Depth', 'mean')
df_multi.sort_values(by=[depth_mean])

Unnamed: 0_level_0,Depth,Depth,Depth,Magnitude,Magnitude,Magnitude
Unnamed: 0_level_1,mean,count,sum,mean,count,sum
Magnitude Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MWW,21.0,1,21.0,6.0,1,6.0
MWC,22.5,2,45.0,5.5,2,11.0
MWB,45.0,1,45.0,5.6,1,5.6
MB,100.0,1,100.0,5.6,1,5.6


In [11]:
# Sort the rows by several aggregated columns: mean depth first, with the
# depth sum breaking ties. Ascending order is the default.
sort_keys = [('Depth', 'mean'), ('Depth', 'sum')]
df_multi.sort_values(by=sort_keys)

Unnamed: 0_level_0,Depth,Depth,Depth,Magnitude,Magnitude,Magnitude
Unnamed: 0_level_1,mean,count,sum,mean,count,sum
Magnitude Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MWW,21.0,1,21.0,6.0,1,6.0
MWC,22.5,2,45.0,5.5,2,11.0
MWB,45.0,1,45.0,5.6,1,5.6
MB,100.0,1,100.0,5.6,1,5.6


In [12]:
# Choose the sort column by its position in the column MultiIndex instead of
# spelling out its tuple, then sort the rows by it, largest first.
sort_column = df_multi.columns[2]
print(sort_column)
df_multi.sort_values(by=[sort_column], ascending=False).head(5)

('Depth', 'sum')


Unnamed: 0_level_0,Depth,Depth,Depth,Magnitude,Magnitude,Magnitude
Unnamed: 0_level_1,mean,count,sum,mean,count,sum
Magnitude Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
MB,100.0,1,100.0,5.6,1,5.6
MWB,45.0,1,45.0,5.6,1,5.6
MWC,22.5,2,45.0,5.5,2,11.0
MWW,21.0,1,21.0,6.0,1,6.0


## How to Use set_index With MultiIndex Columns in Pandas

In [13]:
import pandas as pd

# Two-level column labels: each company gets a 'rank' and a 'points' column.
# Note: this rebinds `df` to a new two-row frame.
cols = pd.MultiIndex.from_tuples(
    [(company, field)
     for company in ('company A', 'company B')
     for field in ('rank', 'points')]
)
df = pd.DataFrame([[1, 2, 3, 4], [2, 3, 3, 4]], columns=cols)
df

Unnamed: 0_level_0,company A,company A,company B,company B
Unnamed: 0_level_1,rank,points,rank,points
0,1,2,3,4
1,2,3,3,4


In [14]:
# Show the column labels: a two-level MultiIndex of (company, field) tuples.
df.columns

MultiIndex([('company A',   'rank'),
            ('company A', 'points'),
            ('company B',   'rank'),
            ('company B', 'points')],
           )

In [15]:
# Promote one column to the row index. With MultiIndex columns the key is the
# column's full (level 0, level 1) tuple.
index_column = ('company A', 'rank')
df.set_index([index_column])

Unnamed: 0_level_0,company A,company B,company B
Unnamed: 0_level_1,points,rank,points
"(company A, rank)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,2,3,4
2,3,3,4


In [16]:
# Promote the first column to the row index, looking its label up by position
# instead of writing the tuple out.
df.set_index(df.columns[0])

Unnamed: 0_level_0,company A,company B,company B
Unnamed: 0_level_1,points,rank,points
"(company A, rank)",Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,2,3,4
2,3,3,4


In [17]:
# Promote both 'rank' columns to the row index, giving a two-level row index.
rank_columns = [('company A', 'rank'), ('company B', 'rank')]
df.set_index(rank_columns)

Unnamed: 0_level_0,Unnamed: 1_level_0,company A,company B
Unnamed: 0_level_1,Unnamed: 1_level_1,points,points
"(company A, rank)","(company B, rank)",Unnamed: 2_level_2,Unnamed: 3_level_2
1,3,2,4
2,3,3,4


## How to Flatten a MultiIndex in Pandas

In [18]:
import pandas as pd

# Minimal frame with two-level column labels, used to demonstrate flattening.
# Note: this rebinds both `cols` and `df`.
column_pairs = [('company', field) for field in ('rank', 'points')]
cols = pd.MultiIndex.from_tuples(column_pairs)
df = pd.DataFrame([[1, 2], [3, 4]], columns=cols)
df

Unnamed: 0_level_0,company,company
Unnamed: 0_level_1,rank,points
0,1,2
1,3,4


In [19]:
# Collapse the two-level column labels into a single-level Index whose entries
# are the (level 0, level 1) tuples. This modifies df's columns in place.
df.columns = df.columns.to_flat_index()
df

Unnamed: 0,"(company, rank)","(company, points)"
0,1,2
1,3,4


In [20]:
# Turn each tuple label into a plain string by joining its parts with '_'.
# Iterating the columns directly yields tuples both for a MultiIndex and for
# an Index already flattened with to_flat_index(). Labels that are already
# strings are left untouched, so re-running this cell does not mangle the
# names (indexing a string label would otherwise produce 'c_o').
df.columns = [
    '_'.join(label) if isinstance(label, tuple) else label
    for label in df.columns
]
df.columns

Index(['company_rank', 'company_points'], dtype='object')

In [21]:
# reset_index() turns the current row index into a regular column (named
# 'index' here) and gives the result a fresh default integer index.
# The call returns a new frame; df itself is not reassigned.
df.reset_index()

Unnamed: 0,index,company_rank,company_points
0,0,1,2
1,1,3,4


In [22]:
# drop=True discards the old row index instead of keeping it as a column.
df.reset_index(drop=True)

Unnamed: 0,company_rank,company_points
0,1,2
1,3,4


In [23]:
# Transpose first, then reset: the column names (now row labels) are dropped
# and replaced by integer positions.
df.T.reset_index(drop=True)


Unnamed: 0,0,1
0,1,3
1,2,4


In [24]:
# Reset the row index first, then transpose: the column names survive as the
# row labels of the transposed frame.
df.reset_index(drop=True).T


Unnamed: 0,0,1
company_rank,1,3
company_points,2,4


In [25]:
# Transpose, reset, transpose back: the data keeps its original orientation,
# but the column names are replaced by integer positions.
df.T.reset_index(drop=True).T


Unnamed: 0,0,1
0,1,2
1,3,4


In [26]:
# Rewrite the (already flat) column names with a list comprehension,
# replacing each '_' separator with ' & '. The current names are printed
# first so the before/after can be compared.
current_labels = df.columns.values
print(current_labels)
df.columns = [label.replace('_', ' & ') for label in current_labels]
df

['company_rank' 'company_points']


Unnamed: 0,company & rank,company & points
0,1,2
1,3,4
