In [98]:
import numpy as np
import pandas as pd
print('Pandas version = ' + pd.__version__)

Pandas version = 1.5.3


In [99]:
# Create categorical ordered indices
cat_indx_1 = pd.CategoricalIndex(['Mustang','Corvette','Aventador'], categories=['Mustang','Corvette','Aventador'], ordered=True)
cat_indx_2 = pd.CategoricalIndex(['good','better','best'], categories=['good','better','best'], ordered=True)
cat_indx_3 = pd.CategoricalIndex(['one','two','three'], categories=['one','two','three'], ordered=True)

In [108]:
# Generate random DataFrame, the columns 'indx_l1':'indx_l3' will be
# filled with values selected from the respective indices,
# these columns will then form a multi-level index
nrows = 20
np.random.seed(seed=1)
dict1 = dict() #dictionary that will form the dataframe
dict1['indx_l1'] = [cat_indx_1[i] for i in np.random.randint(0,3,size=(nrows), )]
dict1['indx_l2'] = [cat_indx_2[i] for i in np.random.randint(0,3,size=(nrows), )]
dict1['indx_l3'] = [cat_indx_3[i] for i in np.random.randint(0,3,size=(nrows), )]
dict1['values'] = np.random.randn(nrows)
df1 = pd.DataFrame(dict1)
df1.set_index(['indx_l1', 'indx_l2', 'indx_l3'], inplace=True)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,values
indx_l1,indx_l2,indx_l3,Unnamed: 3_level_1
Corvette,best,two,0.098371
Mustang,good,one,-0.369457
Mustang,better,one,-1.27322
Corvette,best,two,1.014987
Corvette,best,three,-1.48106
Mustang,good,two,-0.2871
Mustang,better,one,-0.056824
Corvette,better,three,-0.788105
Mustang,best,three,0.06771
Corvette,good,two,-1.038906


In [109]:
# For now, no index is categorical and ordered
for i in range(len(df1.index.levels)):
    print(df1.index.get_level_values(i))

Index(['Corvette', 'Mustang', 'Mustang', 'Corvette', 'Corvette', 'Mustang',
       'Mustang', 'Corvette', 'Mustang', 'Corvette', 'Mustang', 'Aventador',
       'Corvette', 'Aventador', 'Mustang', 'Aventador', 'Corvette',
       'Aventador', 'Mustang', 'Mustang'],
      dtype='object', name='indx_l1')
Index(['best', 'good', 'better', 'best', 'best', 'good', 'better', 'better',
       'best', 'good', 'best', 'better', 'better', 'better', 'better', 'best',
       'better', 'better', 'good', 'good'],
      dtype='object', name='indx_l2')
Index(['two', 'one', 'one', 'two', 'three', 'two', 'one', 'three', 'three',
       'two', 'two', 'two', 'one', 'one', 'two', 'one', 'three', 'one', 'one',
       'two'],
      dtype='object', name='indx_l3')


In [102]:
# So no ordering yet
df1.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,values
indx_l1,indx_l2,indx_l3,Unnamed: 3_level_1
Aventador,best,one,0.119345
Aventador,better,one,1.956677
Aventador,better,one,-0.761948
Aventador,better,two,0.538251
Corvette,best,three,-1.48106
Corvette,best,two,0.098371
Corvette,best,two,1.014987
Corvette,better,one,1.52826
Corvette,better,three,-0.788105
Corvette,better,three,-0.857465


In [110]:
# Set levels to each index level in the multi-index
df1.index = df1.index.set_levels([cat_indx_1, cat_indx_2, cat_indx_3], level=['indx_l1', 'indx_l2', 'indx_l3'])

In [104]:
df1.index

MultiIndex([( 'Corvette',   'good', 'three'),
            ('Aventador',   'best',   'one'),
            ('Aventador', 'better',   'one'),
            ( 'Corvette',   'good', 'three'),
            ( 'Corvette',   'good',   'two'),
            ('Aventador',   'best', 'three'),
            ('Aventador', 'better',   'one'),
            ( 'Corvette', 'better',   'two'),
            ('Aventador',   'good',   'two'),
            ( 'Corvette',   'best', 'three'),
            ('Aventador',   'good', 'three'),
            (  'Mustang', 'better', 'three'),
            ( 'Corvette', 'better',   'one'),
            (  'Mustang', 'better',   'one'),
            ('Aventador', 'better', 'three'),
            (  'Mustang',   'good',   'one'),
            ( 'Corvette', 'better',   'two'),
            (  'Mustang', 'better',   'one'),
            ('Aventador',   'best',   'one'),
            ('Aventador',   'best', 'three')],
           names=['indx_l1', 'indx_l2', 'indx_l3'])

In [111]:
# Now the indices are categorical and ordered
for i in range(len(df1.index.levels)):
    print(df1.index.get_level_values(i))

CategoricalIndex(['Corvette', 'Aventador', 'Aventador', 'Corvette', 'Corvette',
                  'Aventador', 'Aventador', 'Corvette', 'Aventador',
                  'Corvette', 'Aventador', 'Mustang', 'Corvette', 'Mustang',
                  'Aventador', 'Mustang', 'Corvette', 'Mustang', 'Aventador',
                  'Aventador'],
                 categories=['Mustang', 'Corvette', 'Aventador'], ordered=True, dtype='category', name='indx_l1')
CategoricalIndex(['good', 'best', 'better', 'good', 'good', 'best', 'better',
                  'better', 'good', 'best', 'good', 'better', 'better',
                  'better', 'better', 'good', 'better', 'better', 'best',
                  'best'],
                 categories=['good', 'better', 'best'], ordered=True, dtype='category', name='indx_l2')
CategoricalIndex(['three', 'one', 'one', 'three', 'two', 'three', 'one', 'two',
                  'two', 'three', 'three', 'three', 'one', 'one', 'three',
                  'one', 'two', 'one', '

In [112]:
# So the DataFrame should now order correctly
df1.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,values
indx_l1,indx_l2,indx_l3,Unnamed: 3_level_1
Mustang,good,one,0.119345
Mustang,better,one,1.956677
Mustang,better,one,-0.761948
Mustang,better,three,0.538251
Corvette,good,two,-1.48106
Corvette,good,three,0.098371
Corvette,good,three,1.014987
Corvette,better,one,1.52826
Corvette,better,two,-0.788105
Corvette,better,two,-0.857465
