In [31]:
from pandas.api.types import CategoricalDtype
import pandas as pd
from numpy import nan

# Build a three-row demo frame of students: four text columns and two
# integer columns (Class, Marks).
df = pd.DataFrame(dict(
    Name=['Rakib', 'Shakib', 'Ranak'],
    Division=['Science', 'Commerce', 'Arts'],
    # NOTE(review): 'nan' is a literal string here, not numpy.nan, so Area
    # contains no missing values (info() reports 3 non-null) -- confirm
    # whether a real missing value was intended.
    Area=['Dhaka', 'nan', 'Sylhet'],
    Grade=['A+', 'A-', 'A'],
    Class=[8, 10, 7],
    Marks=[86, 64, 76],
))

# Show the full frame followed by the Grade column on its own, then the
# per-column dtype / non-null summary.
print(df, df['Grade'], sep='\n')
df.info()

     Name  Division    Area Grade  Class  Marks
0   Rakib   Science   Dhaka    A+      8     86
1  Shakib  Commerce     nan    A-     10     64
2   Ranak      Arts  Sylhet     A      7     76
0    A+
1    A-
2     A
Name: Grade, dtype: object
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Name      3 non-null      object
 1   Division  3 non-null      object
 2   Area      3 non-null      object
 3   Grade     3 non-null      object
 4   Class     3 non-null      int64 
 5   Marks     3 non-null      int64 
dtypes: int64(2), object(4)
memory usage: 272.0+ bytes


In [32]:
# Ordinal features: each column name maps to its categories, listed from
# lowest to highest rank.
ordered_levels = dict(
    Class=[7, 8, 10],
    Grade=['A-', 'A', 'A+'],
)

# Cast every ordinal column to an ordered categorical so comparisons and
# sorting respect the declared ranking.
for name, levels in ordered_levels.items():
    ordinal_dtype = CategoricalDtype(categories=levels, ordered=True)
    df[name] = df[name].astype(ordinal_dtype)

In [33]:
# Re-check the dtype summary after the ordinal cast: Grade and Class are
# expected to report `category` instead of object / int64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   Name      3 non-null      object  
 1   Division  3 non-null      object  
 2   Area      3 non-null      object  
 3   Grade     3 non-null      category
 4   Class     3 non-null      category
 5   Marks     3 non-null      int64   
dtypes: category(2), int64(1), object(3)
memory usage: 438.0+ bytes


In [34]:
# Display Grade; for an ordered categorical the repr lists the categories
# joined by `<` in their declared rank order.
df['Grade']

0    A+
1    A-
2     A
Name: Grade, dtype: category
Categories (3, object): [A- < A < A+]

## This time the categories are shown in rank order (A- < A < A+) because we
## cast the column to CategoricalDtype(ordered=True).
## This is how the program knows which category ranks higher than the others.

In [35]:
# Display Class; its integer categories are likewise shown in the declared
# rank order.
df['Class']

0     8
1    10
2     7
Name: Class, dtype: category
Categories (3, int64): [7 < 8 < 10]

In [36]:
# Nominal (unordered) features. Division and Area are categorical with no
# inherent order; Name is converted as well, although it is only an
# identifier and is not needed as a feature.
features_nom = ['Name', 'Division', 'Area']
for name in features_nom:
    df[name] = df[name].astype("category")
    # Add a "None" category to serve as a placeholder for missing values.
    # add_categories returns a new Series, so the result is assigned back:
    # the `inplace` keyword was deprecated in pandas 1.3 and removed in
    # pandas 2.0, where passing it raises a TypeError.
    if "None" not in df[name].cat.categories:
        df[name] = df[name].cat.add_categories("None")

In [37]:
# Display Area to inspect its categories after the nominal conversion.
df['Area']

0     Dhaka
1       nan
2    Sylhet
Name: Area, dtype: category
Categories (4, object): [Dhaka, Sylhet, nan, None]

In [38]:
# As the output above shows, Area has Categories (4, object): [Dhaka, Sylhet, nan, None].
# They are listed without `<` because a nominal category has no order.