In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [2]:
# Toy student records, stored column-wise: each keyword is a column name and
# each list holds that column's values, one entry per student.
data = dict(
    Name=['Eduard', 'Arnav', 'Sophia', 'Jordan'],
    Gender=['Male', 'Male', 'Female', 'Male'],
    Age=[23, 26, 19, 29],
    Degree=['Bachelors', 'Masters', 'High School', 'Doctoral'],
)

# Build the working frame; the bare name on the last line renders it as the cell output.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Gender,Age,Degree
0,Eduard,Male,23,Bachelors
1,Arnav,Male,26,Masters
2,Sophia,Female,19,High School
3,Jordan,Male,29,Doctoral


In [3]:
# Gender is a nominal attribute (its values have no inherent order), so it is
# one-hot encoded: every category gets its own 0/1 indicator column, which
# avoids introducing an artificial ranking between the categories.
# With an empty prefix and separator, the indicator columns are named after
# the category values themselves ("Female", "Male").
df_Gender = pd.get_dummies(df[['Gender']], prefix="", prefix_sep="", dtype=int)

# Drop any indicator columns left over from a previous run of this cell before
# concatenating, so that re-running the cell does not append duplicate
# "Female"/"Male" columns to df. On a first run nothing is dropped.
df = pd.concat(
    [df.drop(columns=df_Gender.columns, errors="ignore"), df_Gender],
    axis=1,
)

In [4]:
# Display the frame, which now carries the one-hot 'Female' and 'Male' columns.
df

Unnamed: 0,Name,Gender,Age,Degree,Female,Male
0,Eduard,Male,23,Bachelors,0,1
1,Arnav,Male,26,Masters,0,1
2,Sophia,Female,19,High School,1,0
3,Jordan,Male,29,Doctoral,0,1


In [5]:
# Degree is an ordinal attribute: its values can be ranked (for example, a
# 'Doctoral' degree is higher than a 'Masters'), so each value is replaced by
# an integer that reflects its rank.

# Ranking from lowest to highest; a value's position in this list is its code.
degree_levels = ['High School', 'Bachelors', 'Masters', 'Doctoral']

Encoder_Degree = OrdinalEncoder(categories=[degree_levels], dtype=int)
Encoder_Degree.fit(df[['Degree']])
df[['Degree_Encoded']] = Encoder_Degree.transform(df[['Degree']])

In [6]:
# Display the frame with the integer 'Degree_Encoded' column added.
df

Unnamed: 0,Name,Gender,Age,Degree,Female,Male,Degree_Encoded
0,Eduard,Male,23,Bachelors,0,1,1
1,Arnav,Male,26,Masters,0,1,2
2,Sophia,Female,19,High School,1,0,0
3,Jordan,Male,29,Doctoral,0,1,3


In [8]:
# Sanity check: decode the integer codes back into the original degree labels
# using the fitted encoder; the result is shown as the cell output.
x = Encoder_Degree.inverse_transform(df.loc[:, ['Degree_Encoded']])
x

array([['Bachelors'],
       ['Masters'],
       ['High School'],
       ['Doctoral']], dtype=object)