In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Survey responses, one (education level, satisfaction rating) pair per respondent
survey_rows = [
    ('High School', 'Neutral'),
    ('Bachelor', 'Satisfied'),
    ('Master', 'Very Satisfied'),
    ('PhD', 'Dissatisfied'),
    ('Bachelor', 'Very Dissatisfied'),
    ('Master', 'Neutral'),
]

# Assemble the raw frame; both columns start out as plain strings
df = pd.DataFrame(survey_rows, columns=['Education', 'Satisfaction'])
df

Unnamed: 0,Education,Satisfaction
0,High School,Neutral
1,Bachelor,Satisfied
2,Master,Very Satisfied
3,PhD,Dissatisfied
4,Bachelor,Very Dissatisfied
5,Master,Neutral


In [3]:
# Inspect dtypes and non-null counts of the raw frame: at this point both
# columns are reported as generic `object` columns with no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Education     6 non-null      object
 1   Satisfaction  6 non-null      object
dtypes: object(2)
memory usage: 228.0+ bytes


In [4]:
# Rebuild the raw survey frame so this cell can be run on its own
df = pd.DataFrame({
    'Education': ['High School', 'Bachelor', 'Master', 'PhD', 'Bachelor', 'Master'],
    'Satisfaction': ['Neutral', 'Satisfied', 'Very Satisfied', 'Dissatisfied', 'Very Dissatisfied', 'Neutral'],
})

# Ordered dtypes: categories are listed from lowest to highest rank,
# so the position in each list becomes the category's integer code.
education_order = pd.CategoricalDtype(
    ['High School', 'Bachelor', 'Master', 'PhD'],
    ordered=True,
)
satisfaction_order = pd.CategoricalDtype(
    ['Very Dissatisfied', 'Dissatisfied', 'Neutral', 'Satisfied', 'Very Satisfied'],
    ordered=True,
)

# Cast both columns to their ordered categorical dtype in a single call
df = df.astype({'Education': education_order, 'Satisfaction': satisfaction_order})

# Both columns should now report dtype `category`
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   Education     6 non-null      category
 1   Satisfaction  6 non-null      category
dtypes: category(2)
memory usage: 560.0 bytes


In [5]:
# For each ordered categorical column, store its integer category code
# (the position of the value in the dtype's category list) in "<column>_encoded".
for source_column in ("Education", "Satisfaction"):
    df[source_column + "_encoded"] = df[source_column].cat.codes

df

Unnamed: 0,Education,Satisfaction,Education_encoded,Satisfaction_encoded
0,High School,Neutral,0,2
1,Bachelor,Satisfied,1,3
2,Master,Very Satisfied,2,4
3,PhD,Dissatisfied,3,1
4,Bachelor,Very Dissatisfied,1,0
5,Master,Neutral,2,2


In [6]:
from sklearn.preprocessing import OneHotEncoder

# Example DataFrame
df1 = pd.DataFrame({'Education': ['High School', 'Bachelor', 'Master', 'PhD', 'Bachelor', 'Master']})

# Explicit category list (one inner list per encoded feature). For one-hot
# encoding this only fixes the order of the generated indicator columns;
# the encoding itself carries no ordinal meaning.
education_order = [['High School', 'Bachelor', 'Master', 'PhD']]

# sparse_output=False makes the encoder return a dense NumPy array
ohe = OneHotEncoder(categories=education_order, sparse_output=False)

# Perform one-hot encoding (2-D input: a one-column DataFrame)
encoded_array = ohe.fit_transform(df1[['Education']])

# Wrap the array in a DataFrame that reuses df1's index. pd.concat(axis=1)
# aligns on index labels, so without this a non-default index on df1 would
# produce misaligned rows filled with NaN.
encoded_df = pd.DataFrame(
    encoded_array,
    columns=ohe.get_feature_names_out(['Education']),
    index=df1.index,
)

# Append the indicator columns next to the original labels
df1 = pd.concat([df1, encoded_df], axis=1)

df1

Unnamed: 0,Education,Education_High School,Education_Bachelor,Education_Master,Education_PhD
0,High School,1.0,0.0,0.0,0.0
1,Bachelor,0.0,1.0,0.0,0.0
2,Master,0.0,0.0,1.0,0.0
3,PhD,0.0,0.0,0.0,1.0
4,Bachelor,0.0,1.0,0.0,0.0
5,Master,0.0,0.0,1.0,0.0


In [7]:
from sklearn.preprocessing import OrdinalEncoder

# Category order for the OrdinalEncoder (one inner list per encoded feature);
# a value's position in the list is the code it is mapped to.
education_order = [['High School', 'Bachelor', 'Master', 'PhD']]

# Fresh example frame holding only the raw Education labels
df1 = pd.DataFrame(
    {'Education': ['High School', 'Bachelor', 'Master', 'PhD', 'Bachelor', 'Master']}
)

# Fit the encoder on the single Education column, then store the resulting
# float codes alongside the original labels.
oe = OrdinalEncoder(categories=education_order)
oe.fit(df1[["Education"]])
df1["Education_encoded"] = oe.transform(df1[["Education"]])

df1

Unnamed: 0,Education,Education_encoded
0,High School,0.0
1,Bachelor,1.0
2,Master,2.0
3,PhD,3.0
4,Bachelor,1.0
5,Master,2.0
