In [19]:
import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Betty'],
    'Age': [25, 32, 27, 35, 40, 34],
    'Gender': ['F', 'M', 'M', 'M', 'F', 'F'],
    'City': ['New York', 'London', 'Paris', 'London', 'Sydney', 'New York'],
    'Education': ['High School', 'Bachelor', 'Master', 'Bachelor', 'PhD', 'Master']
    
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Gender,City,Education
0,Alice,25,F,New York,High School
1,Bob,32,M,London,Bachelor
2,Charlie,27,M,Paris,Master
3,David,35,M,London,Bachelor
4,Eve,40,F,Sydney,PhD
5,Betty,34,F,New York,Master


## Label Encoding 
Label encoding is a technique used to convert categorical variables into numerical form. It assigns a unique numeric value to each category. This is a simple and efficient way to encode categorical data, but it can introduce an ordinal relationship between categories, which may not always be desirable.

In [11]:
df['Gender'] = df['Gender'].map({'M':0, 'F':1})
df

Unnamed: 0,Name,Age,Gender,City,Performance
0,Alice,25,1,New York,High
1,Bob,32,0,London,Low
2,Charlie,27,0,Paris,Med
3,David,35,0,London,High
4,Eve,40,1,Sydney,Low
5,Betty,34,1,New York,Med


In [12]:
from sklearn.preprocessing import LabelEncoder

# Create a label encoder object
le = LabelEncoder()

# Encode the 'Gender' column
df['Gender'] = le.fit_transform(df['Gender'])
df

Unnamed: 0,Name,Age,Gender,City,Performance
0,Alice,25,1,New York,High
1,Bob,32,0,London,Low
2,Charlie,27,0,Paris,Med
3,David,35,0,London,High
4,Eve,40,1,Sydney,Low
5,Betty,34,1,New York,Med


## One-Hot Encoding 
One-hot encoding is another technique for encoding categorical variables. It creates a new binary column for each unique category in the original column. This encoding preserves the categorical nature of the variable and does not introduce any ordinal relationship between categories.

![one_hot](https://labcontent.simplicdn.net/data-content/content-assets/Data_and_AI/Applied_Machine_Learning/Images/0.4_Feature_Engineering/Trainer_PPT_and_IPYNB/one_hot.png)

In [None]:
# method 1 - pandas
df = pd.get_dummies(df, columns=['City'], dtype=int)
df

In [15]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Create a one-hot encoder object
ohe = OneHotEncoder()

# Encode the 'City' column
encoded_cities = ohe.fit_transform(df[['City']]).toarray()

# Convert the encoded data to a dataframe
encoded_cities_df = pd.DataFrame(encoded_cities, columns=ohe.get_feature_names_out(['City']))


# Concatenate the original dataframe with the encoded cities
df = pd.concat([df, encoded_cities_df], axis=1)
df

Unnamed: 0,Name,Age,Gender,City,Performance,City_London,City_New York,City_Paris,City_Sydney
0,Alice,25,F,New York,High,0.0,1.0,0.0,0.0
1,Bob,32,M,London,Low,1.0,0.0,0.0,0.0
2,Charlie,27,M,Paris,Med,0.0,0.0,1.0,0.0
3,David,35,M,London,High,1.0,0.0,0.0,0.0
4,Eve,40,F,Sydney,Low,0.0,0.0,0.0,1.0
5,Betty,34,F,New York,Med,0.0,1.0,0.0,0.0


## Ordinal Encoding
Ordinal encoding is another technique used for encoding categorical variables that have an inherent order or ranking. This encoding assigns a unique numerical value to each category based on its ordinal position or rank.

In [21]:
from sklearn.preprocessing import OrdinalEncoder

# Create an ordinal encoder object
ordinal_encoder = OrdinalEncoder(categories=[['High School', 'Bachelor', 'Master', 'PhD']])

# Encode the 'Education' column
df['Education_Encoded'] = ordinal_encoder.fit_transform(df[['Education']]).astype(int)
df

Unnamed: 0,Name,Age,Gender,City,Education,Education_Encoded
0,Alice,25,F,New York,High School,0
1,Bob,32,M,London,Bachelor,1
2,Charlie,27,M,Paris,Master,2
3,David,35,M,London,Bachelor,1
4,Eve,40,F,Sydney,PhD,3
5,Betty,34,F,New York,Master,2


In [17]:
from pandas.api.types import CategoricalDtype

# Create an ordered category type
education_order = ['High School', 'Bachelor', 'Master', 'PhD']
education_cat_type = CategoricalDtype(categories=education_order, ordered=True)

# Encode the 'Education' column
df['Education'] = df['Education'].astype(education_cat_type)

# Convert the encoded data to numerical values
df['Education'] = df['Education'].cat.codes
df

Unnamed: 0,Name,Age,Gender,City,Education
0,Alice,25,F,New York,0
1,Bob,32,M,London,1
2,Charlie,27,M,Paris,2
3,David,35,M,London,1
4,Eve,40,F,Sydney,3
5,Betty,34,F,New York,2
