One-Hot Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample dataset: an ID column plus two categorical columns to be one-hot encoded.
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "pune", "Banglore", "mumbai"]
}

# Convert data to a DataFrame
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# sparse_output=False asks the encoder for a dense array, which can be wrapped
# in a DataFrame directly.
one_hot_encoder = OneHotEncoder(sparse_output=False)
columns_to_encode = ["gender", "city"]
encoded_data = one_hot_encoder.fit_transform(df[columns_to_encode])

# Generated names follow the "<column>_<category>" pattern, e.g. "gender_Female".
encoded_columns = one_hot_encoder.get_feature_names_out(columns_to_encode)

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# 0..n-1 index on the encoded frame would misalign (and introduce NaN rows)
# whenever df does not carry the default RangeIndex, e.g. after filtering.
encoded_df = pd.DataFrame(encoded_data, columns=encoded_columns, index=df.index)

# Swap the raw categorical columns for their indicator columns.
final_df = pd.concat([df.drop(columns=columns_to_encode), encoded_df], axis=1)
print("\nOne-hot Encoded Dataframe with sklearn:")
print(final_df)

Original DataFrame:
   customer_id  gender       city
0            1    Male  Hyderabad
1            2  Female       pune
2            3  Female   Banglore
3            4    Male     mumbai

One-hot Encoded Dataframe with sklearn:
   customer_id  gender_Female  gender_Male  city_Banglore  city_Hyderabad  \
0            1            0.0          1.0            0.0             1.0   
1            2            1.0          0.0            0.0             0.0   
2            3            1.0          0.0            1.0             0.0   
3            4            0.0          1.0            0.0             0.0   

   city_mumbai  city_pune  
0          0.0        0.0  
1          0.0        1.0  
2          0.0        0.0  
3          1.0        0.0  


Label Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample dataset: an ID column plus two categorical columns to be label encoded.
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "pune", "Banglore", "mumbai"],
}

# Build the frame and show it before any column is overwritten.
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Fit one encoder per column and keep each fitted encoder keyed by column name,
# so the integer codes can be mapped back to the original strings later.
# NOTE: the codes follow the sorted order of the distinct values; in the
# recorded output "Banglore" and "Hyderabad" (capitalised) receive lower codes
# than "mumbai" and "pune".
label_encoders = {}
for column in ["gender", "city"]:
    le = LabelEncoder().fit(df[column])
    df[column] = le.transform(df[column])  # overwrites the string column in place
    label_encoders[column] = le

print("\nLabel Encoded DataFrame:")
print(df)


Original DataFrame:
   customer_id  gender       city
0            1    Male  Hyderabad
1            2  Female       pune
2            3  Female   Banglore
3            4    Male     mumbai

Label Encoded DataFrame:
   customer_id  gender  city
0            1       1     1
1            2       0     3
2            3       0     0
3            4       1     2


Ordinal Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Sample dataset: an ID column plus two categorical columns to be ordinal encoded.
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "pune", "Banglore", "mumbai"],
}

# Build the frame and show it before any column is overwritten.
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# A single OrdinalEncoder handles both columns at once. No explicit category
# order is supplied, so the encoder chooses the code assigned to each value.
categorical_columns = ["gender", "city"]
ordinal_encoder = OrdinalEncoder()
ordinal_encoder.fit(df[categorical_columns])
encoded_data = ordinal_encoder.transform(df[categorical_columns])

# Overwrite the string columns with their codes (floats in the recorded output).
df[categorical_columns] = encoded_data

print("\nOrdinal Encoded DataFrame:")
print(df)


Original DataFrame:
   customer_id  gender       city
0            1    Male  Hyderabad
1            2  Female       pune
2            3  Female   Banglore
3            4    Male     mumbai

Ordinal Encoded DataFrame:
   customer_id  gender  city
0            1     1.0   1.0
1            2     0.0   3.0
2            3     0.0   0.0
3            4     1.0   2.0


Mean (Target) Encoding

In [None]:
import pandas as pd

# Sample dataset: two categorical columns plus a binary target ("purchased").
data = {
    "customer_id": [1, 2, 3, 4],
    "gender": ["Male", "Female", "Female", "Male"],
    "city": ["Hyderabad", "pune", "Banglore", "mumbai"],
    "purchased": [1, 0, 1, 0],  # Example target variable
}

# Build the frame and show it before any column is overwritten.
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Mean (target) encoding: replace every category with the average of the
# target over the rows that share that category.
# NOTE: the averages are computed on the same rows they are applied to. Each
# city occurs exactly once here, so the encoded "city" column simply repeats
# "purchased".
for column in ["gender", "city"]:
    # Group the target Series by the current key column -> one mean per category.
    target_means = df["purchased"].groupby(df[column]).mean()
    # Look each row's category up in the per-category means.
    df[column] = df[column].map(target_means)

print("\nTarget Encoded DataFrame:")
print(df)


Original DataFrame:
   customer_id  gender       city  purchased
0            1    Male  Hyderabad          1
1            2  Female       pune          0
2            3  Female   Banglore          1
3            4    Male     mumbai          0

Target Encoded DataFrame:
   customer_id  gender  city  purchased
0            1     0.5   1.0          1
1            2     0.5   0.0          0
2            3     0.5   1.0          1
3            4     0.5   0.0          0
