In [4]:
# ==============================================
# ENCODING METHODS DEMONSTRATION
# ==============================================

import pandas as pd
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from category_encoders import BinaryEncoder, TargetEncoder, HashingEncoder

# ----------------------------------------------
# 1️⃣ Create a sample dataset
# ----------------------------------------------
# One tuple per customer, in column order (City, Education, Salary, Churn).
# 'Churn' is the target variable used by the supervised encoders below.
column_names = ['City', 'Education', 'Salary', 'Churn']
records = [
    ('Nairobi', 'Primary', 45000, 0),
    ('Mombasa', 'Secondary', 52000, 1),
    ('Kisumu', 'Tertiary', 61000, 0),
    ('Nakuru', 'Tertiary', 58000, 0),
    ('Nairobi', 'Secondary', 47000, 1),
    ('Kisumu', 'Primary', 50000, 0),
]
df = pd.DataFrame(records, columns=column_names)

# Header, the frame itself, then a blank-line spacer before the next section.
print("=== Original Data ===", df, "\n", sep="\n")

# ----------------------------------------------
# 2️⃣ Label Encoding
# ----------------------------------------------
print("=== LABEL ENCODING ===")
# Learn the distinct city names first, then map every row to its integer code.
# The encoder is kept in `le` so the mapping can be inspected or reused.
le = LabelEncoder()
le.fit(df['City'])
df['City_Label'] = le.transform(df['City'])

# Show each city next to its assigned code, followed by a blank-line spacer.
print(df[['City', 'City_Label']], "\n", sep="\n")

# ----------------------------------------------
# 3️⃣ One-Hot Encoding
# ----------------------------------------------
print("=== ONE-HOT ENCODING ===")
# Expand 'City' into one indicator column per distinct city; every other
# column of df is carried over unchanged.
# dtype=int asks get_dummies for 0/1 integers directly. The previous approach
# cast bool columns after the fact, which (a) did nothing on pandas versions
# whose dummies are not bool and (b) would also have converted any unrelated
# boolean column that happened to be in df.
# drop_first=False keeps all categories (no reference level is dropped).
df_onehot = pd.get_dummies(df, columns=['City'], drop_first=False, dtype=int)

print(df_onehot.head())
print("\n")

# ----------------------------------------------
# 4️⃣ Ordinal Encoding
# ----------------------------------------------
print("=== ORDINAL ENCODING ===")
# Explicit ranking of the education levels, lowest to highest; the encoder
# assigns codes following this order rather than inferring one from the data.
education_levels = ['Primary', 'Secondary', 'Tertiary']
ord_enc = OrdinalEncoder(categories=[education_levels])

# The encoder expects a 2-D input, hence the double brackets.
education_frame = df[['Education']]
df['Education_Encoded'] = ord_enc.fit_transform(education_frame)

# Show each level next to its code, followed by a blank-line spacer.
print(df[['Education', 'Education_Encoded']], "\n", sep="\n")

# ----------------------------------------------
# 5️⃣ Binary Encoding
# ----------------------------------------------
print("=== BINARY ENCODING ===")
# Only the 'City' column is handed to the encoder, so df_binary contains
# nothing but the columns the encoder derives from it.
city_frame = df[['City']]
bin_enc = BinaryEncoder(cols=['City'])
df_binary = bin_enc.fit_transform(city_frame)

# Preview the first rows, followed by a blank-line spacer.
print(df_binary.head(), "\n", sep="\n")

# ----------------------------------------------
# 6️⃣ Target Encoding
# ----------------------------------------------
print("=== TARGET ENCODING ===")
# Supervised encoding: 'City' is replaced using the 'Churn' target.
# NOTE(review): the encoder is fitted and applied on the same rows, and the
# full df (including 'Churn' itself) is passed as the feature frame -- fine
# for a demo, but confirm this is intended before reusing it for modelling.
churn_target = df['Churn']
tgt_enc = TargetEncoder(cols=['City'])
df_target = tgt_enc.fit_transform(df, churn_target)

# Only the encoded 'City' column is previewed, then a blank-line spacer.
print(df_target[['City']].head(), "\n", sep="\n")

# ----------------------------------------------
# 7️⃣ Frequency (Count) Encoding
# ----------------------------------------------
print("=== FREQUENCY ENCODING ===")
# Count how many rows each city has, then look every row's city up in that
# table so the new column holds the occurrence count of the row's own city.
city_col = df['City']
freq = city_col.value_counts()
df['City_Freq'] = city_col.map(freq)

# Show each city next to its count, followed by a blank-line spacer.
print(df[['City', 'City_Freq']], "\n", sep="\n")

# ----------------------------------------------
# 8️⃣ Hash Encoding
# ----------------------------------------------
print("=== HASH ENCODING ===")
# Hash 'City' into a fixed number of output components (4 here). The whole
# df is passed in, with only 'City' listed in `cols` for encoding.
hash_settings = {'cols': ['City'], 'n_components': 4}
hash_enc = HashingEncoder(**hash_settings)
df_hash = hash_enc.fit_transform(df)

# Preview the first rows, followed by a blank-line spacer.
print(df_hash.head(), "\n", sep="\n")

# Final marker so it is obvious the whole cell ran to the end.
print("✅ All encoding examples completed successfully!")


=== Original Data ===
      City  Education  Salary  Churn
0  Nairobi    Primary   45000      0
1  Mombasa  Secondary   52000      1
2   Kisumu   Tertiary   61000      0
3   Nakuru   Tertiary   58000      0
4  Nairobi  Secondary   47000      1
5   Kisumu    Primary   50000      0


=== LABEL ENCODING ===
      City  City_Label
0  Nairobi           2
1  Mombasa           1
2   Kisumu           0
3   Nakuru           3
4  Nairobi           2
5   Kisumu           0


=== ONE-HOT ENCODING ===
   Education  Salary  Churn  City_Label  City_Kisumu  City_Mombasa  \
0    Primary   45000      0           2            0             0   
1  Secondary   52000      1           1            0             1   
2   Tertiary   61000      0           0            1             0   
3   Tertiary   58000      0           3            0             0   
4  Secondary   47000      1           2            0             0   

   City_Nairobi  City_Nakuru  
0             1            0  
1             0        