### 1. Import Dependencies

In [12]:
import os
import pandas as pd #alias
import numpy as np  #alias
import seaborn as sns
import matplotlib.pyplot as plt 

In [13]:
# Load the input table for the encoding steps; the path is relative to the
# notebook's working directory (presumably the output of an earlier binning
# step, going by the file name).
# NOTE(review): the preview shows an 'Unnamed: 0' column holding 0, 1, 2, ... —
# this looks like a row index that was written out when the file was saved.
# Confirm whether it should be dropped (e.g. read with index_col=0) so it does
# not end up as a feature downstream.
binned_csv_path = 'processed/ChurnModelling_Binning_Applied.csv'
df = pd.read_csv(binned_csv_path)
df.head()

Unnamed: 0,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBins
0,France,Female,42.0,2,0.0,1,1,1,101348.88,1,Fair
1,Spain,Female,41.0,1,83807.86,1,0,1,112542.58,0,Fair
2,France,Female,42.0,8,159660.8,3,1,0,113931.57,1,Poor
3,France,Female,38.91,1,0.0,2,0,0,93826.63,0,Good
4,Spain,Female,43.0,2,125510.82,1,1,1,79084.1,0,Excellent


## 🔢 Encoding Categorical Variables

| Variable Type | Preferred Encoding | Why?                                                                 |
|---------------|--------------------|----------------------------------------------------------------------|
| Nominal       | One-Hot Encoding   | No inherent order → avoids implying false ordinal relationships      |
| Ordinal       | Label Encoding     | Preserves order → small integers represent increasing levels         |


Gender -> Nominal <br>
Geography -> Nominal <br>
CreditScoreBins -> Ordinal

### 2. Encode Nominal Variables

In [14]:
# Columns with no inherent order; each is expanded into one indicator column
# per category.
nominal_variables = ['Geography', 'Gender']

# A single get_dummies call over the listed columns replaces each source column
# with '<column>_<category>' indicator columns. The untouched columns keep their
# original order and the indicator columns are appended after them, in the
# order given by `nominal_variables`. `df` itself is not modified, so this cell
# can be re-run safely.
df_encoded = pd.get_dummies(df, columns=nominal_variables)
df_encoded.head()

Unnamed: 0,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBins,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,42.0,2,0.0,1,1,1,101348.88,1,Fair,True,False,False,True,False
1,41.0,1,83807.86,1,0,1,112542.58,0,Fair,False,False,True,True,False
2,42.0,8,159660.8,3,1,0,113931.57,1,Poor,True,False,False,True,False
3,38.91,1,0.0,2,0,0,93826.63,0,Good,True,False,False,True,False
4,43.0,2,125510.82,1,1,1,79084.1,0,Excellent,False,False,True,True,False


### 3. Encode Ordinal Variables

In [15]:
# Ordered mapping from credit-score bin label to an integer rank
# (lowest bin -> 0, highest bin -> 4).
encode_dict_creditscore = {'Poor': 0, 'Fair': 1, 'Good': 2, 'Very Good': 3, 'Excellent': 4}

credit_score_labels = df_encoded['CreditScoreBins']
credit_score_codes = credit_score_labels.map(encode_dict_creditscore)

# Series.map() turns every value that is not a key of the dict into NaN without
# any warning. That happens for misspelled/unexpected labels and also when this
# cell is run a second time (the column then already holds integers, none of
# which are dict keys). Fail loudly instead of silently wiping the column.
# Values that were already missing before the mapping are left alone.
unmapped_labels = credit_score_labels[
    credit_score_labels.notna() & credit_score_codes.isna()
].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        "CreditScoreBins contains values with no entry in encode_dict_creditscore: "
        f"{list(unmapped_labels)} (was this cell already run on df_encoded?)"
    )

df_encoded['CreditScoreBins'] = credit_score_codes
df_encoded.head()

Unnamed: 0,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,CreditScoreBins,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,42.0,2,0.0,1,1,1,101348.88,1,1,True,False,False,True,False
1,41.0,1,83807.86,1,0,1,112542.58,0,1,False,False,True,True,False
2,42.0,8,159660.8,3,1,0,113931.57,1,0,True,False,False,True,False
3,38.91,1,0.0,2,0,0,93826.63,0,2,True,False,False,True,False
4,43.0,2,125510.82,1,1,1,79084.1,0,4,False,False,True,True,False


In [16]:
# Write the encoded frame to disk for later steps. index=False keeps the
# DataFrame's row index out of the file.
encoded_csv_path = 'processed/ChurnModelling_Encoded.csv'
df_encoded.to_csv(encoded_csv_path, index=False)