### Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

### Load Data

In [None]:
# Toy credit dataset built in memory: ten customers, four numeric features
# (each with one np.nan gap), one categorical feature and a binary target.
employment_status = [
    "Employed", "Self-Employed", "Employed", "Unemployed", "Employed",
    "Employed", "Self-Employed", "Employed", "Unemployed", "Employed",
]

# Keyword order fixes the column order of the resulting frame.
data = dict(
    CustomerID=list(range(1, 11)),
    Income=[45000, 52000, 38000, np.nan, 60000, 72000, 31000, 80000, 40000, 55000],
    LoanAmount=[15000, 20000, 18000, 25000, 22000, np.nan, 12000, 30000, 15000, 18000],
    CreditScore=[720, 680, np.nan, 650, 700, 740, 610, 770, 680, 710],
    EmploymentStatus=employment_status,
    DebtToIncome=[0.25, 0.35, 0.40, 0.55, 0.30, 0.20, 0.50, np.nan, 0.45, 0.28],
    Default=[0, 0, 1, 1, 0, 0, 1, 0, 1, 0],
)

df = pd.DataFrame(data)
print("Original Data:\n", df)

Original Data:
    CustomerID   Income  LoanAmount  CreditScore EmploymentStatus  \
0           1  45000.0     15000.0        720.0         Employed   
1           2  52000.0     20000.0        680.0    Self-Employed   
2           3  38000.0     18000.0          NaN         Employed   
3           4      NaN     25000.0        650.0       Unemployed   
4           5  60000.0     22000.0        700.0         Employed   
5           6  72000.0         NaN        740.0         Employed   
6           7  31000.0     12000.0        610.0    Self-Employed   
7           8  80000.0     30000.0        770.0         Employed   
8           9  40000.0     15000.0        680.0       Unemployed   
9          10  55000.0     18000.0        710.0         Employed   

   DebtToIncome  Default  
0          0.25        0  
1          0.35        0  
2          0.40        1  
3          0.55        1  
4          0.30        0  
5          0.20        0  
6          0.50        1  
7           NaN    

### Handle Missing Values with SimpleImputer

In [3]:
# Replace missing values in the numeric columns with each column's mean.
# NOTE(review): the imputer is fitted on the whole frame, before the
# train/test split performed later in the notebook, so rows that end up in the
# test set contribute to the imputed means. Consider fitting on the training
# rows only.
numeric_cols = ['Income', 'LoanAmount', 'CreditScore', 'DebtToIncome']

imputer = SimpleImputer(strategy='mean')
imputed_values = imputer.fit_transform(df[numeric_cols])
df[numeric_cols] = imputed_values  # overwrites the columns in place

print("\nDataset after Handling Missing Values:\n", df)


Dataset after Handling Missing Values:
    CustomerID        Income    LoanAmount  CreditScore EmploymentStatus  \
0           1  45000.000000  15000.000000   720.000000         Employed   
1           2  52000.000000  20000.000000   680.000000    Self-Employed   
2           3  38000.000000  18000.000000   695.555556         Employed   
3           4  52555.555556  25000.000000   650.000000       Unemployed   
4           5  60000.000000  22000.000000   700.000000         Employed   
5           6  72000.000000  19444.444444   740.000000         Employed   
6           7  31000.000000  12000.000000   610.000000    Self-Employed   
7           8  80000.000000  30000.000000   770.000000         Employed   
8           9  40000.000000  15000.000000   680.000000       Unemployed   
9          10  55000.000000  18000.000000   710.000000         Employed   

   DebtToIncome  Default  
0      0.250000        0  
1      0.350000        0  
2      0.400000        1  
3      0.550000        1 

### Encode Categorical Variables

In [4]:
# Map each employment-status string to an integer code, overwriting the
# original column in place.
# NOTE(review): integer codes give this nominal feature an implicit order;
# confirm that is acceptable for the downstream model.
le = LabelEncoder()
employment_codes = le.fit_transform(df['EmploymentStatus'])
df['EmploymentStatus'] = employment_codes

### Train-Test Split

In [5]:
# Features are every column except the customer identifier and the target.
target_col = 'Default'
X = df.drop(columns=['CustomerID', target_col])
y = df[target_col]

# Hold out 30% of the rows; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (7, 5)
Test shape: (3, 5)


### Standardize Features

In [8]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test rows do not influence them.
# Every column of X is scaled, including the integer-coded EmploymentStatus.
scaler_standard = StandardScaler().fit(X_train)
X_train_standardized = scaler_standard.transform(X_train)
X_test_standardized = scaler_standard.transform(X_test)

first_rows = X_train_standardized[:3]
print("\nStandardized Data (First 3 rows):\n", first_rows)


Standardized Data (First 3 rows):
 [[-0.44803943 -0.8799638   0.55570944 -0.58834841 -1.21667579]
 [ 1.90978145  1.75992759  1.61021832 -0.58834841 -0.12695747]
 [-0.91960361 -0.35198552  0.04017177 -0.58834841  0.21159579]]
