###  Import Required Libraries

In [1]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler

###  Load the Dataset

In [2]:
# Simulated customer churn dataset 
data = { 
    'Tenure (months)': [12, 24, np.nan, 36, 48, 6, 18, 30, np.nan, 40], 
    'Monthly Charges ($)': [70, 50, 60, 90, 80, 40, np.nan, 55, 65, 85], 
    'Churn (0 = No, 1 = Yes)': [0, 0, 1, 0, 0, 1, 1, 0, 1, 0] 
} 
 
df = pd.DataFrame(data) 
print("Original Dataset:\n", df) 

Original Dataset:
    Tenure (months)  Monthly Charges ($)  Churn (0 = No, 1 = Yes)
0             12.0                 70.0                        0
1             24.0                 50.0                        0
2              NaN                 60.0                        1
3             36.0                 90.0                        0
4             48.0                 80.0                        0
5              6.0                 40.0                        1
6             18.0                  NaN                        1
7             30.0                 55.0                        0
8              NaN                 65.0                        1
9             40.0                 85.0                        0


### Handling Missing Values using SimpleImputer

In [3]:
# Replace missing values with mean 
imputer = SimpleImputer(strategy='mean') 
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns) 
 
print("\nDataset after Handling Missing Values:\n", df_imputed)


Dataset after Handling Missing Values:
    Tenure (months)  Monthly Charges ($)  Churn (0 = No, 1 = Yes)
0            12.00            70.000000                      0.0
1            24.00            50.000000                      0.0
2            26.75            60.000000                      1.0
3            36.00            90.000000                      0.0
4            48.00            80.000000                      0.0
5             6.00            40.000000                      1.0
6            18.00            66.111111                      1.0
7            30.00            55.000000                      0.0
8            26.75            65.000000                      1.0
9            40.00            85.000000                      0.0


### Splitting Data into Training and Testing Sets

In [5]:
# Features and target variable 
X = df_imputed[['Tenure (months)', 'Monthly Charges ($)']] 
y = df_imputed['Churn (0 = No, 1 = Yes)'] 
 
# Split data (80% training, 20% testing) 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 
 
print("\nTraining Set Size:", X_train.shape) 
print("Testing Set Size:", X_test.shape)


Training Set Size: (8, 2)
Testing Set Size: (2, 2)


###  Feature Scaling using StandardScaler and MinMaxScaler 

In [6]:
# Standardization 
scaler_standard = StandardScaler() 
X_train_standardized = scaler_standard.fit_transform(X_train) 
X_test_standardized = scaler_standard.transform(X_test) 
 
# Normalization 
scaler_minmax = MinMaxScaler() 
X_train_normalized = scaler_minmax.fit_transform(X_train) 
X_test_normalized = scaler_minmax.transform(X_test) 
 
print("\nStandardized Data (First 3 rows):\n", X_train_standardized[:3]) 
print("\nNormalized Data (First 3 rows):\n", X_train_normalized[:3]) 


Standardized Data (First 3 rows):
 [[-1.568432   -1.81452491]
 [-1.12230023  0.1114573 ]
 [ 0.21609508 -0.8515338 ]]

Normalized Data (First 3 rows):
 [[0.         0.        ]
 [0.14285714 0.6       ]
 [0.57142857 0.3       ]]
