In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [56]:
df = pd.read_csv('/content/Churn_Modelling.csv')

In [58]:
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)
df.dropna(inplace=True)

In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9998 entries, 0 to 10001
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      9998 non-null   int64  
 1   Geography        9998 non-null   object 
 2   Gender           9998 non-null   object 
 3   Age              9998 non-null   float64
 4   Tenure           9998 non-null   int64  
 5   Balance          9998 non-null   float64
 6   NumOfProducts    9998 non-null   int64  
 7   HasCrCard        9998 non-null   float64
 8   IsActiveMember   9998 non-null   float64
 9   EstimatedSalary  9998 non-null   float64
 10  Exited           9998 non-null   int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 937.3+ KB


In [60]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42.0,2,0.0,1,1.0,1.0,101348.88,1
1,608,Spain,Female,41.0,1,83807.86,1,0.0,1.0,112542.58,0
2,502,France,Female,42.0,8,159660.8,3,1.0,0.0,113931.57,1
3,699,France,Female,39.0,1,0.0,2,0.0,0.0,93826.63,0
5,645,Spain,Male,44.0,8,113755.78,2,1.0,0.0,149756.71,1


In [61]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

le_geo = LabelEncoder()
le_gender = LabelEncoder()

df['Geography'] = le_geo.fit_transform(df['Geography'].astype(str))
df['Gender'] = le_gender.fit_transform(df['Gender'].astype(str))

scaler = StandardScaler()
cols_to_scale = ['CreditScore', 'Balance', 'EstimatedSalary']
df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])

df.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,-0.326298,0,0,42.0,2,-1.22586,1,1.0,1.0,0.02172,1
1,-0.440137,2,0,41.0,1,0.117428,1,0.0,1.0,0.216366,0
2,-1.537125,0,0,42.0,8,1.333214,3,1.0,0.0,0.240519,1
3,0.501618,0,0,39.0,1,-1.22586,2,0.0,0.0,-0.109083,0
5,-0.057226,2,1,44.0,8,0.597439,2,1.0,0.0,0.863478,1


In [62]:
X = df.drop('Exited', axis=1)
y = df['Exited']

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [64]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [65]:
# Normal Model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)
print(f"Logistic Regression Model Accuracy : {accuracy_score(y_test, model.predict(X_test_scaled))}")

# L2
logreg_l2 = LogisticRegression(penalty='l2', C=1.0, solver='lbfgs', max_iter=1000)
logreg_l2.fit(X_train_scaled, y_train)
print(f"Logistic Regression Model with L2 Accuracy: {accuracy_score(y_test, logreg_l2.predict(X_test_scaled))}")


# L1
logreg_l1 = LogisticRegression(penalty='l1', C=1.0, solver='liblinear', max_iter=1000)
logreg_l1.fit(X_train_scaled, y_train)
print(f"Logistic Regression Model with L1 Accuracy: {accuracy_score(y_test, logreg_l1.predict(X_test_scaled))}")


# Elastic Net
logreg_en = LogisticRegression(penalty='elasticnet', l1_ratio=0.5, C=1.0, solver='saga', max_iter=1000)
logreg_en.fit(X_train_scaled, y_train)
print(f"Logistic Regression Model with Elastic Net Accuracy: {accuracy_score(y_test, logreg_en.predict(X_test_scaled))}")

Logistic Regression Model Accuracy : 0.797
Logistic Regression Model with L2 Accuracy: 0.797
Logistic Regression Model with L1 Accuracy: 0.797
Logistic Regression Model with Elastic Net Accuracy: 0.797


In [66]:
# Normal Model
knn_model = KNeighborsClassifier(n_neighbors=21)
knn_model.fit(X_train_scaled, y_train)
print(f"KNNs Model Accuracy : {accuracy_score(y_test, knn_model.predict(X_test_scaled))}")

# Varying Neighbours
print("Varying Neighbours Accuracies")
for k in [4, 8, 16, 32, 64, 128]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    acc = accuracy_score(y_test, knn.predict(X_test_scaled))
    print(f"k={k}, Accuracy={acc:.3f}")

# Manhattin Distance
knn_manhattan = KNeighborsClassifier(n_neighbors=5, metric='manhattan')
knn_manhattan.fit(X_train_scaled, y_train)

print("Manhattan KNN Accuracy:", accuracy_score(y_test, knn_manhattan.predict(X_test_scaled)))



KNNs Model Accuracy : 0.8365
Varying Neighbours Accuracies
k=4, Accuracy=0.837
k=8, Accuracy=0.831
k=16, Accuracy=0.831
k=32, Accuracy=0.824
k=64, Accuracy=0.823
k=128, Accuracy=0.812
Manhattan KNN Accuracy: 0.8295
