In [1]:
# Importing necessary libraries
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, recall_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Reproducibility: one seed shared by the train/test split and by every
# classifier whose training involves randomness.
SEED = 42

# Load the dataset
df = pd.read_csv('Churn_Modelling.csv')

# Preprocess the dataset ('Exited' is the target; the remaining kept columns are features)
# Encode each categorical feature with its own LabelEncoder so that every fitted
# mapping stays available; a single shared encoder is refit per column and only
# remembers the classes of the last column it saw.
# NOTE(review): integer codes impose an arbitrary order on 'Geography'; consider
# one-hot encoding for the linear and distance-based models.
encoders = {}
for column in ('Geography', 'Gender'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])
le = encoders['Gender']  # keeps the original `le` binding (the encoder fitted last)

# Selecting features and target: drop the columns not used as features and the target itself
X = df.drop(columns=['RowNumber', 'CustomerId', 'Surname', 'Exited'])
y = df['Exited']

# Splitting the dataset into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Feature Scaling: fit the scaler on the training split only, then apply the
# same transformation to the test split so test statistics never reach training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Classifiers to compare; the tree-based models are seeded so that repeated
# runs of this cell print the same accuracies.
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "SVM": SVC(),
    "k-NN": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=SEED),
}

# Train each classifier on the scaled training split and report test accuracy
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Evaluate performance
    print(f"Accuracy of {name}: {accuracy_score(y_test, y_pred):.2f}")



Accuracy of Logistic Regression: 0.82
Accuracy of Decision Tree: 0.78
Accuracy of Random Forest: 0.87
Accuracy of SVM: 0.86
Accuracy of k-NN: 0.84
Accuracy of Gradient Boosting: 0.87


In [3]:
# Logistic Regression: detailed evaluation on the held-out test split
clf_lr = classifiers["Logistic Regression"]
y_pred_lr = clf_lr.predict(X_test)

# Headline metrics, both computed from the same predictions
accuracy_lr, recall_lr = (
    accuracy_score(y_test, y_pred_lr),
    recall_score(y_test, y_pred_lr),
)

# A single print call with a newline separator emits the same three-part output
print(
    f"Accuracy of Logistic Regression: {accuracy_lr:.2f}",
    f"Recall of Logistic Regression: {recall_lr:.2f}",
    f"Classification Report for Logistic Regression:\n{classification_report(y_test, y_pred_lr)}",
    sep="\n",
)


Accuracy of Logistic Regression: 0.82
Recall of Logistic Regression: 0.18
Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.83      0.97      0.89      1607
           1       0.60      0.18      0.28       393

    accuracy                           0.82      2000
   macro avg       0.71      0.58      0.59      2000
weighted avg       0.78      0.82      0.77      2000



In [4]:
# Decision Tree: detailed evaluation on the held-out test split
clf_dt = classifiers["Decision Tree"]
y_pred_dt = clf_dt.predict(X_test)

# Headline metrics, both computed from the same predictions
accuracy_dt, recall_dt = (
    accuracy_score(y_test, y_pred_dt),
    recall_score(y_test, y_pred_dt),
)

# A single print call with a newline separator emits the same three-part output
print(
    f"Accuracy of Decision Tree: {accuracy_dt:.2f}",
    f"Recall of Decision Tree: {recall_dt:.2f}",
    f"Classification Report for Decision Tree:\n{classification_report(y_test, y_pred_dt)}",
    sep="\n",
)


Accuracy of Decision Tree: 0.78
Recall of Decision Tree: 0.50
Classification Report for Decision Tree:
              precision    recall  f1-score   support

           0       0.87      0.84      0.86      1607
           1       0.44      0.50      0.47       393

    accuracy                           0.78      2000
   macro avg       0.66      0.67      0.66      2000
weighted avg       0.79      0.78      0.78      2000



In [5]:
# Random Forest: detailed evaluation on the held-out test split
clf_rf = classifiers["Random Forest"]
y_pred_rf = clf_rf.predict(X_test)

# Headline metrics, both computed from the same predictions
accuracy_rf, recall_rf = (
    accuracy_score(y_test, y_pred_rf),
    recall_score(y_test, y_pred_rf),
)

# A single print call with a newline separator emits the same three-part output
print(
    f"Accuracy of Random Forest: {accuracy_rf:.2f}",
    f"Recall of Random Forest: {recall_rf:.2f}",
    f"Classification Report for Random Forest:\n{classification_report(y_test, y_pred_rf)}",
    sep="\n",
)


Accuracy of Random Forest: 0.87
Recall of Random Forest: 0.47
Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.88      0.97      0.92      1607
           1       0.78      0.47      0.59       393

    accuracy                           0.87      2000
   macro avg       0.83      0.72      0.75      2000
weighted avg       0.86      0.87      0.86      2000



In [6]:
# Evaluate SVM
# SVM-specific names are used so the Logistic Regression results from the
# earlier cell (clf_lr, y_pred_lr, accuracy_lr, recall_lr) are not overwritten.
clf_svm = classifiers["SVM"]
y_pred_svm = clf_svm.predict(X_test)

accuracy_svm = accuracy_score(y_test, y_pred_svm)
# With its default arguments recall_score reports recall for the positive label (1)
recall_svm = recall_score(y_test, y_pred_svm)

# Labels name the model that was actually evaluated in this cell
print(f"Accuracy of SVM: {accuracy_svm:.2f}")
print(f"Recall of SVM: {recall_svm:.2f}")
print(f"Classification Report for SVM:\n{classification_report(y_test, y_pred_svm)}")


Accuracy of SVM: 0.86
Recall of SVM: 0.37
Classification Report for SVM:
              precision    recall  f1-score   support

           0       0.86      0.98      0.92      1607
           1       0.80      0.37      0.50       393

    accuracy                           0.86      2000
   macro avg       0.83      0.67      0.71      2000
weighted avg       0.85      0.86      0.84      2000



In [7]:
# Evaluate k-NN
# k-NN-specific names are used so the Logistic Regression results from the
# earlier cell (clf_lr, y_pred_lr, accuracy_lr, recall_lr) are not overwritten.
clf_knn = classifiers["k-NN"]
y_pred_knn = clf_knn.predict(X_test)

accuracy_knn = accuracy_score(y_test, y_pred_knn)
# With its default arguments recall_score reports recall for the positive label (1)
recall_knn = recall_score(y_test, y_pred_knn)

# Labels name the model that was actually evaluated in this cell
print(f"Accuracy of k-NN: {accuracy_knn:.2f}")
print(f"Recall of k-NN: {recall_knn:.2f}")
print(f"Classification Report for k-NN:\n{classification_report(y_test, y_pred_knn)}")


Accuracy of k-NN: 0.84
Recall of k-NN: 0.39
Classification Report for k-NN:
              precision    recall  f1-score   support

           0       0.86      0.95      0.90      1607
           1       0.63      0.39      0.48       393

    accuracy                           0.84      2000
   macro avg       0.75      0.67      0.69      2000
weighted avg       0.82      0.84      0.82      2000



In [8]:
# Evaluate Gradient Boosting
# Model-specific names are used so the Logistic Regression results from the
# earlier cell (clf_lr, y_pred_lr, accuracy_lr, recall_lr) are not overwritten.
clf_gb = classifiers["Gradient Boosting"]
y_pred_gb = clf_gb.predict(X_test)

accuracy_gb = accuracy_score(y_test, y_pred_gb)
# With its default arguments recall_score reports recall for the positive label (1)
recall_gb = recall_score(y_test, y_pred_gb)

# Labels name the model that was actually evaluated in this cell
print(f"Accuracy of Gradient Boosting: {accuracy_gb:.2f}")
print(f"Recall of Gradient Boosting: {recall_gb:.2f}")
print(f"Classification Report for Gradient Boosting:\n{classification_report(y_test, y_pred_gb)}")


Accuracy of Gradient Boosting: 0.87
Recall of Gradient Boosting: 0.47
Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.58       393

    accuracy                           0.87      2000
   macro avg       0.82      0.72      0.75      2000
weighted avg       0.86      0.87      0.85      2000

