In [1]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Location of the churn CSV. Set the CHURN_DATA_PATH environment variable to
# point at your own copy of the file; when it is unset, the path originally
# hardcoded in this notebook is used, so the default behaviour is unchanged.
DATA_PATH = os.environ.get(
    "CHURN_DATA_PATH",
    r"C:\Users\sssuj\Downloads\Churn_Modelling.csv",
)
data = pd.read_csv(DATA_PATH)

In [3]:
# Columns removed before modelling; `data` itself is left untouched because
# DataFrame.drop returns a new frame.
columns_to_drop = ['RowNumber', 'CustomerId', 'Surname']
data_cleaned = data.drop(columns=columns_to_drop)

In [4]:
# One independent encoder per categorical column, so each keeps its own
# fitted label mapping.
label_encoder_gender, label_encoder_geo = LabelEncoder(), LabelEncoder()

In [5]:
# Replace the two categorical columns of `data_cleaned` with integer codes.
#
# The encoders are fitted on the raw frame `data` (never modified after
# loading) instead of on the `data_cleaned` columns that this cell overwrites.
# That keeps the cell idempotent: re-running it always fits on the original
# labels, whereas fitting on the overwritten column would, on a second run,
# refit on the integer codes and lose the label <-> code mapping stored in
# each encoder's `classes_`.
#
# NOTE(review): LabelEncoder assigns arbitrary integer codes, which imposes an
# order on 'Geography' that the linear model will treat as meaningful; consider
# one-hot encoding if that matters for the analysis.
data_cleaned['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
data_cleaned['Geography'] = label_encoder_geo.fit_transform(data['Geography'])

In [6]:
# Split the frame into the target column and the remaining feature columns.
target_column = 'Exited'
y = data_cleaned[target_column]
X = data_cleaned.drop(columns=[target_column])

In [7]:
# Hold out a fraction of the rows for evaluation; the fixed seed makes the
# split repeatable across runs.
test_fraction = 0.2
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_fraction,
    random_state=split_seed,
)

In [8]:
# Logistic regression, allowed up to 1000 solver iterations.
# `fit` returns the estimator itself, so construction and training are chained.
logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)

In [9]:
# Random forest with 100 trees and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [10]:
# Gradient boosting with 100 boosting stages and a fixed seed.
# `fit` returns the estimator itself, so construction and training are chained.
gb = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

In [11]:
def evaluate_model(model_name, y_test, y_pred):
    """Print a classification summary for one model's predictions.

    Writes, in order: a header naming the model, the accuracy, the confusion
    matrix, the per-class classification report, and a 60-character separator.

    Args:
        model_name: Label used in the header line.
        y_test: True target values.
        y_pred: Predicted target values, compared against ``y_test``.

    Returns:
        None. The results are only printed.
    """
    print(f"Results for {model_name}:")

    # Each metric is computed and printed in turn, under its own heading.
    report_sections = (
        ("Accuracy:", accuracy_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for heading, metric in report_sections:
        print(heading, metric(y_test, y_pred))

    print("=" * 60)

In [12]:
# Report held-out performance of the logistic regression model.
evaluate_model(
    model_name="Logistic Regression",
    y_test=y_test,
    y_pred=y_pred_logreg,
)

Results for Logistic Regression:
Accuracy: 0.8005
Confusion Matrix:
 [[1573   34]
 [ 365   28]]
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.98      0.89      1607
           1       0.45      0.07      0.12       393

    accuracy                           0.80      2000
   macro avg       0.63      0.53      0.51      2000
weighted avg       0.74      0.80      0.74      2000



In [13]:
# Report held-out performance of the random forest model.
evaluate_model(
    model_name="Random Forest",
    y_test=y_test,
    y_pred=y_pred_rf,
)

Results for Random Forest:
Accuracy: 0.8645
Confusion Matrix:
 [[1546   61]
 [ 210  183]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.57       393

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.75      2000
weighted avg       0.85      0.86      0.85      2000



In [14]:
# Report held-out performance of the gradient boosting model.
evaluate_model(
    model_name="Gradient Boosting",
    y_test=y_test,
    y_pred=y_pred_gb,
)

Results for Gradient Boosting:
Accuracy: 0.8655
Confusion Matrix:
 [[1547   60]
 [ 209  184]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.58       393

    accuracy                           0.87      2000
   macro avg       0.82      0.72      0.75      2000
weighted avg       0.86      0.87      0.85      2000

