In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the dataset from a fixed CSV path.
# NOTE(review): "/content/" looks like a Colab upload location — confirm the
# path before running elsewhere.
file = "/content/CCP.csv"
data = pd.read_csv(file)

# One seed shared by the train/test split and the tree ensembles, so the
# metrics printed further down are identical on every run.
RANDOM_STATE = 42

# Replace the Geography and Gender columns with integer codes. Each column
# gets its own fitted encoder, stored in `encoders`, so the codes can be
# mapped back to the original labels with `inverse_transform`; re-fitting a
# single shared encoder would discard the Geography mapping.
# After the loop `le` refers to the Gender encoder.
# NOTE(review): integer codes give Geography an arbitrary numeric order, which
# the logistic regression treats as a magnitude — consider one-hot encoding.
encoders = {}
for column in ('Geography', 'Gender'):
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    encoders[column] = le

# Features are every column except the three dropped alongside the target
# (presumably identifiers — confirm) and the target itself, 'Exited'.
X = data.drop(columns=['RowNumber', 'CustomerId', 'Surname', 'Exited'])
y = data['Exited']

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so nothing about the held-out data leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the three classifiers on the same scaled features and predict on the
# held-out set.
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# Both ensembles are randomized; seeding them makes their scores repeatable.
rf_model = RandomForestClassifier(random_state=RANDOM_STATE)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

gb_model = GradientBoostingClassifier(random_state=RANDOM_STATE)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)

def evaluate_model(y_true, y_pred):
    """Score a set of predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``, each computed by the
        corresponding ``sklearn.metrics`` function with its default settings.
    """
    # Order matters: callers receive the scores positionally.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)

# Score every model's held-out predictions; each result is an
# (accuracy, precision, recall, f1) tuple.
lr_metrics, rf_metrics, gb_metrics = (
    evaluate_model(y_test, predictions)
    for predictions in (lr_pred, rf_pred, gb_pred)
)

# Print one line per model: its label followed by the raw metrics tuple.
for heading, scores in (
    ("Logistic Regression Metrics: ", lr_metrics),
    ("Random Forest Metrics: ", rf_metrics),
    ("Gradient Boosting Metrics: ", gb_metrics),
):
    print(heading, scores)

# Per-class breakdown for the gradient boosting model only.
print("Classification Report for Gradient Boosting:")
print(classification_report(y_test, gb_pred))


Logistic Regression Metrics:  (0.8166666666666667, 0.6011904761904762, 0.17294520547945205, 0.26861702127659576)
Random Forest Metrics:  (0.8673333333333333, 0.7735294117647059, 0.4503424657534247, 0.5692640692640693)
Gradient Boosting Metrics:  (0.87, 0.783625730994152, 0.4589041095890411, 0.5788336933045356)
Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           0       0.88      0.97      0.92      2416
           1       0.78      0.46      0.58       584

    accuracy                           0.87      3000
   macro avg       0.83      0.71      0.75      3000
weighted avg       0.86      0.87      0.86      3000

