In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Name of the label column that the classifiers will predict
target_column = "Exited"

# Read the CSV, discard the columns that are not used as features, and
# one-hot encode the two categorical columns (first level of each dropped)
data = (
    pd.read_csv("/content/drive/MyDrive/Churn_Modelling.csv")
    .drop(columns=['RowNumber', 'CustomerId', 'Surname'])
    .pipe(pd.get_dummies, columns=['Geography', 'Gender'], drop_first=True)
)

# Separate the label from the feature matrix
y = data[target_column]  # Target variable
X = data.drop(columns=target_column)  # Features

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits so no test statistics leak into training
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit three classifiers on the standardized training split and predict
# labels for the held-out test split.

# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
logreg_pred = logreg.predict(X_test)

# Random Forest (seeded so repeated runs build the same trees)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Gradient Boosting (seeded for the same reason)
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)

# Evaluate Models
# Every fitted model is reported, not just one, so the three can be compared
# side by side and no prediction array is computed without being used.
predictions = {
    "Logistic Regression": logreg_pred,
    "Random Forest": rf_pred,
    "Gradient Boosting": gb_pred,
}

# Accuracy summary first, one line per model
for model_name, y_pred in predictions.items():
    print(f"{model_name} Accuracy:", accuracy_score(y_test, y_pred))

# Confusion Matrix and Classification Report
# Accuracy alone hides per-class behaviour, so also show the confusion matrix
# and the per-class precision / recall / F1 for each model.
for model_name, y_pred in predictions.items():
    print(f"Confusion Matrix ({model_name}):\n", confusion_matrix(y_test, y_pred))
    print(f"Classification Report ({model_name}):\n", classification_report(y_test, y_pred))


Logistic Regression Accuracy: 0.811
Random Forest Accuracy: 0.8665
Gradient Boosting Accuracy: 0.8675
Confusion Matrix (Logistic Regression):
 [[1543   64]
 [ 314   79]]
Classification Report (Logistic Regression):
               precision    recall  f1-score   support

           0       0.83      0.96      0.89      1607
           1       0.55      0.20      0.29       393

    accuracy                           0.81      2000
   macro avg       0.69      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000

