In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
df = pd.read_csv("customer_churn_dataset.csv")

# Encode categorical variables
label_encoders = {}
categorical_features = ['Contract', 'PaymentMethod', 'InternetService']
for col in categorical_features:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Select features and target variable
X = df.drop(columns=['CustomerID', 'Churn'])
y = df['Churn']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
log_reg = LogisticRegression()
param_grid = {'C': [0.01, 0.1, 1, 10], 'max_iter': [100, 200, 300]}
grid_search = GridSearchCV(log_reg, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=1))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.845
Classification Report:
               precision    recall  f1-score   support

           0       0.84      1.00      0.92       169
           1       1.00      0.00      0.00        31

    accuracy                           0.84       200
   macro avg       0.92      0.50      0.46       200
weighted avg       0.87      0.84      0.77       200

Confusion Matrix:
 [[169   0]
 [ 31   0]]
