In [1]:
import pandas as pd
import numpy as np
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Models
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the raw Telco churn CSV. The location is machine-specific; change it here
# when the dataset lives somewhere else.
DATASET_PATH = r"D:\CSE(DataScience)\Customer_Churn_Prediction\Datasets\Telco-Customer-Churn-dataset.csv"
df = pd.read_csv(DATASET_PATH)

# The status marker was mojibake (UTF-8 bytes decoded as cp1252); restored to the emoji.
print("✅ Dataset Loaded")
print("Shape before cleaning:", df.shape)

✅ Dataset Loaded
Shape before cleaning: (7043, 21)


In [3]:
# Remove the customerID identifier column so it is not one-hot encoded as a feature.
# Reassigning with errors="ignore" (instead of inplace=True) keeps the cell safe to
# re-run: a second execution no longer raises KeyError once the column is gone.
df = df.drop(columns=["customerID"], errors="ignore")

In [4]:
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN
# (errors="coerce") and are removed by the dropna step that follows.
total_charges_numeric = pd.to_numeric(df["TotalCharges"], errors="coerce")
df["TotalCharges"] = total_charges_numeric

In [5]:
# Discard every row that still contains a missing value, then report the new size.
df = df.dropna()

print("Shape after cleaning:", df.shape)

Shape after cleaning: (7032, 20)


In [6]:
# Separate the target column from the remaining feature columns.
target_column = "Churn"
y = df[target_column]
X = df.drop(columns=[target_column])

In [7]:
# One-hot encode the categorical feature columns; drop_first=True omits the first
# level of each category so the indicators are not redundant with one another.
X = pd.get_dummies(data=X, drop_first=True)

In [8]:
# Encode the target as 1 ("Yes") / 0 ("No").
# Mapping from df["Churn"] rather than from y keeps the cell idempotent: re-running
# `y = y.map(...)` on already-encoded values would silently turn every label into NaN.
y = df["Churn"].map({"Yes": 1, "No": 0})

# Fail fast on labels outside {"Yes", "No"}; .map() leaves unmapped values as NaN.
if y.isna().any():
    raise ValueError("Churn contains values other than 'Yes'/'No'")

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [10]:
# Learn the scaling statistics from the training rows only, then apply that same
# fitted transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Directory that receives the pickled models (machine-specific path).
save_dir = r"D:\CSE(DataScience)\Customer_Churn_Prediction\models"
# exist_ok=True replaces the exists()/makedirs() pair: a single call that does not
# fail if the directory appears between the check and the creation.
os.makedirs(save_dir, exist_ok=True)

In [12]:
def evaluate_and_save_model(model, model_name):
    """Fit ``model``, print its hold-out metrics, and pickle it into ``save_dir``.

    Relies on the notebook-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``save_dir`` variables defined in earlier cells.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label shown in the printed report; also the file stem of the
            saved ``<model_name>.pkl``.

    Returns:
        None. The report is printed and the fitted model is written to disk.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # The emoji markers below were mojibake (UTF-8 decoded as cp1252); restored.
    print("\n==============================")
    print(f"📊 Model: {model_name}")
    print("==============================")
    print("✅ Accuracy:", accuracy_score(y_test, y_pred))
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    # Save model. Ensure the target directory exists so the dump does not fail
    # when the directory-creation cell was skipped or the folder was removed.
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"{model_name}.pkl")
    joblib.dump(model, save_path)
    print(f"💾 {model_name} saved at {save_path}")

In [13]:
# Train, evaluate and persist each candidate classifier in turn.
candidates = [
    (SVC(kernel='rbf', probability=True, random_state=42), "SVM"),
    (KNeighborsClassifier(n_neighbors=5), "KNN"),
    (LogisticRegression(max_iter=1000, random_state=42), "LogisticRegression"),
]
for estimator, label in candidates:
    evaluate_and_save_model(estimator, label)


📊 Model: SVM
✅ Accuracy: 0.7810945273631841

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.89      0.86      1033
           1       0.62      0.47      0.53       374

    accuracy                           0.78      1407
   macro avg       0.72      0.68      0.69      1407
weighted avg       0.77      0.78      0.77      1407

Confusion Matrix:
 [[924 109]
 [199 175]]
💾 SVM saved at D:\CSE(DataScience)\Customer_Churn_Prediction\models\SVM.pkl

📊 Model: KNN
✅ Accuracy: 0.7526652452025586

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.84      0.83      1033
           1       0.54      0.51      0.52       374

    accuracy                           0.75      1407
   macro avg       0.68      0.67      0.68      1407
weighted avg       0.75      0.75      0.75      1407

Confusion Matrix:
 [[869 164]
 [184 190]]
ðŸ’¾ KNN saved at D:\CSE(DataScie