In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score

In [2]:
# Load the Telco customer-churn table from the CSV file; the path is relative
# to the notebook's working directory.
churn_csv_path = "WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(churn_csv_path)

In [3]:
# Convert TotalCharges to a numeric dtype. errors="coerce" turns every entry
# that cannot be parsed as a number into NaN instead of raising.
total_charges_numeric = pd.to_numeric(df["TotalCharges"], errors="coerce")
df["TotalCharges"] = total_charges_numeric

In [4]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(axis=0, how="any", inplace=True)

In [5]:
# Remove the customerID column in place (presumably a per-customer identifier
# that should not be used as a feature -- confirm against the dataset docs).
df.drop(columns=["customerID"], inplace=True)

In [6]:
# Encode the target column as integers: "Yes" -> 1, "No" -> 0.
# Series.map yields NaN for any value that is not a key of the mapping.
churn_codes = {"Yes": 1, "No": 0}
df["Churn"] = df["Churn"].map(churn_codes)

In [7]:
# Column groups, keyed by how each group is encoded further down the notebook.

# Two-valued columns ("Yes"/"No" or "Male"/"Female") that get mapped to 1/0.
binary_cols = ["gender", "Partner", "Dependents", "PhoneService", "PaperlessBilling"]

# Service columns whose "No internet service" / "No phone service" values are
# folded into "No" before the column is mapped to 1/0.
multi_service_cols = [
    "MultipleLines",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
]

# Columns passed to pd.get_dummies for one-hot encoding.
multi_class_cols = ["InternetService", "Contract", "PaymentMethod"]


In [8]:
# Encode the two-valued columns and the add-on service columns as 0/1.
#
# Series.map converts every value that is missing from the mapping into NaN
# without any warning. That happens for an unexpected category in the data and
# also when this cell is run a second time (the column then already holds 0/1,
# which are not keys of the mapping). Each column is therefore validated
# before it is written back, so a bad run fails here with a clear message
# instead of silently filling the frame with NaN.
binary_mapping = {"Yes": 1, "No": 0, "Male": 1, "Female": 0}

# "No internet service" / "No phone service" are treated exactly like "No".
service_mapping = {
    "Yes": 1,
    "No": 0,
    "No internet service": 0,
    "No phone service": 0,
}

for cols, mapping in ((binary_cols, binary_mapping), (multi_service_cols, service_mapping)):
    for col in cols:
        encoded = df[col].map(mapping)
        unmapped = encoded.isna()
        if unmapped.any():
            unexpected = df.loc[unmapped, col].unique().tolist()
            raise ValueError(
                f"Column {col!r} contains values outside the expected categories "
                f"(or this cell was re-run on already-encoded data): {unexpected}"
            )
        df[col] = encoded

In [17]:
# One-hot encode the multi-category columns. drop_first=True omits the first
# level of each encoded column, so k categories produce k-1 indicator columns.
df = pd.get_dummies(
    data=df,
    columns=multi_class_cols,
    drop_first=True,
)

In [19]:
# Sanity check after encoding: print the dtype of every column.
print(df.dtypes)
# Last expression of the cell, so the notebook renders it: the first rows of
# the encoded frame (DataFrame.head defaults to five rows).
df.head()  

gender                                     int64
SeniorCitizen                              int64
Partner                                    int64
Dependents                                 int64
tenure                                     int64
PhoneService                               int64
MultipleLines                              int64
OnlineSecurity                             int64
OnlineBackup                               int64
DeviceProtection                           int64
TechSupport                                int64
StreamingTV                                int64
StreamingMovies                            int64
PaperlessBilling                           int64
MonthlyCharges                           float64
TotalCharges                             float64
Churn                                      int64
InternetService_Fiber optic                 bool
InternetService_No                          bool
Contract_One year                           bool
Contract_Two year   

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,MonthlyCharges,TotalCharges,Churn,InternetService_Fiber optic,InternetService_No,Contract_One year,Contract_Two year,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,1,0,0,0,1,0,...,29.85,29.85,0,False,False,False,False,False,True,False
1,1,0,0,0,34,1,0,1,0,1,...,56.95,1889.5,0,False,False,True,False,False,False,True
2,1,0,0,0,2,1,0,1,1,0,...,53.85,108.15,1,False,False,False,False,False,False,True
3,1,0,0,0,45,0,0,1,0,1,...,42.3,1840.75,0,False,False,True,False,False,False,False
4,0,0,0,0,2,1,0,0,0,0,...,70.7,151.65,1,True,False,False,False,False,True,False


In [21]:
# Separate the target vector (the "Churn" column) from the feature matrix
# (every other column of the encoded frame).
y = df["Churn"]
X = df.drop(columns=["Churn"])

In [23]:
# Hold out 20% of the rows as a test set. Stratifying on y keeps the class
# proportions of the target in both splits; random_state fixes the shuffle so
# the split is reproducible.
split_kwargs = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [25]:
# Fit scikit-learn's logistic regression on the training split and record how
# long the fit takes.
# max_iter=10000 raises the solver's iteration cap (presumably so it can
# converge on the unscaled features -- confirm).
lr = LogisticRegression(max_iter=10000)

# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, whereas
# time.time() follows the adjustable system wall clock. Only the difference
# between two perf_counter() readings is meaningful.
start_time = time.perf_counter()
lr.fit(X_train, y_train)
end_time = time.perf_counter()
train_time_sklearn = end_time - start_time  # elapsed seconds

In [26]:
# Predict on the test split and time the call.
# time.perf_counter() replaces time.time(): a single predict call is very
# short, and the wall clock can be too coarse (or be adjusted mid-measurement)
# to time it reliably. Only the difference between two readings is meaningful.
start_time = time.perf_counter()
y_pred_sklearn = lr.predict(X_test)
end_time = time.perf_counter()
# NOTE: the stored value is the elapsed time in seconds multiplied by 100; the
# report cell labels it "Prediction Time multiplied by 100".
prediction_time_sklearn = (end_time - start_time) * 100

In [29]:
# Score the scikit-learn predictions against the held-out labels.
cm = confusion_matrix(y_test, y_pred_sklearn)
report = classification_report(y_test, y_pred_sklearn)
acc = accuracy_score(y_test, y_pred_sklearn)
f1 = f1_score(y_test, y_pred_sklearn)

# Each heading is printed on its own line, followed by the object it labels.
print("Confusion Matrix (sklearn):", cm, sep="\n")
print("\nClassification Report (sklearn):", report, sep="\n")

# Headline scores, four decimal places.
print(f"\nAccuracy (sklearn): {acc:.4f}")
print(f"F1 Score (sklearn): {f1:.4f}")

# Timings measured in the fit and predict cells. The prediction figure was
# already multiplied by 100 where it was measured.
print(f"\nTraining Time (sklearn): {train_time_sklearn:.6f} seconds")
print(f"Prediction Time multiplied by 100 (sklearn) : {prediction_time_sklearn:.6f} seconds")

Confusion Matrix (sklearn):
[[917 116]
 [158 216]]

Classification Report (sklearn):
              precision    recall  f1-score   support

           0       0.85      0.89      0.87      1033
           1       0.65      0.58      0.61       374

    accuracy                           0.81      1407
   macro avg       0.75      0.73      0.74      1407
weighted avg       0.80      0.81      0.80      1407


Accuracy (sklearn): 0.8053
F1 Score (sklearn): 0.6119

Training Time (sklearn): 1.191926 seconds
Prediction Time multiplied by 100 (sklearn) : 0.180697 seconds
