In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score

In [2]:
# Read the Telco customer-churn CSV into a DataFrame.
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# First look at the raw table: dimensions, leading rows, and per-column dtypes.
print(df.shape, df.head(), df.dtypes, sep="\n")

(7043, 21)
   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Co

In [3]:
# Inspect TotalCharges: the values look numeric, but the reported dtype is object.
df["TotalCharges"].head()

0      29.85
1     1889.5
2     108.15
3    1840.75
4     151.65
Name: TotalCharges, dtype: object

In [7]:
# TotalCharges was read as text; convert it to numbers and let any entry that
# cannot be parsed become NaN (errors="coerce") instead of raising.
df = df.assign(TotalCharges=pd.to_numeric(df["TotalCharges"], errors="coerce"))

In [9]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [11]:
# Confirm the frame's dimensions after the rows with missing values were dropped.
df.shape

(7032, 21)

In [13]:
# customerID is an identifier column; remove it before building features.
df = df.drop(columns=["customerID"])

In [15]:
# Encode the target as integers: "Yes" -> 1, "No" -> 0.
# Any other value would map to NaN.
df["Churn"] = df["Churn"].map({"No": 0, "Yes": 1})

In [17]:
# Spot-check the first few values of the Churn column after the Yes/No -> 1/0 mapping.
df["Churn"].head()

0    0
1    0
2    1
3    0
4    1
Name: Churn, dtype: int64

In [19]:
# Class balance of the target, reported as fractions of all rows (normalize=True).
df["Churn"].value_counts(normalize=True)

Churn
0    0.734215
1    0.265785
Name: proportion, dtype: float64

In [21]:
# Two-valued columns (Yes/No or Male/Female) that are mapped straight to 0/1.
binary_cols = [
    "gender",
    "Partner",
    "Dependents",
    "PhoneService",
    "PaperlessBilling",
]

# Add-on service columns whose values include a "No ... service" variant
# besides plain Yes/No.
multi_service_cols = [
    "MultipleLines",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
]

# Categorical columns with more than two levels; these get one-hot encoded.
multi_class_cols = [
    "InternetService",
    "Contract",
    "PaymentMethod",
]


In [23]:
# One shared lookup covers both the Yes/No columns and the Male/Female column.
# Values missing from the lookup become NaN.
binary_codes = {"Yes": 1, "No": 0, "Male": 1, "Female": 0}
for column in binary_cols:
    df[column] = df[column].map(binary_codes)


In [25]:
# "No internet service" / "No phone service" are treated the same as "No",
# so every service column collapses to 1 (Yes) or 0 (anything meaning No).
service_codes = {
    "Yes": 1,
    "No": 0,
    "No internet service": 0,
    "No phone service": 0,
}
for column in multi_service_cols:
    df[column] = df[column].map(service_codes)


In [27]:
# One-hot encode the multi-level categoricals. drop_first=True omits the first
# level of each column so the remaining indicators are not redundant.
df = pd.get_dummies(
    data=df,
    columns=multi_class_cols,
    drop_first=True,
)

In [29]:
# Any boolean columns (e.g. indicator columns from one-hot encoding) are cast
# to plain integers so the whole frame is numeric.
bool_cols = df.select_dtypes(include="bool").columns
df = df.astype({name: int for name in bool_cols})

In [31]:
# Verify the encoding: print each column's dtype, then display the first rows.
print(df.dtypes)
df.head()

gender                                     int64
SeniorCitizen                              int64
Partner                                    int64
Dependents                                 int64
tenure                                     int64
PhoneService                               int64
MultipleLines                              int64
OnlineSecurity                             int64
OnlineBackup                               int64
DeviceProtection                           int64
TechSupport                                int64
StreamingTV                                int64
StreamingMovies                            int64
PaperlessBilling                           int64
MonthlyCharges                           float64
TotalCharges                             float64
Churn                                      int64
InternetService_Fiber optic                int32
InternetService_No                         int32
Contract_One year                          int32
Contract_Two year   

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,MonthlyCharges,TotalCharges,Churn,InternetService_Fiber optic,InternetService_No,Contract_One year,Contract_Two year,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,1,0,1,0,0,0,1,0,...,29.85,29.85,0,0,0,0,0,0,1,0
1,1,0,0,0,34,1,0,1,0,1,...,56.95,1889.5,0,0,0,1,0,0,0,1
2,1,0,0,0,2,1,0,1,1,0,...,53.85,108.15,1,0,0,0,0,0,0,1
3,1,0,0,0,45,0,0,1,0,1,...,42.3,1840.75,0,0,0,1,0,0,0,0
4,0,0,0,0,2,1,0,0,0,0,...,70.7,151.65,1,1,0,0,0,0,1,0


In [33]:
# Features are every column except the target; the target is the Churn column.
X = df.drop(columns=["Churn"])
y = df["Churn"]


In [35]:
# Give every feature the same float64 dtype before splitting.
X = X.astype("float64")

# Hold out 20% of the rows for testing. stratify=y keeps the churn ratio the
# same in both partitions, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [37]:
# Turn each target Series into a float64 column vector of shape (n_samples, 1).
y_train = y_train.to_numpy(dtype=np.float64).reshape(-1, 1)
y_test = y_test.to_numpy(dtype=np.float64).reshape(-1, 1)

In [39]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [41]:
# Prepend a column of ones so the first weight acts as the intercept term.
X_train_bias = np.hstack(
    [np.ones((X_train_scaled.shape[0], 1), dtype=np.float64), X_train_scaled]
)
X_test_bias = np.hstack(
    [np.ones((X_test_scaled.shape[0], 1), dtype=np.float64), X_test_scaled]
)

In [43]:
# One weight per column of the bias-augmented design matrix.
n_features_plus_1 = X_train_bias.shape[1]

# Small starting weights in [0, 0.01), drawn from a seeded generator so that a
# fresh "Restart & Run All" reproduces the same training run (the unseeded
# np.random.rand gave different starting weights on every execution).
# Generator.random already returns float64, so no extra cast is needed.
weights_initial = np.random.default_rng(42).random((n_features_plus_1, 1)) * 0.01

In [45]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The input is first limited to [-250, 250] so that ``np.exp`` never
    overflows for extreme values.
    """
    bounded = np.clip(z, -250, 250)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

In [47]:
def compute_cost(X, y, W):
    """Return the mean binary cross-entropy of the predictions sigmoid(X @ W).

    X supplies one row per sample (its first dimension is the sample count),
    y holds the 0/1 labels and W the weights. A small constant is added inside
    each logarithm so that a predicted probability of exactly 0 or 1 does not
    produce log(0).
    """
    n_samples = X.shape[0]
    probabilities = sigmoid(np.dot(X, W))

    tiny = 1e-9
    per_sample_log_likelihood = (
        y * np.log(probabilities + tiny)
        + (1 - y) * np.log(1 - probabilities + tiny)
    )
    return - (1 / n_samples) * np.sum(per_sample_log_likelihood)

In [49]:
def gradient_descent(X, y, W, learning_rate=0.001, n_steps=10000, print_cost=True,
                     log_every=400):
    """Fit logistic-regression weights with full-batch gradient descent.

    Parameters
    ----------
    X : array-like
        Design matrix with one row per sample (the notebook passes the
        bias-augmented, scaled training features).
    y : array-like
        0/1 labels; must be shaped so that ``sigmoid(X @ W) - y`` is
        element-wise (the notebook passes a column vector).
    W : array-like
        Starting weights. They are copied, so the caller's array is not modified.
    learning_rate : float
        Step size applied to the gradient on every iteration.
    n_steps : int
        Number of gradient updates to perform.
    print_cost : bool
        When true, print the cost every ``log_every`` steps.
    log_every : int
        Logging interval in steps (default 400, the previously hard-coded value).

    Returns
    -------
    (W, training_time) : tuple
        The fitted weights and the elapsed training time in seconds.

    Raises
    ------
    ValueError
        If ``print_cost`` is true and ``log_every`` is smaller than 1.
    """
    if print_cost and log_every < 1:
        raise ValueError("log_every must be a positive integer")

    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    # np.array always copies, so the in-place update below never touches the
    # caller's starting weights.
    W = np.array(W, dtype=np.float64)
    m = X.shape[0]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump and is coarse on some platforms.
    start_time = time.perf_counter()
    for i in range(n_steps):
        y_pred = sigmoid(np.dot(X, W))
        # Gradient of the mean cross-entropy with respect to W.
        gradient = (1 / m) * np.dot(X.T, (y_pred - y))
        W -= learning_rate * gradient

        if print_cost and i % log_every == 0:
            # The cost is evaluated after this step's update has been applied.
            c = compute_cost(X, y, W)
            print(f"Step {i}: Cost = {c:.6f}")

    training_time = time.perf_counter() - start_time

    return W, training_time

In [51]:
# Train on the bias-augmented training set; keep the fitted weights and the
# measured training time for the report below.
weights_final, train_time = gradient_descent(
    X_train_bias, y_train, weights_initial,
    learning_rate=0.001, n_steps=10000, print_cost=True,
)

Step 0: Cost = 0.697475
Step 400: Cost = 0.629748
Step 800: Cost = 0.585822
Step 1200: Cost = 0.555600
Step 1600: Cost = 0.533621
Step 2000: Cost = 0.516886
Step 2400: Cost = 0.503676
Step 2800: Cost = 0.492957
Step 3200: Cost = 0.484072
Step 3600: Cost = 0.476586
Step 4000: Cost = 0.470199
Step 4400: Cost = 0.464695
Step 4800: Cost = 0.459911
Step 5200: Cost = 0.455727
Step 5600: Cost = 0.452046
Step 6000: Cost = 0.448792
Step 6400: Cost = 0.445903
Step 6800: Cost = 0.443328
Step 7200: Cost = 0.441025
Step 7600: Cost = 0.438958
Step 8000: Cost = 0.437099
Step 8400: Cost = 0.435421
Step 8800: Cost = 0.433903
Step 9200: Cost = 0.432526
Step 9600: Cost = 0.431275


In [53]:
def predict(X, W, threshold=0.5):
    """Return 0/1 class labels for the rows of X under weights W.

    A row is labelled 1 when its predicted probability sigmoid(X @ W) is at
    least ``threshold``, otherwise 0. The result is an integer array.
    """
    probabilities = sigmoid(np.dot(X, W))
    is_positive = probabilities >= threshold
    return is_positive.astype(int)

In [55]:
# y_pred_train = predict(X_train_bias, weights_final)
# Time one prediction pass over the test set. perf_counter() is used because the
# call takes on the order of a millisecond, which the coarser, non-monotonic
# time.time() clock cannot measure reliably.
start_pred_time = time.perf_counter()
y_pred_test = predict(X_test_bias, weights_final)
end_pred_time = time.perf_counter()
prediction_time = (end_pred_time - start_pred_time) * 1000  # seconds -> milliseconds

In [57]:
# Score the hand-written model's test-set predictions with sklearn's metric helpers.
cm = confusion_matrix(y_test, y_pred_test)
# Labelled "manual" like the rest of this report: these are the manual model's predictions.
print("Confusion Matrix (manual):")
print(cm)

report = classification_report(y_test, y_pred_test)
print("\nClassification Report (Manual):")
print(report)

acc = accuracy_score(y_test, y_pred_test)
f1 = f1_score(y_test, y_pred_test)
print(f"\nAccuracy (manual): {acc:.4f}")
print(f"F1 Score (manual): {f1:.4f}")

print(f"\nTraining Time (manual): {train_time:.6f} seconds")
# prediction_time is the elapsed seconds multiplied by 1000, i.e. milliseconds,
# so it is reported in ms (the old label said "multiplied by 100 ... seconds").
print(f"Prediction Time (manual): {prediction_time:.6f} ms")

Confusion Matrix (sklearn):
[[897 136]
 [151 223]]

Classification Report (Manual):
              precision    recall  f1-score   support

         0.0       0.86      0.87      0.86      1033
         1.0       0.62      0.60      0.61       374

    accuracy                           0.80      1407
   macro avg       0.74      0.73      0.74      1407
weighted avg       0.79      0.80      0.79      1407


Accuracy (manual): 0.7960
F1 Score (manual): 0.6085

Training Time (manual): 1.311875 seconds
Prediction Time multiplied by 100 (manual): 1.149893 seconds
