In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("solved_data.csv")

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

# -----------------------------
# 1. Load data
# -----------------------------
data = pd.read_csv("solved_data.csv")

# -----------------------------
# 2. Split into train/val/test
# -----------------------------
train_data, temp_data = train_test_split(data, train_size=2500, shuffle=True, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=500, shuffle=True, random_state=42)

# -----------------------------
# 3. Separate features and labels
# -----------------------------
X_train = train_data.drop(columns=["ChurnStatus", "CustomerID"])
y_train = train_data["ChurnStatus"]

X_val = val_data.drop(columns=["ChurnStatus", "CustomerID"])
y_val = val_data["ChurnStatus"]

X_test = test_data.drop(columns=["ChurnStatus", "CustomerID"])
y_test = test_data["ChurnStatus"]

# -----------------------------
# 4. Define degrees to test
# -----------------------------
degrees = [1, 2, 5, 9]  # 1 = linear, others = polynomial
results = {}

# -----------------------------
# 5. Train & Evaluate models
# -----------------------------
for deg in degrees:
    # Apply polynomial features (degree=1 is just linear)
    if deg == 1:
        X_train_poly = X_train
        X_val_poly = X_val
        X_test_poly = X_test
    else:
        poly = PolynomialFeatures(degree=deg, include_bias=False)
        X_train_poly = poly.fit_transform(X_train)
        X_val_poly = poly.transform(X_val)
        X_test_poly = poly.transform(X_test)
    
    # Train logistic regression
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_poly, y_train)
    
    # Predict
    y_train_pred = model.predict(X_train_poly)
    y_val_pred = model.predict(X_val_poly)
    y_test_pred = model.predict(X_test_poly)
    
    # Metrics
    results[deg] = {
        "Train Accuracy": accuracy_score(y_train, y_train_pred),
        "Train Precision": precision_score(y_train, y_train_pred),
        "Train Recall": recall_score(y_train, y_train_pred),
        "Val Accuracy": accuracy_score(y_val, y_val_pred),
        "Val Precision": precision_score(y_val, y_val_pred),
        "Val Recall": recall_score(y_val, y_val_pred),
        "Test Accuracy": accuracy_score(y_test, y_test_pred),
        "Test Precision": precision_score(y_test, y_test_pred),
        "Test Recall": recall_score(y_test, y_test_pred)
    }

# -----------------------------
# 6. Display results
# -----------------------------
for deg, metrics in results.items():
    print(f"\n--- Logistic Regression (Degree {deg}) ---")
    for metric_name, value in metrics.items():
        print(f"{metric_name}: {value:.4f}")



--- Logistic Regression (Degree 1) ---
Train Accuracy: 0.9760
Train Precision: 0.7957
Train Recall: 0.6435
Val Accuracy: 0.9780
Val Precision: 0.7500
Val Recall: 0.6316
Test Accuracy: 0.9800
Test Precision: 0.8824
Test Recall: 0.6522

--- Logistic Regression (Degree 2) ---
Train Accuracy: 0.9816
Train Precision: 0.8416
Train Recall: 0.7391
Val Accuracy: 0.9780
Val Precision: 0.7222
Val Recall: 0.6842
Test Accuracy: 0.9840
Test Precision: 0.8571
Test Recall: 0.7826

--- Logistic Regression (Degree 5) ---
Train Accuracy: 0.9984
Train Precision: 0.9912
Train Recall: 0.9739
Val Accuracy: 0.9900
Val Precision: 0.7917
Val Recall: 1.0000
Test Accuracy: 0.9840
Test Precision: 0.8261
Test Recall: 0.8261

--- Logistic Regression (Degree 9) ---
Train Accuracy: 1.0000
Train Precision: 1.0000
Train Recall: 1.0000
Val Accuracy: 0.9880
Val Precision: 0.7826
Val Recall: 0.9474
Test Accuracy: 0.9820
Test Precision: 0.8500
Test Recall: 0.7391
