In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Load the Telco churn CSV. The parsed file is handled as a single text
# column (column 0): quotes are stripped and each line is split on commas
# by hand instead of relying on the CSV parser's own field splitting.
df_raw = pd.read_csv(
    "../data/WA_Fn-UseC_-Telco-Customer-Churn.csv",
    header=None,
    engine="python",
)

# Remove every double quote (literal match, not a regex), then split the
# remaining text into one column per comma-separated field.
df_raw[0] = df_raw[0].str.replace('"', '', regex=False)
df = df_raw[0].str.split(",", expand=True)

# The first parsed line carries the column names: promote it to the header
# and keep the remaining lines as data with a fresh 0-based index.
header = df.iloc[0]
df = df.iloc[1:].reset_index(drop=True)
df.columns = header

# Cast the numeric columns; values that cannot be parsed become NaN.
numeric_cols = ["tenure", "MonthlyCharges", "TotalCharges", "SeniorCitizen"]
df = df.assign(
    **{col: pd.to_numeric(df[col], errors="coerce") for col in numeric_cols}
)

# Drop every row that has a missing value in any column.
df = df.dropna()

df.head()


In [None]:
# Share of each Churn value within every contract type (each row sums to 1).
pd.crosstab(index=df["Contract"], columns=df["Churn"], normalize="index")


In [None]:
# Share of each Churn value within every internet service type (each row sums to 1).
pd.crosstab(index=df["InternetService"], columns=df["Churn"], normalize="index")


In [None]:
# Mean monthly charge and mean tenure for each Churn value.
(
    df[["Churn", "MonthlyCharges", "tenure"]]
    .groupby("Churn")
    .mean()
)


In [None]:
# Dataset-wide averages used as thresholds for the two numeric risk factors.
mean_monthly = df["MonthlyCharges"].mean()
mean_tenure = df["tenure"].mean()

# One boolean flag per risk factor; every factor that applies adds one point,
# so the score ranges from 0 to 4.
risk_factors = [
    df["Contract"] == "Month-to-month",
    df["InternetService"] == "Fiber optic",
    df["MonthlyCharges"] > mean_monthly,
    df["tenure"] < mean_tenure,
]
df["risk_score"] = sum(flag.astype("int64") for flag in risk_factors)

# A customer is flagged as high risk when at least three factors apply.
df["is_high_risk"] = df["risk_score"].ge(3).astype(int)

# Share of each Churn value within every risk score (each row sums to 1).
pd.crosstab(index=df["risk_score"], columns=df["Churn"], normalize="index")


In [None]:
# Encode the churn label as 0/1. Labels that are not "No"/"Yes" pass through
# unchanged, so re-running this cell keeps the already-encoded 0/1 values
# instead of turning the whole column into NaN (which a plain dict .map()
# would do on the second run).
churn_codes = {"No": 0, "Yes": 1}
df["Churn"] = df["Churn"].map(lambda label: churn_codes.get(label, label))

# Features: everything except the identifier and the target. The remaining
# text columns are one-hot encoded, dropping the first level of each.
X = df.drop(["customerID", "Churn"], axis=1)
y = df["Churn"]

X = pd.get_dummies(X, drop_first=True)

# Hold out 20% for evaluation. stratify=y keeps the churn / no-churn
# proportions the same in the train and test parts, so the reported metrics
# are not skewed by an unlucky split of the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# class_weight="balanced" reweights the classes inversely to their frequency.
model = LogisticRegression(max_iter=2000, class_weight="balanced")
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


In [None]:
# Pair every model input column with its fitted logistic-regression weight.
# NOTE: the features are fed to the model without standardization, so the
# magnitudes are not directly comparable across features on different scales.
coefficients = pd.DataFrame(
    {"Feature": X.columns, "Coefficient": model.coef_[0]}
)

# The ten features with the largest (most positive) coefficients.
coefficients.sort_values("Coefficient", ascending=False).iloc[:10]
