In [None]:
import pandas as pd

# Read the Telco customer-churn CSV from the working directory into `df`.
df = pd.read_csv(filepath_or_buffer="WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Cell output: a (preview, shape) tuple, i.e. the head of the frame next to
# its (rows, columns) dimensions.
(df.head(), df.shape)


In [None]:
# Class balance of the target: number of rows per distinct Churn value.
df.loc[:, "Churn"].value_counts()


In [None]:
# Encode the target as 0/1.
# The mapping also carries the identity entries 1 -> 1 and 0 -> 0 so that
# re-running this cell on the already-encoded column is a no-op. A plain
# {"Yes": 1, "No": 0} map would turn every value into NaN on a second run,
# because the integers produced by the first run are not keys of that dict.
df["Churn"] = df["Churn"].map({"Yes": 1, "No": 0, 1: 1, 0: 0})


In [None]:
import matplotlib.pyplot as plt

# Overlay the tenure distributions of churned (Churn == 1) and retained
# (Churn == 0) customers on one axis: 30 bins each, drawn semi-transparent so
# the overlap stays visible.
plt.figure(figsize=(8, 5))
for churn_flag, legend_label in ((1, "Churned"), (0, "Stayed")):
    tenure_values = df.loc[df["Churn"] == churn_flag, "tenure"]
    plt.hist(tenure_values, bins=30, alpha=0.7, label=legend_label)
plt.xlabel("Tenure (months)")
plt.ylabel("Customers")
plt.title("Churn vs Tenure")
plt.legend()
plt.show()

In [None]:
# Compare the MonthlyCharges distribution of retained (Churn == 0) and
# churned (Churn == 1) customers side by side.
plt.figure(figsize=(8, 5))
plt.boxplot([
    df.loc[df["Churn"] == 0, "MonthlyCharges"],
    df.loc[df["Churn"] == 1, "MonthlyCharges"],
])
# Name the boxes through the tick API instead of boxplot(labels=...): that
# keyword is deprecated since Matplotlib 3.9 (renamed to `tick_labels`), while
# xticks behaves the same on old and new releases. Boxes are drawn at
# positions 1..N by default, hence ticks 1 and 2.
plt.xticks([1, 2], ["Stayed", "Churned"])
plt.ylabel("Monthly Charges")
plt.title("Monthly Charges vs Churn")
plt.show()


In [None]:
# Share of each Churn value within every Contract category
# (normalize="index" makes each row sum to 1).
pd.crosstab(index=df["Contract"], columns=df["Churn"], normalize="index")

In [None]:
# Share of each Churn value within every InternetService category
# (normalize="index" makes each row sum to 1).
pd.crosstab(index=df["InternetService"], columns=df["Churn"], normalize="index")

In [None]:
# Share of each Churn value within every PaymentMethod category
# (normalize="index" makes each row sum to 1).
pd.crosstab(index=df["PaymentMethod"], columns=df["Churn"], normalize="index")

In [None]:
# Remove the identifier column, which carries no predictive signal.
# errors="ignore" keeps the cell re-runnable: once the column is gone, a
# second execution would otherwise raise KeyError.
df = df.drop(columns="customerID", errors="ignore")

# Force TotalCharges to a numeric dtype; any entry that cannot be parsed as a
# number becomes NaN (errors="coerce") instead of raising.
# NOTE(review): presumably the column is read as text because of a few
# non-numeric entries in the CSV — confirm against the raw file.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")

# Drop every row that still has a missing value in any column, which includes
# the rows whose TotalCharges could not be parsed above.
df = df.dropna()

In [None]:
# Feature matrix = every column except the target.
# "Churn_Probability" is excluded as well: a later cell writes that column
# back onto `df`, so re-running this cell afterwards would otherwise feed the
# model's own output in as a feature. errors="ignore" makes the drop a no-op
# for that column on a first, top-to-bottom run when it does not exist yet.
X = df.drop(columns=["Churn", "Churn_Probability"], errors="ignore")
y = df["Churn"]

# One-hot encode the categorical columns; drop_first=True omits one level per
# category so the dummy columns are not perfectly collinear.
X = pd.get_dummies(X, drop_first=True)


In [None]:
# Dimensions of the one-hot encoded feature matrix as (rows, columns).
X.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.
# stratify=y keeps the proportion of churned vs retained customers the same
# in the train and test partitions, so the reported metrics are not skewed by
# an unlucky split of the minority class. random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split, allowing the
# solver up to 1000 iterations.
# NOTE(review): the features are passed in unscaled — confirm the solver
# converges without a ConvergenceWarning.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# fit() returns the estimator itself, so this shows the same cell output as
# ending the cell on the fit() call.
model

In [None]:
from sklearn.metrics import classification_report, roc_auc_score

# Score the held-out split.
# predict_proba returns one column per class; column 1 is taken as the
# probability of the positive class (Churn == 1).
class_probabilities = model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the hard predictions, followed by the
# threshold-independent ROC AUC computed from the probabilities.
report = classification_report(y_test, y_pred)
print(report)
print("ROC AUC:", roc_auc_score(y_test, y_prob))

In [None]:
import pandas as pd

# Rank features by standardized effect size.
#
# A raw logistic-regression coefficient is the change in log-odds per ONE UNIT
# of its feature. The model is fitted on unscaled inputs (0/1 dummy columns
# next to raw numeric columns), so |coefficient| alone is not comparable
# across features. Multiplying each coefficient by the standard deviation of
# its feature in the training split gives the change in log-odds per one
# standard deviation, which is comparable.
# astype(float) lets the boolean dummy columns take part in std().
feature_std = X_train.astype(float).std().to_numpy()

feature_importance = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": model.coef_[0],  # raw coefficient, kept for reference
})

# Impact = |coefficient * feature std|; sort so the strongest effects come first.
feature_importance["Impact"] = (feature_importance["Coefficient"] * feature_std).abs()
feature_importance = feature_importance.sort_values("Impact", ascending=False)

feature_importance.head(10)

In [None]:
# Attach the predicted probability of the positive class (column 1 of
# predict_proba) to every row of `df`.
# NOTE(review): X is the full feature matrix, so these scores include the
# rows the model was trained on — confirm that is acceptable for the
# downstream risk analysis.
preview_columns = [
    "tenure",
    "MonthlyCharges",
    "Contract",
    "InternetService",
    "PaymentMethod",
    "Churn_Probability",
]
df["Churn_Probability"] = model.predict_proba(X)[:, 1]

df[preview_columns].head()

In [None]:
# Customers whose predicted churn probability is strictly above 0.7.
is_high_risk = df["Churn_Probability"].gt(0.7)
high_risk = df.loc[is_high_risk]

# (rows, columns) of the high-risk subset.
high_risk.shape

In [None]:
# First ten high-risk customers, restricted to the columns of interest.
high_risk.loc[
    :,
    [
        "tenure",
        "MonthlyCharges",
        "Contract",
        "InternetService",
        "PaymentMethod",
        "Churn_Probability",
    ],
].head(10)


In [None]:
# Sum of MonthlyCharges over the high-risk subset.
# NOTE(review): high_risk is selected on predicted probability only, so it can
# include customers whose Churn is already 1 — confirm that is intended for
# this revenue figure.
lost_revenue = high_risk.loc[:, "MonthlyCharges"].sum()

lost_revenue

In [None]:
# Number of high-risk customers per Contract type.
high_risk.loc[:, "Contract"].value_counts()

In [None]:
# Number of high-risk customers per InternetService type.
high_risk.loc[:, "InternetService"].value_counts()

In [None]:
import pickle

# Serialize the fitted model to churn_model.pkl in the current working
# directory; the context manager closes the file even if pickling fails.
with open("churn_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Mount Google Drive when the notebook runs on Colab.
# The google.colab package only exists inside Colab, so the import is guarded:
# elsewhere the cell reports that it skipped the mount instead of aborting
# "Run All" with ModuleNotFoundError (a subclass of ImportError).
# NOTE(review): this cell runs after churn_model.pkl has been written to the
# working directory — confirm whether the model was meant to be saved on Drive.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping the Google Drive mount.")
else:
    drive.mount('/content/drive')