In [1]:
import pandas as pd

# Read the cleaned CSV into the working DataFrame `df`.
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="../data/processed/cleaned_data.csv")

In [2]:
# Encode the target as 1 ("Yes") / 0 ("No"); any other label becomes NaN.
# Guard on dtype so the cell is idempotent: mapping an already-encoded numeric
# column through the Yes/No dict would turn every value into NaN on a re-run.
if not pd.api.types.is_numeric_dtype(df["Churn"]):
    df["Churn"] = df["Churn"].map({"Yes": 1, "No": 0})

In [3]:
# Bucket tenure into labelled bands.
# - include_lowest=True closes the first bin on the left, so tenure == 0 lands in
#   "0-12" instead of becoming NaN (pd.cut bins are left-open, right-closed by default).
# - The last edge is open-ended to match the "48+" label; a hard cap of 72 would
#   turn any longer tenure into NaN.
df["tenure_group"] = pd.cut(
    df["tenure"],
    bins=[0, 12, 24, 48, float("inf")],
    labels=["0-12", "12-24", "24-48", "48+"],
    include_lowest=True,
)

In [4]:
# Columns that each describe one service.
services = [
    "PhoneService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
]

# Per-row count of service columns whose value is exactly "Yes";
# any other value (including missing) contributes 0.
df["service_count"] = df[services].eq("Yes").sum(axis=1)

In [5]:
# Force numeric dtypes on the two columns used in the arithmetic features.
# TotalCharges: values that cannot be parsed are coerced to NaN and then set to 0.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce").fillna(0)
# tenure: cast to integer (raises if the column contains missing values).
df["tenure"] = df["tenure"].astype(int)

In [6]:
# Total charges spread over (tenure + 1); the +1 keeps the denominator
# non-zero for rows where tenure is 0.
df["avg_revenue_per_month"] = df["TotalCharges"].div(df["tenure"].add(1))

In [7]:
# Ordinal score per contract type: month-to-month is highest (3), two-year lowest (1).
contract_map = {"Month-to-month": 3, "One year": 2, "Two year": 1}

# Contract values that are not keys of contract_map end up as NaN.
df["contract_risk_score"] = df["Contract"].map(contract_map)

In [8]:
# Flag rows whose TotalCharges is at or above the 75th percentile of the column.
# NOTE(review): the cut-off is computed over every row in `df`; if a train/test
# split happens downstream, confirm that using the full-data quantile is acceptable.
threshold = df["TotalCharges"].quantile(0.75)
df["high_value_customer"] = df["TotalCharges"].ge(threshold).astype(int)

# 1 when the payment method mentions "automatic" (case-insensitive), else 0.
# Vectorised .str methods replace the per-row Python lambda, and na=False makes a
# missing PaymentMethod count as 0 instead of raising AttributeError on a float NaN.
df["auto_payment"] = (
    df["PaymentMethod"]
    .str.lower()
    .str.contains("automatic", regex=False, na=False)
    .astype(int)
)

In [9]:
# One-hot encode the non-numeric (object / string / category) columns of `df`.
# drop_first=True omits the first level of each encoded column, leaving k-1
# indicator columns per k-level feature.
df_model = pd.get_dummies(data=df, drop_first=True)

In [10]:
# Write the encoded frame to disk without the index column.
df_model.to_csv(
    path_or_buf="../data/processed/model_ready_data.csv",
    index=False,
)