In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")


In [None]:
# Read the raw Telco churn CSV into the working frame `df`.
# NOTE(review): absolute path, presumably a Colab session directory — confirm before running elsewhere.
df = pd.read_csv("/content/Telco-Customer-Churn.csv")

In [None]:
# Show the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [None]:
# TotalCharges cleanup, done in a single pass:
#   1. parse the column as numbers; anything unparseable is coerced to NaN
#   2. replace every NaN with 0 (the notebook treats these rows as early-tenure users)
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce").fillna(0)


In [None]:
# Ordinal encoding of the contract label; labels absent from this mapping
# come out of .map() as NaN.
commitment_map = {"Month-to-month": 0, "One year": 1, "Two year": 2}

# is_month_to_month: 1 when the contract is "Month-to-month", otherwise 0.
df["is_month_to_month"] = df["Contract"].eq("Month-to-month").astype(int)

# commitment_level: numeric commitment looked up from the contract label.
df["commitment_level"] = df["Contract"].map(commitment_map)

# tenure_bucket
def tenure_bucket(t):
    """Classify a tenure value into a coarse bucket.

    Returns "new" when t <= 6, "mid" when t <= 24, and "long" for every
    other value (including values for which both comparisons are false).
    """
    # Upper bounds are checked in ascending order; the first match wins.
    for upper_bound, label in ((6, "new"), (24, "mid")):
        if t <= upper_bound:
            return label
    return "long"

# tenure_bucket: label each row by passing its tenure through tenure_bucket().
df["tenure_bucket"] = df["tenure"].map(tenure_bucket)

# early_tenure_flag: 1 when tenure is at most 6, otherwise 0.
df["early_tenure_flag"] = df["tenure"].le(6).astype(int)


In [None]:
# avg_monthly_value: total charges spread over (tenure + 1).
# The +1 keeps the denominator non-zero when tenure is 0.
df["avg_monthly_value"] = df["TotalCharges"].div(df["tenure"].add(1))

# charge_to_tenure_ratio: current monthly charge over the same (tenure + 1) denominator.
df["charge_to_tenure_ratio"] = df["MonthlyCharges"].div(df["tenure"].add(1))

# Median monthly charge, used here and in later cells as the "high charge" cutoff.
median_charge = df["MonthlyCharges"].median()

# high_charge_low_tenure: above-median monthly charge AND tenure of at most 6.
df["high_charge_low_tenure"] = (
    df["MonthlyCharges"].gt(median_charge) & df["tenure"].le(6)
).astype(int)

# value_mismatch_score: monthly charge relative to (avg_monthly_value + 1).
df["value_mismatch_score"] = df["MonthlyCharges"].div(df["avg_monthly_value"].add(1))


In [None]:
# support_gap: InternetService is anything other than "No" while TechSupport is "No".
df["support_gap"] = (
    df["InternetService"].ne("No") & df["TechSupport"].eq("No")
).astype(int)

# no_support_high_cost: a support gap combined with an above-median monthly charge.
# median_charge is the MonthlyCharges median computed in an earlier cell.
df["no_support_high_cost"] = (
    df["support_gap"].eq(1) & df["MonthlyCharges"].gt(median_charge)
).astype(int)

# service_complexity_score: number of the listed service columns equal to "Yes".
service_cols = [
    "OnlineSecurity", "OnlineBackup", "DeviceProtection",
    "StreamingTV", "StreamingMovies", "MultipleLines",
]
df["service_complexity_score"] = df[service_cols].eq("Yes").sum(axis="columns")


In [None]:
# fiber_risk_flag: 1 when InternetService is "Fiber optic", otherwise 0.
df["fiber_risk_flag"] = df["InternetService"].eq("Fiber optic").astype(int)

# legacy_service_flag: 1 when InternetService is "DSL", otherwise 0.
df["legacy_service_flag"] = df["InternetService"].eq("DSL").astype(int)

# addon_dependency_score: number of the listed add-on columns equal to "Yes".
addon_cols = [
    "OnlineSecurity", "OnlineBackup", "DeviceProtection",
    "StreamingTV", "StreamingMovies",
]
df["addon_dependency_score"] = df[addon_cols].eq("Yes").sum(axis="columns")


In [None]:
# manual_payment_flag: 1 for the two check-based payment methods.
df["manual_payment_flag"] = (
    df["PaymentMethod"].isin(["Electronic check", "Mailed check"]).astype(int)
)

# auto_payment_flag: 1 for the two payment methods labelled "(automatic)".
df["auto_payment_flag"] = (
    df["PaymentMethod"]
    .isin(["Bank transfer (automatic)", "Credit card (automatic)"])
    .astype(int)
)

# payment_friction_score: product of two 0/1 flags, so it is 1 only when the
# customer both pays manually and is on a month-to-month contract.
df["payment_friction_score"] = df["manual_payment_flag"].mul(df["is_month_to_month"])


In [None]:
# friction_score: row-wise total of five 0/1 indicator columns, added in the
# order listed, so the score ranges from 0 to 5.
df["friction_score"] = sum(
    df[indicator]
    for indicator in (
        "early_tenure_flag",
        "high_charge_low_tenure",
        "support_gap",
        "manual_payment_flag",
        "fiber_risk_flag",
    )
)

# early_warning_flag: 1 once at least three indicators are present (threshold = 3).
df["early_warning_flag"] = df["friction_score"].ge(3).astype(int)


In [None]:
# Show the first five rows again, now including the engineered feature columns.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,value_mismatch_score,no_support_high_cost,fiber_risk_flag,legacy_service_flag,addon_dependency_score,manual_payment_flag,auto_payment_flag,payment_friction_score,friction_score,early_warning_flag
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,1.874411,0,0,1,1,1,0,1,3,1
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,1.035724,0,0,1,2,1,0,0,2,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,1.453441,0,0,1,2,1,0,1,3,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,1.031297,0,0,1,2,0,1,0,0,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,1.371484,1,1,0,0,1,0,1,5,1


In [None]:
# Preview the first ten rows, restricted to a selection of the engineered columns.
df.loc[
    :,
    [
        "is_month_to_month",
        "commitment_level",
        "early_tenure_flag",
        "value_mismatch_score",
        "support_gap",
        "manual_payment_flag",
        "fiber_risk_flag",
        "friction_score",
        "early_warning_flag",
    ],
].head(10)


Unnamed: 0,is_month_to_month,commitment_level,early_tenure_flag,value_mismatch_score,support_gap,manual_payment_flag,fiber_risk_flag,friction_score,early_warning_flag
0,1,0,1,1.874411,1,1,0,3,1
1,0,1,0,1.035724,1,1,0,2,0
2,1,0,1,1.453441,1,1,0,3,1
3,0,1,0,1.031297,0,0,0,0,0
4,1,0,1,1.371484,1,1,1,5,1
5,1,0,0,1.081193,1,1,1,3,1
6,1,0,0,1.038988,1,0,1,2,0
7,1,0,0,1.045861,1,1,0,2,0
8,1,0,0,0.988342,0,1,1,2,0
9,0,1,0,0.996198,1,0,0,1,0


In [None]:
# Persist the full frame (original plus engineered columns) without the index column.
df.to_csv("churn_features.csv", index=False)