In [5]:
import pandas as pd
import numpy as np


In [6]:
df = pd.read_csv('../data/feature_ready_churn_data.csv')
df.head()


Unnamed: 0,customer_id,gender,senior_citizen,tenure,contract_type,monthly_charges,total_charges,internet_service,streaming,security,tech_support,churn,tenure_group,high_monthly_charge,service_count,is_monthly_contract
0,7590-VHVEG,Female,0,1,Month-to-month,29.85,29.85,,0.0,0.0,0.0,0,0-6,0,0.0,1
1,5575-GNVDE,Male,0,34,One year,56.95,1889.5,,0.0,1.0,0.0,0,24+,0,1.0,0
2,3668-QPYBK,Male,0,2,Month-to-month,53.85,108.15,,0.0,1.0,0.0,1,0-6,0,1.0,1
3,7795-CFOCW,Male,0,45,One year,42.3,1840.75,,0.0,1.0,1.0,0,24+,0,2.0,0
4,9237-HQITU,Female,0,2,Month-to-month,70.7,151.65,,0.0,0.0,0.0,1,0-6,1,0.0,1


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


In [8]:
X = df[
    ['tenure',
     'high_monthly_charge',
     'service_count',
     'is_monthly_contract']
]

y = df['churn']

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [9]:
df['churn_probability'] = model.predict_proba(X)[:, 1]
df[['churn', 'churn_probability']].head()


Unnamed: 0,churn,churn_probability
0,0,0.37782
1,0,0.051987
2,1,0.346436
3,0,0.034617
4,1,0.691063


In [10]:
df['risk_level'] = pd.cut(
    df['churn_probability'],
    bins=[0, 0.3, 0.6, 1],
    labels=['Low', 'Medium', 'High']
)


In [11]:
df['risk_level'].value_counts()


risk_level
Low       4044
Medium    2180
High       819
Name: count, dtype: int64

In [12]:
median_revenue = df['monthly_charges'].median()
df['value_level'] = np.where(
    df['monthly_charges'] > median_revenue,
    'High',
    'Low'
)


In [13]:
df['segment'] = (
    df['risk_level'].astype(str)
    + ' Risk - '
    + df['value_level'].astype(str)
    + ' Value'
)


In [14]:
df['segment'].value_counts()


segment
Low Risk - Low Value        2401
Low Risk - High Value       1643
Medium Risk - Low Value     1127
Medium Risk - High Value    1053
High Risk - High Value       819
Name: count, dtype: int64

In [15]:
high_risk_high_value = df[
    (df['risk_level'] == 'High') &
    (df['value_level'] == 'High')
]

high_risk_high_value.shape


(819, 20)

In [16]:
monthly_revenue_at_risk = high_risk_high_value['monthly_charges'].sum()
monthly_revenue_at_risk


np.float64(68108.4)

In [17]:
estimated_monthly_savings = monthly_revenue_at_risk * 0.20
estimated_annual_savings = estimated_monthly_savings * 12

estimated_monthly_savings, estimated_annual_savings


(np.float64(13621.68), np.float64(163460.16))

In [18]:
retention_actions = {
    'High Risk - High Value': 'Personal call + customized discount',
    'High Risk - Low Value': 'Automated email offer',
    'Medium Risk - High Value': 'Loyalty reward',
    'Low Risk - High Value': 'Thank-you benefits',
    'Low Risk - Low Value': 'No immediate action'
}

df['recommended_action'] = df['segment'].map(retention_actions)
df[['segment', 'recommended_action']].head()


Unnamed: 0,segment,recommended_action
0,Medium Risk - Low Value,
1,Low Risk - Low Value,No immediate action
2,Medium Risk - Low Value,
3,Low Risk - Low Value,No immediate action
4,High Risk - High Value,Personal call + customized discount


In [19]:
df.to_csv('../data/churn_retention_strategy.csv', index=False)
