In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so the synthetic data set is identical on every run.
np.random.seed(42)

n = 200  # number of simulated customer rows

# Draw the feature columns one at a time. Every draw consumes the same global
# random stream, so the order below must stay fixed to reproduce the same values.
feature_draws = {}
feature_draws["Month"] = np.random.choice(["Nov", "Dec", "Jan"], size=n, p=[0.3, 0.35, 0.35])
feature_draws["ProductAvailable"] = np.random.choice(["Yes", "No"], size=n, p=[0.7, 0.3])
feature_draws["ServiceRating"] = np.random.randint(1, 6, size=n)  # integers 1-5 (upper bound is exclusive)
feature_draws["Price"] = np.random.normal(100, 10, size=n).round(2)
feature_draws["TenureMonths"] = np.random.randint(1, 48, size=n)  # integers 1-47 (upper bound is exclusive)

df = pd.DataFrame(feature_draws)

# Churn is a deterministic flag, not a random draw: it is 1 when the product is
# unavailable, OR the service rating is 2 or lower, OR the price is above 110,
# and 0 otherwise.
df["Churn"] = (
    df["ProductAvailable"].eq("No")
    | df["ServiceRating"].le(2)
    | df["Price"].gt(110)
).astype(int)

df.head()


Unnamed: 0,Month,ProductAvailable,ServiceRating,Price,TenureMonths,Churn
0,Dec,Yes,1,65.22,4,1
1,Jan,Yes,1,97.32,5,1
2,Jan,Yes,1,94.82,37,1
3,Dec,No,4,108.93,8,1
4,Nov,Yes,1,94.37,42,1


In [2]:
# Churn rate per month: the mean of the 0/1 Churn flag within each Month group.

df["Churn"].groupby(df["Month"]).mean()

# Reading the recorded result: January has the highest churn rate (~73%), December the
# lowest (~59%), and November sits in between (~67%).
# So yes, churn went up in January compared with December.

Month
Dec    0.588235
Jan    0.727273
Nov    0.666667
Name: Churn, dtype: float64

In [4]:
# Churn rate by stock availability (mean of the 0/1 Churn flag per group).

df["Churn"].groupby(df["ProductAvailable"]).mean()

# Every row with ProductAvailable == "No" is flagged as churned (rate 1.00), versus
# roughly 51% when the product is available -- a gap of almost 50 percentage points.
# Caveat: the Churn column is defined so that "No" always sets Churn = 1, so the 100%
# figure follows from how the data was simulated rather than being discovered here.

ProductAvailable
No     1.000000
Yes    0.514286
Name: Churn, dtype: float64

In [5]:
# Does service quality affect churn? Churn rate for each ServiceRating value.

df["Churn"].groupby(df["ServiceRating"]).mean()

# In the recorded result, ratings 1 and 2 show 100% churn, while ratings 3-5 sit between
# roughly 40% and 51% with no steady trend among them (4 is slightly higher than 3).
# So low ratings (2 or below) go with much higher churn. The pattern is a step at
# rating <= 2, which the churn rule uses as a trigger, not a gradual rise as the rating falls.

ServiceRating
1    1.000000
2    1.000000
3    0.437500
4    0.512821
5    0.403846
Name: Churn, dtype: float64

In [6]:
# Show the frame for a visual check; the rendered table is abbreviated with a "..." row.
df

Unnamed: 0,Month,ProductAvailable,ServiceRating,Price,TenureMonths,Churn
0,Dec,Yes,1,65.22,4,1
1,Jan,Yes,1,97.32,5,1
2,Jan,Yes,1,94.82,37,1
3,Dec,No,4,108.93,8,1
4,Nov,Yes,1,94.37,42,1
...,...,...,...,...,...,...
195,Dec,No,4,102.13,23,1
196,Jan,No,1,108.68,37,1
197,Jan,Yes,1,90.63,11,1
198,Jan,No,2,91.24,6,1


In [8]:
# Put the candidate churn drivers side by side for each month.

def _share_unavailable(availability):
    """Return the fraction of entries in `availability` that equal "No"."""
    return availability.eq("No").mean()


df.groupby("Month").agg({
    "Churn": "mean",
    "ServiceRating": "mean",
    "Price": "mean",
    "ProductAvailable": _share_unavailable,
})

# Note: the ProductAvailable column of this table is the share of rows where the product
# was NOT available (fraction of "No"), so a higher value means worse availability.
# In the recorded output, January has the highest churn (~73%) and the lowest mean service
# rating (~2.7). Its mean price (~100.9) is in line with December, and its unavailability
# share (~30%) sits between December (25%) and November (~35%).
# Service rating is therefore the factor that stands out for January. These are monthly
# averages, so this points to low service rating as the likely driver rather than proving it.

Unnamed: 0_level_0,Churn,ServiceRating,Price,ProductAvailable
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Dec,0.588235,3.235294,100.662941,0.25
Jan,0.727273,2.727273,100.910152,0.30303
Nov,0.666667,3.348485,103.292727,0.348485


In [None]:
# final insight
# From my analysis, two factors played a major part in customer churn: service rating and product availability.
# Price played a lesser role.
# When products are not available, customers leave, which increases churn; when service ratings are low, churn increases too.

#Recommendations
# Fix the product-unavailability problem: always take inventory and restock when necessary.
# Improve your service so that customers are left with a lasting positive impression and want to come back for more.
# Once these two are properly carried out, you might consider a price reduction, provided you can afford it.