In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the customer-churn CSV; the path is relative to the notebook's working directory.
df=pd.read_csv("Customer Churn.csv")
# Preview the first rows only, rather than echoing the whole frame into the cell output.
df.head()

## DATA CLEANING, TRANSFORMATION & EDA

In [None]:
# Display the column labels of the loaded frame.
df.columns

In [None]:
# Standardise the column names by lower-casing every label.
df.columns = df.columns.map(str.lower)

In [None]:
# Number of distinct values in each column.
df.nunique()

In [None]:
# Recode the numeric senior-citizen flag as "No"/"Yes" so the column is readable by anyone.
# An explicit value mapping keeps this cell idempotent: re-running it leaves already
# converted "No"/"Yes" values untouched, whereas an "anything that is not 0 becomes Yes"
# rule would turn every row into "Yes" on a second run.
# NOTE(review): assumes the flag only takes the values 0 and 1 — confirm with df.nunique().
df['seniorcitizen']=df['seniorcitizen'].replace({0:"No",1:"Yes"})

In [None]:
# Print each column's dtype and non-null count.
df.info()

In [None]:
# Show five random rows; the fixed seed makes the same rows appear on every run,
# so the displayed sample is reproducible after Restart & Run All.
df.sample(5, random_state=42)

### Replacing Blanks with 0 in TotalCharges as Tenure is 0 and no total charges are needed, also converting its data type to Float.

In [None]:
# Blank (" ") entries are replaced with 0 so the column can be cast to a number.
# Cast to float64 explicitly: the "f" type code is NumPy's single-precision float32,
# which rounds the stored charges and loses accuracy in sums and means.
df['totalcharges']=df['totalcharges'].replace(" ",0).astype("float64")

### Detecting Missing Values & Duplications

In [None]:
# Missing-value count per column.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Number of rows that exactly repeat an earlier row.
df.duplicated().sum()

In [None]:
# Number of customerid values that repeat an earlier one (0 means every id is unique).
df['customerid'].duplicated().sum()

## ANALYSIS

### Percent of Customers that Churned out

In [None]:
# Bar chart of the number of customers in each churn category, with the count printed on each bar.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x='churn', ax=ax)
churn_bars = ax.containers[0]
ax.bar_label(churn_bars)
ax.set_title("Count of Customer by Churn")
plt.show()

In [None]:
# Pie chart of the share of customers in each churn category.
fig, ax = plt.subplots(figsize=(3, 4))
# One row per churn value, holding the number of customers with that value.
gb = df.groupby("churn").agg({'churn': 'count'})
ax.pie(
    gb['churn'],
    labels=gb.index,
    autopct="%1.2f%%",
    colors=['blue', 'orange'],
)
ax.set_title("Percentage of Churned Customers")
plt.show()

#### It can be concluded that 26.54% of the total customers have churned out.

### Gender & Senior Citizen Status Wise Customer Churn Analysis

In [None]:
# Churn split by gender; bar heights are percentages rather than raw counts.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(
    data=df,
    x='gender',
    hue='churn',
    palette='bright',
    stat='percent',
    ax=ax,
)
# Label the bars of both churn hue groups with their percentage.
for hue_bars in ax.containers[:2]:
    ax.bar_label(hue_bars, fmt="%1.2f%%")
ax.set_title("Customer Churn by Gender")
plt.show()

In [None]:
# Churn split by senior-citizen status; bar heights are percentages rather than raw counts.
plt.figure(figsize=(5,6))
# `dodge` accepts a bool or "auto"; the previous value 'fill' is not a documented option
# and only produced side-by-side bars because a non-empty string is truthy.
ax=sns.countplot(data=df,x='seniorcitizen',hue='churn',
              palette='bright',stat='percent',dodge=True)
# Label the bars of both churn hue groups with their percentage.
ax.bar_label(ax.containers[0],fmt='%1.2f%%')
ax.bar_label(ax.containers[1],fmt='%1.2f%%')
plt.title("Percentage Distribution of Customer Churn by Senior Citizen Status")
plt.show()

#### Comparing the charts, we can conclude that churn is more common among senior citizens, while the churn rates of male and female customers are approximately equal.

### Churn Analysis based on Tenure

In [None]:
# Stacked tenure histogram (50 bins), split by churn and drawn as polygons.
plt.figure(figsize=(12,6))
sns.histplot(data=df,x='tenure',hue='churn',bins=50,
             multiple='stack',element='poly')
plt.title("Tenure wise Churn Analysis (Stacked) ")
# Render the figure explicitly so the cell output is the chart, not the Text(...) repr of the title.
plt.show()

#### From this analysis, we can conclude that people who have used our services for a long time have stayed, whereas the ones who used them for a shorter time have churned.

### Churning Rate by Contract Type

In [None]:
# Temporary modification of the churn column, done on a copy so that df itself is not altered.
dfcopy = df.copy()
# True where churn is exactly 'Yes', False for every other value.
dfcopy['churn'] = dfcopy['churn'].eq('Yes')

In [None]:
# Churn rate per contract type, expressed per 100 customers.
churn_by_contract = dfcopy.groupby('contract')['churn']
total = churn_by_contract.count()    # customers in each contract type
churned = churn_by_contract.sum()    # boolean sum = customers flagged as churned
rate = churned.mul(100).div(total)
rate

In [None]:
# Customer counts per contract type, split by churn, with the count printed on each bar.
ax=sns.countplot(data=df,x='contract',hue='churn')
ax.bar_label(ax.containers[0])
ax.bar_label(ax.containers[1])
plt.title("Customer Churn by Contract type")
# Render the figure explicitly so the cell output is the chart, not the Text(...) repr of the title.
plt.show()

In [None]:
# Bar chart of the churn rate (per 100 customers) for each contract type.
ax=sns.barplot(data=rate)
# Two-decimal percent labels, matching the label format used on the other percentage charts.
ax.bar_label(ax.containers[0],fmt="%.2f%%")
plt.ylabel("churning rate (per 100 customers)")
plt.title("Customer Churning Rate by Contract Type")
# Render the figure explicitly so the cell output is the chart, not the Text(...) repr of the title.
plt.show()

#### We can see from this analysis that customers with a Month-to-Month contract are more likely to churn than customers with a One Year or Two Year contract.

### Customer Churn Analysis Across Multiple Categorical Variables

In [None]:
# Categorical columns that each get their own churn count plot.
cat_cols = [
    'partner',
    'dependents',
    'phoneservice',
    'multiplelines',
    'internetservice',
    'onlinesecurity',
    'onlinebackup',
    'deviceprotection',
    'techsupport',
    'streamingtv',
    'streamingmovies',
    'paperlessbilling',
]


In [None]:
# Draw one churn-split count plot per categorical column on a grid four panels wide.
panels_per_row = 4
rows = -(-len(cat_cols) // panels_per_row)  # ceiling division: enough rows for every column
fig = plt.figure(figsize=(20, rows * 4))
for i, col in enumerate(cat_cols, start=1):
    panel = fig.add_subplot(rows, panels_per_row, i)
    sns.countplot(data=df, x=col, hue='churn', ax=panel)
    panel.set_title(f"Count Plot of {col.capitalize()}")
fig.tight_layout()
plt.show()


#### It can be concluded that :
* ##### Customer churn is highest among month-to-month contract customers, electronic check users, and customers without security or support services.
* ##### Customers with long-term contracts, partners/dependents, and auto-payment methods show significantly lower churn.
* ##### Overall, service commitment and value-added features play a key role in customer retention.

### Payment Method Type Wise Customer Churn Analysis

In [None]:
# Customer counts per payment method, split by churn, with the count printed on each bar.
ax=sns.countplot(data=df,x='paymentmethod',hue='churn')
ax.bar_label(ax.containers[0])
ax.bar_label(ax.containers[1])
# Tilt the tick labels so the payment-method names stay readable.
plt.xticks(rotation=20)
# The title previously read "Contract type", copied from the contract chart; this chart is grouped by payment method.
plt.title("Customer Churn by Payment Method")
# Render the figure explicitly so the cell output is the chart, not the Text(...) repr of the title.
plt.show()

#### We can conclude that customers using Electronic Check show the highest churn, while those using automatic payment methods (Bank Transfer and Credit Card) have the lowest churn. 
#### This indicates that auto-payment customers are more stable, whereas manual payment methods are linked to higher churn risk.