In [84]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the customer churn dataset from a CSV file (path is relative to the working directory).
df = pd.read_csv('Customer Churn.csv')
# Display the (rows, columns) shape of the loaded frame.
df.shape

In [None]:
# Print each column's dtype and non-null count.
df.info()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

In [None]:
# TotalCharges presumably arrives as text because some rows hold a blank string
# instead of a number (verify against the raw CSV). Treat every empty or
# whitespace-only entry as 0 -- not just the exact single-space string -- and
# convert the column to float. Re-running this cell on the already-converted
# column is a no-op.
blank_charges = df['TotalCharges'].astype(str).str.strip() == ""
df['TotalCharges'] = df['TotalCharges'].mask(blank_charges, 0).astype(float)
df.info()

In [None]:
# Total number of missing (NaN) cells across the whole frame.
df.isna().sum().sum()

In [None]:
def check(value):
    """Translate a SeniorCitizen flag into a "Yes"/"No" label.

    Parameters
    ----------
    value : int or str
        ``1`` maps to "Yes"; any other non-label value maps to "No".
        A value that is already "Yes" or "No" is returned unchanged.

    Returns
    -------
    str
        "Yes" or "No".
    """
    # Pass already-translated labels through untouched. Without this guard,
    # re-running the cell that applies this function would turn every "Yes"
    # into "No" (because "Yes" != 1) and silently corrupt the column.
    if isinstance(value, str) and value in ("Yes", "No"):
        return value
    return "Yes" if value == 1 else "No"
# Recode SeniorCitizen element-wise through check(), then preview the first 30 rows.
senior_labels = df['SeniorCitizen'].apply(check)
df['SeniorCitizen'] = senior_labels
df.head(30)

In [None]:
# Re-inspect column dtypes after the SeniorCitizen recoding.
df.info()

In [None]:
# Show rows that exactly repeat an earlier row; an empty result means there are no duplicates.
df[df.duplicated()]

In [None]:
# Bar chart of the number of customers per Churn value, with the count printed on each bar.
ax = sns.countplot(data=df, x='Churn')
churn_bars = ax.containers[0]
ax.bar_label(churn_bars)
ax.set_title("Count of Customer by Churn", fontsize=20)
plt.show()

In [None]:
plt.figure(figsize=(3,4))
# Number of customers for each Churn value (one row per category).
gb = df.groupby("Churn").agg({'Churn':'count'})
# Take the wedge labels from the grouped index instead of hard-coding
# ['No', 'Yes'], so each label always matches the count it is drawn next to.
plt.pie(gb['Churn'], autopct="%.2f%%", labels=gb.index.tolist())
plt.title("Percentage of Churned Customer",fontsize=10)
plt.show()

# From the pie chart above we can conclude that 26.54% of our customers have churned.
# Now let's explore the reasons behind it.

In [None]:
# Customer counts per gender, split by Churn.
plt.figure(figsize=(4,4))
ax = sns.countplot(x='gender', data=df, hue='Churn')
# Each hue level gets its own bar container; label all of them so the
# counts are shown for every Churn value, not only the first one.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Churn By Gender',fontsize=20)
plt.show()

In [None]:
# Number of customers in each SeniorCitizen group, with the count printed on each bar.
plt.figure(figsize=(4, 4))
ax = sns.countplot(data=df, x='SeniorCitizen')
senior_bars = ax.containers[0]
ax.bar_label(senior_bars)
ax.set_title("Count of Customers by Senior Citizen", fontsize=15)
plt.show()

In [None]:
# Within each SeniorCitizen group, the share of every Churn value, expressed in percent
# (rows: SeniorCitizen, columns: Churn).
churn_share = df.groupby('SeniorCitizen')['Churn'].value_counts(normalize=True)
total_counts = churn_share.unstack() * 100

# Draw the shares as one stacked bar per SeniorCitizen group.
fig, ax = plt.subplots(figsize=(4, 4))
total_counts.plot(kind='bar', stacked=True, ax=ax, color=['#1f77b4', '#ff7f0e'])

# Write each segment's percentage at the centre of its rectangle.
for segment in ax.patches:
    left, bottom = segment.get_xy()
    seg_width = segment.get_width()
    seg_height = segment.get_height()
    ax.text(left + seg_width / 2, bottom + seg_height / 2, f'{seg_height:.1f}%', ha='center', va='center')

ax.set_title('Churn by Senior Citizen (Stacked Bar Chart)')
ax.set_xlabel('SeniorCitizen')
ax.set_ylabel('Percentage (%)')
plt.xticks(rotation=0)
ax.legend(title='Churn', bbox_to_anchor=(0.9, 0.9))

plt.show()

# Comparatively, a greater percentage of customers in the senior citizen category have churned.

In [None]:
# Distribution of customer tenure, split by Churn, using 72 bins.
plt.figure(figsize=(9,4))
ax = sns.histplot(x = "tenure", data=df, hue='Churn', bins=72)
# Give the figure a title like every other plot in this notebook so it
# can be understood on its own.
plt.title("Customer Tenure by Churn")
plt.show()

# Customers who have used our services for a long time have mostly stayed, while customers who have used our services
# for only 1 or 2 months have churned.

In [None]:
# Number of customers per Contract value, with the count printed on each bar.
plt.figure(figsize=(4, 4))
ax = sns.countplot(data=df, x='Contract')
contract_bars = ax.containers[0]
ax.bar_label(contract_bars)
ax.set_title("Count of Customer by Contract")
plt.show()

In [None]:
# Service-related columns to compare against churn, one subplot each.
columns = ['PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
           'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']

# Grid layout: fixed number of columns, as many rows as needed (ceiling division).
n_cols = 3
n_rows = (len(columns) + n_cols - 1) // n_cols

# squeeze=False keeps `axes` a 2-D array even for a single-row or 1x1 grid,
# so the flatten() below works for any number of plotted columns.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, n_rows * 4), squeeze=False)
axes = axes.flatten()

# One count plot per service column, split by Churn. The hue is given as a
# column name, consistent with the other plots in this notebook.
for i, col in enumerate(columns):
    sns.countplot(x=col, data=df, ax=axes[i], hue="Churn")
    axes[i].set_title(f'Count Plot of {col}')
    axes[i].set_xlabel(col)
    axes[i].set_ylabel('Count')

# Remove the unused grid cells. Start from len(columns) rather than relying
# on the loop variable `i` leaking out of the loop above.
for j in range(len(columns), len(axes)):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()

#The majority of customers who do not churn tend to have services like PhoneService, InternetService (particularly DSL), and OnlineSecurity enabled. For services like OnlineBackup, TechSupport, and StreamingTV, churn rates are noticeably higher when these services are not used or are unavailable. 

In [None]:
# Customer counts per PaymentMethod, split by Churn, with counts printed on the bars.
plt.figure(figsize=(6, 4))
ax = sns.countplot(data=df, x='PaymentMethod', hue='Churn')

# Label the bar containers of the first two hue levels.
first_hue_bars, second_hue_bars = ax.containers[0], ax.containers[1]
ax.bar_label(first_hue_bars)
ax.bar_label(second_hue_bars)

ax.set_title("Customer Churned by Payment Method")
plt.xticks(rotation=45)
plt.show()

# Customers are more likely to churn when they use electronic check as their payment method.