In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Read the customer churn CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="Customer Churn.csv")

In [None]:
# Entries of TotalCharges that are exactly a single-space string are replaced
# with "0" so the whole column can be cast to float.
# NOTE(review): treating blanks as 0 (rather than NaN) is a modelling choice —
# confirm it is what the analysis wants.
df["TotalCharges"] = df["TotalCharges"].replace(" ", "0").astype("float")

# Check missing values and dataset info
print("Total Missing Values:", df.isnull().sum().sum())
print("Shape of dataset:", df.shape)
# DataFrame.info() writes its report to stdout and returns None, so it must not
# be wrapped in print() — doing so emitted a stray "None" line after the report.
df.info()
print(df.describe())

In [None]:
def conv(value):
    """Return "yes" when ``value`` compares equal to 1, otherwise "no"."""
    return "yes" if value == 1 else "no"

# Relabel the SeniorCitizen flag (1 -> "yes", anything else -> "no") in place.
# The assignment previously targeted the misspelled name "SenionCitizen", which
# added a stray column and left SeniorCitizen itself unconverted.
# The dtype guard keeps the cell safe to re-run: mapping the already-converted
# labels through conv a second time would turn every value into "no".
# Once converted, SeniorCitizen is no longer numeric, so numeric-only
# summaries (correlation, VIF) will not include it.
if pd.api.types.is_numeric_dtype(df["SeniorCitizen"]):
    df["SeniorCitizen"] = df["SeniorCitizen"].apply(conv)

# Preview dataset
print(df.head())

In [None]:
# Pairwise correlations, restricted to the numeric columns of df.
co_mtx = df.corr(numeric_only=True)
print("\nCorrelation Matrix:\n", co_mtx)

# Plot correlation heatmap on an explicitly created axes
corr_fig, corr_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(data=co_mtx, annot=True, cmap="YlGnBu", ax=corr_ax)
corr_ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Histogram of tenure (72 bins), coloured by the Churn column.
tenure_fig, tenure_ax = plt.subplots(figsize=(9, 4))
sns.histplot(data=df, x="tenure", hue="Churn", bins=72, ax=tenure_ax)
tenure_ax.set_title("Tenure Distribution by Churn")
plt.show()

In [None]:
# Scatter of tenure against MonthlyCharges, coloured by the Churn column.
scatter_fig, scatter_ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=df, x="tenure", y="MonthlyCharges", hue="Churn", ax=scatter_ax)
scatter_ax.set_title("Tenure vs Monthly Charges by Churn")
plt.show()

In [None]:
# Names of the int64/float64 columns, and the correlation matrix over just those.
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
numeric_frame = df[numerical_cols]
corr_matrix = numeric_frame.corr()

# Annotated heatmap with two-decimal cell labels
num_corr_fig, num_corr_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(data=corr_matrix, cmap="coolwarm", annot=True, fmt=".2f", ax=num_corr_ax)
num_corr_ax.set_title("Correlation Matrix Heatmap (Numerical Features)")
plt.show()

In [None]:
# Variance inflation factor for each numerical feature.
# variance_inflation_factor regresses column i on the remaining columns of the
# matrix it receives and does not add an intercept on its own. Passing only the
# raw features therefore yields uncentered (typically inflated) VIFs, so a
# constant column of ones is appended to the design matrix here.
X = df[numerical_cols]
design = np.column_stack([X.to_numpy(dtype=float), np.ones(len(X))])

vif_data = pd.DataFrame()
vif_data['Feature'] = X.columns
# Only the original feature columns are scored; the trailing constant column
# takes part in each regression but gets no VIF row of its own.
vif_data['VIF'] = [variance_inflation_factor(design, i) for i in range(X.shape[1])]
print("\nVariance Inflation Factor:\n", vif_data)

In [None]:
# Bar chart of row counts per SeniorCitizen value, with a count label on each bar.
fig, ax = plt.subplots(figsize=(3, 3))
sns.countplot(data=df, x='SeniorCitizen', ax=ax)
first_bars = ax.containers[0]
ax.bar_label(first_bars)
ax.set_title("Count of Customers by Senior Citizen")
plt.show()

In [None]:
# Row counts per Contract value, split by the Churn column.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=df, x="Contract", hue="Churn", ax=ax)
# Label the bars of every container on the axes with their values.
for bars in ax.containers:
    ax.bar_label(bars)
ax.set_title("Count of Customers by Contract and Churn")
plt.show()

In [None]:
# Pie chart of the share of rows in each Churn group.
pie_fig, pie_ax = plt.subplots(figsize=(3, 4))
# Row count per Churn value, kept as a one-column frame indexed by Churn.
gb = df.groupby("Churn").agg({'Churn': "count"})
pie_ax.pie(gb['Churn'], labels=gb.index, autopct="%1.2f%%")
pie_ax.set_title("Percentage of Churned Customers", fontsize=10)
plt.show()