In [None]:
# Value-count bar chart for every categorical column, churn target included
categorical_cols = [
    'gender', 'partner', 'dependents', 'phone_service', 'multiple_lines',
    'internet_service', 'online_security', 'online_backup', 'device_protection',
    'tech_support', 'streaming_t_v', 'streaming_movies', 'contract',
    'paperless_billing', 'payment_method', 'churn',
]

# 16 panels on a 6x3 grid; the two unused grid slots are simply never created.
fig = plt.figure(figsize=(15, 25))
for panel, col in enumerate(categorical_cols, start=1):
    ax = fig.add_subplot(6, 3, panel)
    pretty_name = col.capitalize()
    level_counts = data[col].value_counts()
    level_counts.plot(kind='bar', color='skyblue', edgecolor='black', ax=ax)
    ax.set_title(f'Distribution of {pretty_name}')
    ax.set_ylabel('Count')
    ax.set_xlabel(pretty_name)
fig.tight_layout()
plt.show()


In [None]:
# Mean of each key numeric feature, split by churn status.
# One row per panel: (column, bar colour, title, y-axis label).
panel_specs = [
    ('tenure', 'skyblue', 'Average Tenure by Churn', 'Average Tenure'),
    ('monthly_charges', 'lightgreen', 'Average Monthly Charges by Churn', 'Average Monthly Charges'),
    ('total_charges', 'lightcoral', 'Average Total Charges by Churn', 'Average Total Charges'),
]

fig = plt.figure(figsize=(15, 10))

# Three panels on a 2x2 grid; the fourth slot is left unused.
for slot, (feature, bar_color, title, y_label) in enumerate(panel_specs, start=1):
    ax = fig.add_subplot(2, 2, slot)
    mean_by_churn = data.groupby('churn')[feature].mean()
    mean_by_churn.plot(kind='bar', color=bar_color, edgecolor='black', ax=ax)
    ax.set_title(title)
    ax.set_ylabel(y_label)

fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

# Function to calculate Cramér's V for categorical-categorical association
def cramers_v(confusion_matrix):
    """Return Cramér's V for a two-way contingency table.

    Parameters
    ----------
    confusion_matrix : array-like of shape (r, k)
        Observed counts, e.g. the output of ``pd.crosstab`` or a 2-D NumPy array.

    Returns
    -------
    float
        ``sqrt(chi2 / (n * min(r - 1, k - 1)))`` where ``n`` is the grand total
        of the table. ``nan`` when the table has only one row or one column,
        since the denominator is zero there.

    Notes
    -----
    ``chi2`` comes from ``scipy.stats.chi2_contingency`` with its default
    arguments, so SciPy's default continuity correction for 2x2 tables is
    included in the statistic.
    """
    # Work on a plain array: DataFrame.sum() yields per-column totals (a Series),
    # which would turn the result into a Series rather than one number.
    observed = np.asarray(confusion_matrix)
    chi2 = stats.chi2_contingency(observed)[0]
    n = observed.sum()  # grand total of all cells
    r, k = observed.shape
    min_dim = min(k - 1, r - 1)
    if min_dim == 0:
        # Single row or column: the statistic is undefined.
        return float('nan')
    return float(np.sqrt(chi2 / (n * min_dim)))

# Cramér's V of every categorical feature against the churn target
categorical_cols = [
    'gender', 'partner', 'dependents', 'phone_service', 'multiple_lines',
    'internet_service', 'online_security', 'online_backup', 'device_protection',
    'tech_support', 'streaming_t_v', 'streaming_movies', 'contract',
    'paperless_billing', 'payment_method',
]

for col in categorical_cols:
    # Feature levels as rows, churn values as columns.
    churn_table = pd.crosstab(data[col], data['churn'])
    association = cramers_v(churn_table)
    print(f"Cramér's V between churn and {col}: {association}")


In [None]:
# Chi-square test of independence between each categorical feature and churn
for col in categorical_cols:
    observed = pd.crosstab(data[col], data['churn'])
    # chi2_contingency returns (statistic, p-value, dof, expected); only the p-value is reported.
    p_value = stats.chi2_contingency(observed)[1]
    print(f'Chi-Square Test between churn and {col}: p-value = {p_value}')


In [None]:
# Stacked bars: within each level of a categorical feature, the share of each churn value
fig = plt.figure(figsize=(15, 25))

for panel, col in enumerate(categorical_cols, start=1):
    ax = fig.add_subplot(6, 3, panel)
    pretty_name = col.capitalize()
    # normalize='index' makes every row (feature level) sum to 1.
    churn_share = pd.crosstab(data[col], data['churn'], normalize='index')
    churn_share.plot(
        kind='bar',
        stacked=True,
        ax=ax,
        color=['lightgreen', 'salmon'],
        edgecolor='black',
    )
    ax.set_title(f'{pretty_name} vs Churn')
    ax.set_ylabel('Proportion')
    ax.set_xlabel(pretty_name)

fig.tight_layout()
plt.show()


In [None]:
# Per-feature stacked bars of churn proportions.
# NOTE(review): this repeats the column list and plotting code of the earlier
# "Stacked bar charts for categorical variables vs churn" cell -- consider
# keeping only one of the two.
categorical_cols = [
    'gender', 'partner', 'dependents', 'phone_service', 'multiple_lines',
    'internet_service', 'online_security', 'online_backup', 'device_protection',
    'tech_support', 'streaming_t_v', 'streaming_movies', 'contract',
    'paperless_billing', 'payment_method',
]

fig = plt.figure(figsize=(15, 25))
for position, col in enumerate(categorical_cols, start=1):
    axis = fig.add_subplot(6, 3, position)
    # Row-normalised crosstab: each feature level's churn values sum to 1.
    row_shares = pd.crosstab(data[col], data['churn'], normalize='index')
    row_shares.plot(kind='bar', stacked=True, ax=axis,
                    color=['lightgreen', 'salmon'], edgecolor='black')
    axis.set_title(f'{col.capitalize()} vs Churn')
    axis.set_ylabel('Proportion')
    axis.set_xlabel(col.capitalize())

fig.tight_layout()
plt.show()


In [None]:
# Churn rate (share of 'Yes') within each level of every categorical feature
fig = plt.figure(figsize=(15, 25))

for panel, col in enumerate(categorical_cols, start=1):
    ax = fig.add_subplot(6, 3, panel)
    pretty_name = col.capitalize()
    # Relative frequency of each churn value inside every feature level ...
    shares_by_level = data.groupby(col)['churn'].value_counts(normalize=True)
    # ... pivoted so churn values become columns; keep only the 'Yes' column.
    yes_rate = shares_by_level.unstack()['Yes']
    yes_rate.plot(kind='bar', color='salmon', edgecolor='black', ax=ax)
    ax.set_title(f'Churn Rate in {pretty_name}')
    ax.set_ylabel('Churn Rate')
    ax.set_xlabel(pretty_name)

fig.tight_layout()
plt.show()
