# In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn as se

class churn_code:
    """Exploratory churn report: load a CSV, encode text flags, print and plot.

    The class is used purely as a namespace; ``dataset`` is a static method and
    is meant to be called as ``churn_code.dataset()``.
    """

    # Lower-cased text flags and the integer each one is encoded as.
    _FLAG_CODES = {'yes': 1, 'no': 0, 'true': 1, 'false': 0}

    @staticmethod
    def _encode_cell(value):
        """Lower-case a string cell and map yes/no/true/false to 1/0.

        Non-string values are returned unchanged; other strings are returned
        lower-cased.
        """
        if not isinstance(value, str):
            return value
        lowered = value.lower()
        return churn_code._FLAG_CODES.get(lowered, lowered)

    @staticmethod
    def _load(csv_path):
        """Read ``csv_path`` and return the frame with text flags encoded."""
        data_set = pd.read_csv(csv_path)
        # Only non-numeric, non-boolean columns can hold strings. Mapping them
        # column by column gives the same result as lower-casing every cell
        # and then replacing the flag strings, without the deprecated
        # DataFrame.applymap and without relying on DataFrame.replace to
        # downcast the resulting columns.
        text_cols = data_set.select_dtypes(exclude=['number', 'bool']).columns
        for col in text_cols:
            data_set[col] = data_set[col].map(churn_code._encode_cell)
        return data_set

    @staticmethod
    def dataset(csv_path="/home/happy/Downloads/churn-bigml-80.csv"):
        """Run the whole report for the CSV at ``csv_path``.

        Steps: load and encode the data, print an overview, draw the
        distribution / comparison plots and a correlation heatmap (one figure
        each), show them, print per-churn aggregates, then draw a scatter
        grid faceted by 'International plan' and save that grid as
        ``monthly_charges_vs_churn.png`` and ``monthly_charges_vs_churn.pdf``
        in the current working directory.

        Args:
            csv_path: Path of the CSV file to analyse. Defaults to the
                location the script originally hard-coded.

        Returns:
            None.
        """
        data_set = churn_code._load(csv_path)

        # TAKE OVERVIEW OF DATASET
        print(data_set.describe())
        print(data_set.head(10))
        print(data_set.columns)

        night, intl = 'Total night charge', 'Total intl charge'
        # (plot function, keyword arguments) for every axes-level plot.
        plot_specs = [
            # Night vs. international charge, split by churn.
            (se.histplot, dict(x=night, y=intl, hue='Churn')),
            (se.boxplot, dict(x=night, y=intl, hue='Churn')),
            (se.violinplot, dict(x=night, y=intl, hue='Churn')),
            # Feature comparisons, including churn vs. plan flags.
            (se.violinplot, dict(x=intl, y=night)),
            (se.boxplot, dict(x='Churn', y='International plan')),
            (se.barplot, dict(x='Churn', y='Voice mail plan',
                              hue='International plan')),
        ]
        for draw, params in plot_specs:
            # A fresh figure per plot: seaborn's axes-level functions draw on
            # the current axes, so without this every plot would be painted
            # on top of the previous one.
            plt.figure()
            draw(data=data_set, **params)

        # Correlation Matrix
        cols = ['Total intl charge', 'Total intl calls', 'Total intl minutes', 'Total night charge']
        co_relation = data_set[cols].corr()
        plt.figure()
        se.heatmap(co_relation, annot=True, cmap='coolwarm')
        plt.show()

        # SEGMENTED REPORTING
        print(data_set.groupby('Churn')[['Total night minutes', 'Total intl charge', 'Voice mail plan']].agg(['mean', 'median']))

        # VISUALIZATION DASHBOARD
        g = se.FacetGrid(data=data_set, col='International plan')
        g.map_dataframe(se.scatterplot, x='Voice mail plan', y='Number vmail messages', hue='Total day calls')
        g.add_legend()

        # SAVE VISUAL REPORT INTO PNG IMAGE AND PDF
        # The grid was created last, so it is the current figure that
        # plt.savefig writes out.
        plt.savefig("monthly_charges_vs_churn.png", dpi=300)
        plt.savefig("monthly_charges_vs_churn.pdf")
        # Release every figure opened by this report, not only the last one.
        plt.close('all')

       

if __name__ == '__main__':
    # Script entry point: run the churn report straight off the class
    # (no instance is created).
    churn_code.dataset()

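# Notebook output residue (apparently source lines echoed by pandas warnings),
# kept as comments so the file stays valid Python:
#   data_set[yes_no_cols] = data_set[yes_no_cols].replace({'Yes': 1, 'No': 0})
#   data_set = data_set.applymap(lambda x: str(x).lower() if isinstance(x, str) else x)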
