<a href="https://colab.research.google.com/github/Tharun-Exe/NaanMudhalvan/blob/main/customer_churn_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import warnings
# Suppress every warning for the rest of the session: no category or module
# filter is given, so deprecation and convergence warnings are hidden too.
warnings.filterwarnings("ignore")

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import LabelEncoder

In [None]:
# Show every column when a DataFrame is rendered (no truncation).
pd.set_option("display.max_columns", None)

In [None]:
# Read the Telco customer-churn CSV from the Colab working directory.
csv_path = "/content/WA_Fn-UseC_-Telco-Customer-Churn.csv"
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the raw frame.
data.head(n=5)

In [None]:
# Column dtypes and non-null counts of the freshly loaded frame.
data.info()

In [None]:
# Drop the customerID column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op rather than a KeyError.
data = data.drop(columns='customerID', errors='ignore')

In [None]:
# Collect every distinct character used in the TotalCharges strings, to spot
# anything that would block a numeric conversion.
total_charges_text = ''.join(data['TotalCharges'].tolist())
set(total_charges_text)

In [None]:
# Turn the single-space placeholder in TotalCharges into a real missing value.
data['TotalCharges'] = data['TotalCharges'].replace({' ': np.nan})

In [None]:
# Cast TotalCharges to a floating-point column.
data['TotalCharges'] = data['TotalCharges'].astype(float)

In [None]:
# Re-inspect dtypes and non-null counts after the TotalCharges conversion.
data.info()

In [None]:
# Normalise all column names to lower case.
data.columns = data.columns.map(str.lower)

In [None]:
# Display the column index of the frame.
data.columns

In [None]:
# Columns treated as numeric features in the summaries and plots.
num_cols = [
    'tenure',
    'monthlycharges',
    'totalcharges',
]

In [None]:
# Summary statistics of the numeric columns, one row per column.
data[num_cols].describe().transpose()

In [None]:
# Summarise every column that is not in num_cols, leaving out seniorcitizen.
non_numeric_cols = [
    name
    for name in data.columns.difference(num_cols)
    if name != 'seniorcitizen'
]
data[non_numeric_cols].describe().T

In [None]:
# Frequency of each distinct value in the seniorcitizen column.
data['seniorcitizen'].value_counts()

In [None]:
# Columns that are label-encoded with lencoder in a later cell.
ord_cols = [
    'dependents',
    'gender',
    'paperlessbilling',
    'partner',
    'phoneservice',
]

# Name of the target column.
label = 'churn'

# Remaining categorical columns.
cat_cols = [
    'seniorcitizen',
    'multiplelines',
    'internetservice',
    'onlinesecurity',
    'onlinebackup',
    'deviceprotection',
    'techsupport',
    'streamingtv',
    'streamingmovies',
    'contract',
    'paymentmethod',
]

In [None]:
# Histogram with a KDE overlay for each numeric column, side by side.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
for ax, col in zip(axes, num_cols):
    sns.histplot(data, x=col, color='red', alpha=0.2, kde=True, ax=ax)
plt.tight_layout()
plt.show()

In [None]:
# For each numeric column: a rug plot coloured by the label, with a box plot
# drawn on the same axes.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
for ax, col in zip(axes, num_cols):
    sns.rugplot(data, x=col, hue=label, height=0.1, ax=ax)
    sns.boxplot(data, x=col, width=0.3, ax=ax)
plt.tight_layout()
plt.show()

In [None]:
# Box plot of each numeric column, grouped by the label value.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
for ax, col in zip(axes, num_cols):
    sns.boxplot(data, x=label, y=col, width=0.4, ax=ax)
plt.tight_layout()
plt.show()

In [None]:
# Count plot of every non-numeric feature, split by the label.
# The label column is excluded by name; a positional [1:] slice would only
# work while the label happens to sort first among the remaining columns.
count_cols = data.columns.difference(num_cols + [label])
n_grid_cols = 3
n_grid_rows = -(-len(count_cols) // n_grid_cols)  # ceiling division

plt.figure(figsize = (15, 26))
for i, col in enumerate(count_cols):
    plt.subplot(n_grid_rows, n_grid_cols, i + 1)
    ax = sns.countplot(data, x = col, hue = label)
    # Annotate the bars of every hue level, however many there are.
    for container in ax.containers:
        ax.bar_label(container)
    plt.xticks(rotation = 15)
plt.tight_layout()
plt.show()

In [None]:
# Class balance of the label column, with the count printed on each bar.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data, x=label, ax=ax)
ax.bar_label(ax.containers[0])
plt.show()

In [None]:
def lencoder(col):
    """Label-encode one column of the module-level ``data`` frame.

    A fresh ``LabelEncoder`` is fitted on ``data[col]``, the column is
    overwritten in ``data`` with the encoded values, and the updated column
    is returned.

    Parameters
    ----------
    col : str
        Name of the column in ``data`` to encode.

    Returns
    -------
    pandas.Series
        The encoded column as stored back in ``data``.
    """
    encoder = LabelEncoder()
    codes = encoder.fit_transform(data[col])
    data[col] = codes
    return data[col]

In [None]:
# Label-encode every column listed in ord_cols.
for ord_col in ord_cols:
    data[ord_col] = lencoder(ord_col)

In [None]:
# Label-encode the target column with the same helper used for ord_cols.
data['churn'] = lencoder('churn')

In [None]:
# One-hot encode the frame; no column list is passed, so pandas picks the
# columns to expand based on dtype.
ohe_data = pd.get_dummies(data)

In [None]:
# Convert every boolean column to integers.
bool_cols = ohe_data.select_dtypes(include='bool').columns
ohe_data[bool_cols] = ohe_data[bool_cols].astype('int')

In [None]:
# From here on `data` is an independent copy of the one-hot encoded frame.
data = ohe_data.copy()

In [None]:
# Despite the variable name, this is an IterativeImputer (not a mean imputer);
# random_state pins its seed.
imp_mean = IterativeImputer(random_state = 42)

In [None]:
# Fill missing feature values without showing the imputer the target column:
# fitting on `churn` would leak label information into the imputed features.
# The result stays a float array with the same shape and column order as
# `data`; the `churn` column is carried through unchanged.
feature_cols = data.columns.drop('churn')
data_impute = data.to_numpy(dtype=float, copy=True)
data_impute[:, data.columns.get_indexer(feature_cols)] = imp_mean.fit_transform(data[feature_cols])

In [None]:
# Wrap the imputed array back into a DataFrame, reusing the column names of
# the pre-imputation frame. No index is passed, so a default one is created.
data = pd.DataFrame(data_impute, columns = data.columns)

In [None]:
!pip -q install pycaret

In [None]:
!pip -q install --upgrade scipy

In [None]:
!pip -q install --upgrade yellowbrick

In [None]:
import pycaret

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret classification experiment on `data` with `churn` as
# the target, a fixed session_id, and data_split_stratify enabled.
s = setup(data, target = 'churn', session_id = 42, data_split_stratify=True)

In [None]:
# Compare the candidate models sorted by AUC and keep the result as best_model.
best_model = compare_models(sort = 'AUC')

In [None]:
# Show the text representation of the AUC-selected model.
print(best_model)

In [None]:
# Feature plot for best_model.
# NOTE(review): whether plot_model draws into the figure opened here or
# creates its own is not visible in this notebook — confirm figsize takes effect.
plt.figure(figsize = (7, 4))
plot_model(best_model, plot = 'feature')

In [None]:
# AUC plot for best_model.
# NOTE(review): whether plot_model draws into the figure opened here or
# creates its own is not visible in this notebook — confirm figsize takes effect.
plt.figure(figsize = (7, 4))
plot_model(best_model, plot = 'auc')

In [None]:
# Confusion matrix for the AUC-selected best_model.
# NOTE(review): whether plot_model draws into the figure opened here or
# creates its own is not visible in this notebook — confirm figsize takes effect.
plt.figure(figsize = (4,3))
plot_model(best_model, plot = 'confusion_matrix')

In [None]:
def calculate_profit(y, y_pred, tp_value=4000, fp_cost=1000):
    """Score predictions by the profit they generate.

    Every true positive (predicted 1, actual 1) earns ``tp_value``; every
    false positive (predicted 1, actual 0) costs ``fp_cost``. Rows predicted
    as 0 contribute nothing.

    Both inputs are converted to flat NumPy arrays and compared by position.
    Without that conversion, plain lists compare as a single scalar
    (``[1, 0] == 1`` is ``False``) and two pandas Series with different
    indexes are aligned by label; either case silently yields a profit of 0.

    Parameters
    ----------
    y : array-like
        Actual labels (0 or 1).
    y_pred : array-like
        Predicted labels (0 or 1), same length as ``y``.
    tp_value : number, default 4000
        Gain credited for each true positive.
    fp_cost : number, default 1000
        Cost charged for each false positive.

    Returns
    -------
    numpy scalar
        ``tp_value * n_true_positives - fp_cost * n_false_positives``.
    """
    actual = np.asarray(y).ravel()
    predicted = np.asarray(y_pred).ravel()

    predicted_positive = predicted == 1
    n_true_pos = np.sum(predicted_positive & (actual == 1))
    n_false_pos = np.sum(predicted_positive & (actual == 0))
    return tp_value * n_true_pos - fp_cost * n_false_pos

In [None]:
# Register calculate_profit as a custom metric; 'Profit' is the name the
# later compare_models(sort = 'Profit') call sorts by.
add_metric('profit', 'Profit', calculate_profit)

In [None]:
# Re-run the model comparison sorted by the custom Profit metric. This rebinds
# best_model, replacing the AUC-sorted result from the earlier comparison.
best_model = compare_models(sort = 'Profit')

In [None]:
# Confusion matrix for the Profit-selected best_model.
# NOTE(review): whether plot_model draws into the figure opened here or
# creates its own is not visible in this notebook — confirm figsize takes effect.
plt.figure(figsize = (4,3))
plot_model(best_model, plot = 'confusion_matrix')

In [None]:
# Persist best_model (at this point, the Profit-sorted pick) under the name
# 'churn-predict'.
save_model(best_model, 'churn-predict')