# Credit Card Churn - Clustering

### Data Preparation

In [None]:
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Directory the notebook is running from; the dataset path is built relative to it.
working_directory = os.getcwd()

In [None]:
# Build the CSV location with os.path.join so the path separator matches the
# host operating system instead of relying on a hard-coded '/'.
path = os.path.join(working_directory, 'data', 'BankChurners.csv')

In [None]:
# Load the raw CSV at `path` into a DataFrame.
bank_churn = pd.read_csv(path)

In [None]:
# Preview the first rows of the loaded data.
bank_churn.head()

In [None]:
# List the available column names before choosing which ones to drop.
bank_churn.columns

In [None]:
# Drop the columns considered irrelevant for clustering, plus the categorical
# ones; whatever remains is kept in `dropcol`.
columns_to_drop = [
    'CLIENTNUM',
    'Attrition_Flag',
    'Dependent_count',
    'Avg_Open_To_Buy',
    'Education_Level',
    'Marital_Status',
    'Income_Category',
    'Card_Category',
    'Gender',
    'Avg_Utilization_Ratio',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1',
    'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2',
]

dropcol = bank_churn.drop(columns=columns_to_drop)

In [None]:
#standardization

from sklearn.preprocessing import StandardScaler

# Columns that are rescaled with StandardScaler before clustering.
scaled_columns = [
    'Customer_Age', 'Months_on_book', 'Total_Relationship_Count',
    'Months_Inactive_12_mon', 'Contacts_Count_12_mon', 'Credit_Limit',
    'Total_Revolving_Bal', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
    'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1',
]

scaler = StandardScaler()

# Scale a deep copy so the unscaled values in `dropcol` stay untouched.
dropcol1 = dropcol.copy(deep=True)
dropcol1[scaled_columns] = scaler.fit_transform(dropcol1[scaled_columns])

### K-means clustering 

In [None]:
#Elbow method for K-means clustering

from sklearn.cluster import KMeans

In [None]:
# Apply seaborn's 'darkgrid' theme to the plots drawn from here on.
sns.set_theme(style='darkgrid')

def optimal_k_means(data, max_k, random_state=1):
    """Plot an elbow curve of K-means inertia over a range of cluster counts.

    One ``KMeans`` model is fitted on ``data`` for every ``k`` in
    ``range(1, max_k)`` and its within-cluster sum of squares (``inertia_``)
    is plotted against ``k``.

    Args:
        data: Feature matrix to cluster; passed unchanged to ``KMeans.fit``.
        max_k: Exclusive upper bound on the number of clusters, i.e. the
            largest ``k`` that is fitted is ``max_k - 1``.
        random_state: Seed handed to every ``KMeans`` instance so repeated
            calls produce the same curve.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    cluster_counts = list(range(1, max_k))
    wcss = []

    for k in cluster_counts:
        # Fit on the `data` argument and seed the estimator, so the function
        # honours both of its parameters.
        model = KMeans(n_clusters=k, random_state=random_state)
        model.fit(data)
        wcss.append(model.inertia_)

    # generating elbow plot
    # plt.subplots returns a (figure, axes) pair; only the axes are needed.
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(cluster_counts, wcss, 'o-')
    ax.set_xlabel('Number of clusters', fontsize=11)
    ax.set_ylabel('Within Cluster Sum of Squares (WCSS)', fontsize=11)
    ax.set_title('Elbow Method', fontsize=14)
    plt.show()

In [None]:
# Draw the elbow curve for k = 1..9 (range(1, max_k) excludes max_k itself).
optimal_k_means(dropcol1, 10)

In [None]:
# K-means estimator with 3 clusters and a fixed seed.
# NOTE(review): k=3 is presumably read off the elbow plot -- confirm.
kmeans = KMeans(n_clusters = 3, random_state=1)

In [None]:
#fitting kmeans object to our data

# Cluster on every standardized feature column, i.e. the same matrix the elbow
# search was run on. The former `.iloc[:, 1:]` slice left the first feature
# column out of the final model. Excluding any existing "Cluster" column keeps
# this cell safe to re-run after the labels have been attached to dropcol1.
features = dropcol1.drop(columns=['Cluster'], errors='ignore')
clusters = kmeans.fit_predict(features)

In [None]:
#adding "Cluster" and binary "Attrition_Flag" to standardized dataset

dropcol1["Cluster"] = clusters
# Look the label column up by name rather than by position, so a different
# column order in the CSV cannot silently select the wrong column.
churn_col = bank_churn['Attrition_Flag']
dropcol_churn = pd.concat([dropcol1, churn_col], axis=1)
# Vectorized binary encoding: 1 for 'Attrited Customer', 0 for anything else.
dropcol_churn['Attrition_Flag'] = (
    dropcol_churn['Attrition_Flag'] == 'Attrited Customer'
).astype(int)
dropcol_churn

In [None]:
# Inspect the columns of the combined frame.
dropcol_churn.columns

In [None]:
# Count rows for every (Cluster, Attrition_Flag) pair, then pivot the flag
# values into columns; pairs that never occur are shown as 0.
cluster_churn_counts = dropcol_churn.groupby(['Cluster', 'Attrition_Flag']).size()
cluster_churn_counts.unstack(fill_value=0)

### Variables by Cluster - Visualizations

In [None]:
#Customer_Age by Cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw ages.
cluster_bar = dropcol_churn['Cluster']
Customer_Age = dropcol_churn['Customer_Age']
ax = sns.barplot(x=cluster_bar, y=Customer_Age, color='#FF9F66')
ax.set_title('Customer age by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Age')

In [None]:
#Months_on_book by Cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw month counts.
cluster_bar = dropcol_churn['Cluster']
Months_on_book = dropcol_churn['Months_on_book']
ax = sns.barplot(x=cluster_bar, y=Months_on_book, color='#FF9F66')
ax.set_title('Period of relationship with bank by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Months')

In [None]:
#Total_Relationship_Count by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw product counts.
cluster_bar = dropcol_churn['Cluster']
Total_Relationship_Count = dropcol_churn['Total_Relationship_Count']
ax = sns.barplot(x=cluster_bar, y=Total_Relationship_Count, color='#FF9F66')
ax.set_title('Number of products held by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Products')

In [None]:
#Months_Inactive_12_mon by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw month counts.
cluster_bar = dropcol_churn['Cluster']
Months_Inactive_12_mon = dropcol_churn['Months_Inactive_12_mon']
ax = sns.barplot(x=cluster_bar, y=Months_Inactive_12_mon, color='#FF9F66')
ax.set_title('Months inactive in the last 12 months by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Months')

In [None]:
#Contacts_Count_12_mon by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw contact counts.
cluster_bar = dropcol_churn['Cluster']
Contacts_Count_12_mon = dropcol_churn['Contacts_Count_12_mon']
ax = sns.barplot(x=cluster_bar, y=Contacts_Count_12_mon, color='#FF9F66')
ax.set_title('Contacts in the last 12 months by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Contacts')

In [None]:
#Credit_Limit by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw credit limits.
cluster_bar = dropcol_churn['Cluster']
Credit_Limit = dropcol_churn['Credit_Limit']
ax = sns.barplot(x=cluster_bar, y=Credit_Limit, color='#FF9F66')
ax.set_title('Credit Limit by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Limit')

In [None]:
#Total_Revolving_Bal by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw balances.
cluster_bar = dropcol_churn['Cluster']
Total_Revolving_Bal = dropcol_churn['Total_Revolving_Bal']
ax = sns.barplot(x=cluster_bar, y=Total_Revolving_Bal, color='#FF9F66')
ax.set_title('Total Revolving Balance by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Balance')

In [None]:
#Total_Amt_Chng_Q4_Q1 by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than the raw Q4/Q1 change.
cluster_bar = dropcol_churn['Cluster']
Total_Amt_Chng_Q4_Q1 = dropcol_churn['Total_Amt_Chng_Q4_Q1']
ax = sns.barplot(x=cluster_bar, y=Total_Amt_Chng_Q4_Q1, color='#FF9F66')
ax.set_title('Change in Transaction Amount Q4 over Q1 by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Change')

In [None]:
#Total_Trans_Amt by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw transaction amounts.
cluster_bar = dropcol_churn['Cluster']
Total_Trans_Amt = dropcol_churn['Total_Trans_Amt']
ax = sns.barplot(x=cluster_bar, y=Total_Trans_Amt, color='#FF9F66')
ax.set_title('Total Transaction Amount in the last 12 months by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Amount')

In [None]:
#Total_Trans_Ct by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than raw transaction counts.
cluster_bar = dropcol_churn['Cluster']
Total_Trans_Ct = dropcol_churn['Total_Trans_Ct']
ax = sns.barplot(x=cluster_bar, y=Total_Trans_Ct, color='#FF9F66')
ax.set_title('Total Transaction Count in the last 12 months by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Count')

In [None]:
#Total_Ct_Chng_Q4_Q1 by cluster
# NOTE: dropcol_churn carries the standardized features, so the bars show
# scaled values rather than the raw Q4/Q1 change.
cluster_bar = dropcol_churn['Cluster']
Total_Ct_Chng_Q4_Q1 = dropcol_churn['Total_Ct_Chng_Q4_Q1']
ax = sns.barplot(x=cluster_bar, y=Total_Ct_Chng_Q4_Q1, color='#FF9F66')
ax.set_title('Change in Transaction Count Q4 over Q1 by Cluster')
ax.set_xlabel('Cluster')
ax.set_ylabel('Change')