# Credit Card Retention Analysis

### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objs as go
from plotly.offline import iplot
# Raise the column display cap to 999 so wide frames are shown without elided columns.
pd.set_option('display.max_columns', 999)
# Switch matplotlib figures over to seaborn's default theme.
sns.set()

In [2]:
# Read the customer table from CSV; the path is resolved relative to the working directory.
data = pd.read_csv(filepath_or_buffer='../data/BankChurners_v2.csv')

In [3]:
# Columns kept for the analysis; anything else in the CSV is dropped here.
selected_columns = [
    'CLIENTNUM', 'Attrition_Flag', 'Customer_Age', 'Gender',
    'Dependent_count', 'Education_Level', 'Marital_Status',
    'Income_Category', 'Card_Category', 'Months_on_book',
    'Total_Relationship_Count', 'Months_Inactive_12_mon',
    'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
    'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
    'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',
]

# Take an explicit copy: later cells assign new/updated columns on `data`, and
# doing that on a frame derived from a selection can trigger pandas'
# SettingWithCopyWarning. An independent copy makes those assignments unambiguous.
data = data[selected_columns].copy()

In [4]:
# Replace missing values in the categorical demographic columns with the
# explicit placeholder 'Unknown' so they form their own category.
for column in ('Education_Level', 'Marital_Status', 'Income_Category'):
    data[column] = data[column].fillna('Unknown')

***

## Data Transformation: Binning

To bin our ages, we will need a couple of data points: the minimum and maximum ages. Let's find those.

In [6]:
# Show the youngest and oldest customer age, one value per line.
customer_ages = data['Customer_Age']
print(customer_ages.min(), customer_ages.max(), sep='\n')

26
73


We will bin the ages into decades with `pd.cut`. For more information, see the documentation [here](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.cut.html).

In [7]:
# Decade edges 20, 30, ..., 80. With right=False each bin is left-closed and
# right-open, i.e. [20, 30), [30, 40), ..., [70, 80), so an age of exactly 30
# lands in '30s'. Starting at 20 (rather than 25) makes the '20s' label describe
# its whole interval; the observed ages (26-73) are binned the same way.
bins = list(range(20, 90, 10))

# One label per interval, derived from its lower edge: '20s', '30s', ..., '70s'.
labels = [f'{lower_edge}s' for lower_edge in bins[:-1]]

# include_lowest is not needed: with right=False the first interval already
# includes its lower edge. Ages outside [20, 80) would become NaN.
data['Customer_Age_bins'] = pd.cut(
    data['Customer_Age'],
    bins=bins,
    labels=labels,
    right=False,
)

In [9]:
# Spot-check the binning: every 31-year-old customer should carry the '30s' label.
data.loc[data['Customer_Age'].eq(31)]

Unnamed: 0,CLIENTNUM,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Customer_Age_bins
602,90634,Existing Customer,31,M,0,Uneducated,Married,$40K - $60K,Blue,22,3,3,0,4048.0,1113,2935.0,0.717,1757,45,0.607,0.275,30s
808,90840,Existing Customer,31,M,0,Graduate,Married,$40K - $60K,Blue,21,3,3,0,2158.0,1729,429.0,1.100,1718,42,0.448,0.801,30s
832,90864,Existing Customer,31,F,0,Graduate,Married,Less than $40K,Blue,22,3,3,2,2917.0,2127,790.0,1.074,1831,36,1.000,0.729,30s
866,90898,Existing Customer,31,M,1,Unknown,Married,Less than $40K,Blue,36,3,3,3,1868.0,1515,353.0,1.359,2229,35,0.667,0.811,30s
1011,91043,Existing Customer,31,M,2,Uneducated,Married,Less than $40K,Blue,18,3,2,4,1438.3,0,1438.3,1.561,2789,50,1.083,0.000,30s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9935,99967,Existing Customer,31,M,1,College,Single,$40K - $60K,Blue,36,3,1,2,11109.0,2045,9064.0,0.625,13988,92,0.840,0.184,30s
9982,100014,Existing Customer,31,F,1,Unknown,Single,Less than $40K,Blue,18,3,1,3,3399.0,1800,1599.0,0.752,13148,121,0.806,0.530,30s
10052,100084,Attrited Customer,31,M,1,Post-Graduate,Single,$40K - $60K,Blue,24,4,2,4,4169.0,534,3635.0,0.836,8565,70,0.795,0.128,30s
10069,100101,Existing Customer,31,M,2,Graduate,Single,Less than $40K,Blue,14,4,3,3,6933.0,1870,5063.0,0.656,15585,115,0.917,0.270,30s
