In [1]:
import numpy as np
import pandas as pd

from patsy import dmatrices, dmatrix
import re
import pickle
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC, SVC
from xgboost import XGBClassifier
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import linear_model, svm
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.plotting import plot_decision_regions
%matplotlib inline

# make prettier plots
%config InlineBackend.figure_format = 'svg' 

seed = 5
np.random.seed(seed)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
file = open('cleaned_cc_default_data', 'rb')
model_data = pickle.load(file)

In [3]:
model_data.head()

Unnamed: 0,default_payment_next_month,age,education_level,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_0,pay_2,pay_3,pay_4,pay_5,pay_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,limit_balance,sex,others,single
0,0,39,3.0,47174,47974,48630,50803,30789,15874,0,0,0,0,0,0,1800,2000,3000,2000,2000,2000,50000,1,0,1
1,0,29,3.0,48088,45980,44231,32489,26354,20221,0,0,0,0,0,0,2000,2010,3000,3000,3000,1000,110000,0,0,1
2,0,36,3.0,78630,68921,46512,40335,37165,22156,0,0,0,2,0,0,10076,4018,14,2051,2000,0,270000,1,0,1
3,0,45,3.0,58180,59134,61156,62377,63832,65099,0,0,0,0,0,0,2886,2908,2129,2354,2366,2291,130000,1,0,0
4,0,24,3.0,42058,35340,22110,19837,19855,20151,0,0,0,0,0,0,1367,1606,692,709,721,692,50000,1,0,1


In [4]:
model_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2963 entries, 0 to 2964
Data columns (total 25 columns):
default_payment_next_month    2963 non-null int64
age                           2963 non-null int64
education_level               2963 non-null float64
bill_amt_1                    2963 non-null int64
bill_amt_2                    2963 non-null int64
bill_amt_3                    2963 non-null int64
bill_amt_4                    2963 non-null int64
bill_amt_5                    2963 non-null int64
bill_amt_6                    2963 non-null int64
pay_0                         2963 non-null int64
pay_2                         2963 non-null int64
pay_3                         2963 non-null int64
pay_4                         2963 non-null int64
pay_5                         2963 non-null int64
pay_6                         2963 non-null int64
pay_amt_1                     2963 non-null int64
pay_amt_2                     2963 non-null int64
pay_amt_3                     2963 non-nu

In [5]:
model_data.shape

(2963, 25)

# Splitting data into train/test & scaling

In [6]:
# Stratified train test split
x_raw = model_data.iloc[:, 1:]
y_raw = np.array(model_data['default_payment_next_month'])

sss1 = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=0)

sss1.get_n_splits()
for train_index, test_index in sss1.split(x_raw, y_raw):
    x_train, x_test = x_raw.loc[train_index,:], x_raw.loc[test_index,:]
    y_train, y_test = y_raw[train_index], y_raw[test_index]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


In [7]:
x_train.shape

(2370, 24)

In [8]:
y_train.shape

(2370,)

In [9]:
x_test.shape

(593, 24)

In [10]:
y_test.shape

(593,)

In [11]:
# setting a scaler

scaler = StandardScaler()

In [12]:
type(x_train)

pandas.core.frame.DataFrame

In [13]:
x_train.columns

Index(['age', 'education_level', 'bill_amt_1', 'bill_amt_2', 'bill_amt_3',
       'bill_amt_4', 'bill_amt_5', 'bill_amt_6', 'pay_0', 'pay_2', 'pay_3',
       'pay_4', 'pay_5', 'pay_6', 'pay_amt_1', 'pay_amt_2', 'pay_amt_3',
       'pay_amt_4', 'pay_amt_5', 'pay_amt_6', 'limit_balance', 'sex', 'others',
       'single'],
      dtype='object')

In [14]:
# splitting train data into features that will be scaled
# by dropping categorical features

X_for_scaling = x_train.drop(columns=['education_level', 
        'pay_0', 'pay_2', 'pay_3','pay_4', 'pay_5', 'pay_6', 
        'sex', 'others', 'single'])

In [15]:
# scale features that need to be scaled

X_train_scaled = scaler.fit_transform(X_for_scaling)

In [16]:
# convert those features to data frame

X_train_scaled = pd.DataFrame(X_train_scaled, columns=['age', 
       'bill_amt_1', 'bill_amt_2', 'bill_amt_3','bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
       'pay_amt_1', 'pay_amt_2', 'pay_amt_3','pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
       'limit_balance'])

In [17]:
X_train_scaled.head()


Unnamed: 0,age,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,limit_balance
0,0.098135,-0.718157,-0.712797,-0.656081,-0.66203,-0.676581,-0.44901,-0.295507,-0.084792,-0.140569,-0.215248,0.570493,-0.248511,-0.019936
1,-0.885615,-0.045978,-0.046877,-0.134033,-0.063341,-0.001423,0.066774,-0.218995,-0.137506,-0.239268,-0.211688,-0.087358,-0.281494,-0.906082
2,1.300495,-0.028908,-0.032205,0.022519,0.10279,-0.656869,-0.702595,-0.295507,-0.122803,-0.198,-0.328832,-0.29446,2.123603,-0.906082
3,-0.776309,3.168099,3.12514,3.332646,3.805802,3.916929,1.252276,-0.295281,0.117521,0.333052,0.173641,-0.292388,10.970793,0.94677
4,0.972579,0.179921,-0.213537,-0.207158,0.790165,0.447903,0.392444,-0.069978,-0.104321,4.464748,-0.153223,-0.111723,-0.14956,-0.100494


In [18]:
# merge scaled features with non-scaled features

# X_train = pd.merge(X_train_scaled, x_train.drop(columns=['age',  
#         'bill_amt_1', 'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
#         'pay_amt_1', 'pay_amt_2', 'pay_amt_3', 'pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
#          'limit_balance']).reset_index(drop=True), left_index=True, right_index=True)

X_train = X_train_scaled.join(x_train.drop(columns=['age', 
       'bill_amt_1', 'bill_amt_2', 'bill_amt_3','bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
       'pay_amt_1', 'pay_amt_2', 'pay_amt_3','pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
       'limit_balance']).reset_index(drop=True))

In [19]:
X_train.head()

Unnamed: 0,age,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,limit_balance,education_level,pay_0,pay_2,pay_3,pay_4,pay_5,pay_6,sex,others,single
0,0.098135,-0.718157,-0.712797,-0.656081,-0.66203,-0.676581,-0.44901,-0.295507,-0.084792,-0.140569,-0.215248,0.570493,-0.248511,-0.019936,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.885615,-0.045978,-0.046877,-0.134033,-0.063341,-0.001423,0.066774,-0.218995,-0.137506,-0.239268,-0.211688,-0.087358,-0.281494,-0.906082,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
2,1.300495,-0.028908,-0.032205,0.022519,0.10279,-0.656869,-0.702595,-0.295507,-0.122803,-0.198,-0.328832,-0.29446,2.123603,-0.906082,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-0.776309,3.168099,3.12514,3.332646,3.805802,3.916929,1.252276,-0.295281,0.117521,0.333052,0.173641,-0.292388,10.970793,0.94677,3.0,1.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0
4,0.972579,0.179921,-0.213537,-0.207158,0.790165,0.447903,0.392444,-0.069978,-0.104321,4.464748,-0.153223,-0.111723,-0.14956,-0.100494,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [20]:
X_train.shape

(2370, 24)

In [21]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2370 entries, 0 to 2369
Data columns (total 24 columns):
age                2368 non-null float64
bill_amt_1         2368 non-null float64
bill_amt_2         2368 non-null float64
bill_amt_3         2368 non-null float64
bill_amt_4         2368 non-null float64
bill_amt_5         2368 non-null float64
bill_amt_6         2368 non-null float64
pay_amt_1          2368 non-null float64
pay_amt_2          2368 non-null float64
pay_amt_3          2368 non-null float64
pay_amt_4          2368 non-null float64
pay_amt_5          2368 non-null float64
pay_amt_6          2368 non-null float64
limit_balance      2368 non-null float64
education_level    2368 non-null float64
pay_0              2368 non-null float64
pay_2              2368 non-null float64
pay_3              2368 non-null float64
pay_4              2368 non-null float64
pay_5              2368 non-null float64
pay_6              2368 non-null float64
sex                2368 non-null

In [22]:
# get null value indices in X_train

inds = pd.isnull(X_train).any(1).nonzero()[0]
inds

  This is separate from the ipykernel package so we can avoid doing imports until


array([1391, 1867])

In [23]:
# drop the above indices with null values from X_train and y_train

X_train.drop([1391, 1867], inplace = True)
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2368 entries, 0 to 2369
Data columns (total 24 columns):
age                2368 non-null float64
bill_amt_1         2368 non-null float64
bill_amt_2         2368 non-null float64
bill_amt_3         2368 non-null float64
bill_amt_4         2368 non-null float64
bill_amt_5         2368 non-null float64
bill_amt_6         2368 non-null float64
pay_amt_1          2368 non-null float64
pay_amt_2          2368 non-null float64
pay_amt_3          2368 non-null float64
pay_amt_4          2368 non-null float64
pay_amt_5          2368 non-null float64
pay_amt_6          2368 non-null float64
limit_balance      2368 non-null float64
education_level    2368 non-null float64
pay_0              2368 non-null float64
pay_2              2368 non-null float64
pay_3              2368 non-null float64
pay_4              2368 non-null float64
pay_5              2368 non-null float64
pay_6              2368 non-null float64
sex                2368 non-null

In [24]:
# converted y_train into a dataframe so I can deal with null values in x_train

y_train = pd.DataFrame(y_train, columns=['credit_card_default'])
y_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2370 entries, 0 to 2369
Data columns (total 1 columns):
credit_card_default    2370 non-null int64
dtypes: int64(1)
memory usage: 18.6 KB


In [25]:
# drop same rows with null values from y_train

y_train.drop([1391, 1867], inplace = True)
y_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2368 entries, 0 to 2369
Data columns (total 1 columns):
credit_card_default    2368 non-null int64
dtypes: int64(1)
memory usage: 37.0 KB


In [26]:
# split test data into features that will be scaled by dropping categorical features (same process as train data)

X_for_scaling2 = x_test.drop(columns=['education_level', 
        'pay_0', 'pay_2', 'pay_3','pay_4', 'pay_5', 'pay_6', 
        'sex', 'others', 'single'])

In [27]:
# scale test features

X_test_scaled = scaler.fit_transform(X_for_scaling2)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [28]:
# convert those features to dataframe

X_test_scaled = pd.DataFrame(X_test_scaled, columns=['age', 
       'bill_amt_1', 'bill_amt_2', 'bill_amt_3','bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
       'pay_amt_1', 'pay_amt_2', 'pay_amt_3','pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
       'limit_balance'])

In [29]:
X_test_scaled.head()

Unnamed: 0,age,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,limit_balance
0,-1.069387,-0.669816,-0.690259,-0.685146,-0.701047,-0.677536,-0.618999,-0.174529,-0.316118,-0.257875,-0.278114,-0.053508,0.00504,-0.677296
1,-1.181447,0.529878,0.555763,0.59542,0.707323,0.745784,0.783411,-0.160034,-0.201492,-0.114393,-0.118358,-0.108354,-0.126936,-0.521743
2,-0.621148,-0.726695,-0.724196,-0.683525,-0.71503,-0.689791,-0.671155,-0.329468,-0.312724,-0.322275,-0.349943,-0.2181,-0.213862,-0.366191
3,-1.405566,-0.140866,-0.111371,-0.280651,-0.251998,-0.190407,-0.17751,-0.24326,-0.316118,-0.271194,-0.324452,-0.234397,-0.196285,-0.910624
4,0.72357,2.604362,2.571945,2.404252,2.51106,2.601686,2.431268,0.173286,0.09986,0.150625,0.294966,0.047505,-0.015585,1.500438


In [30]:
# merge with categorical features that weren't scaled

# X_test = pd.merge(X_test_scaled, x_test.drop(columns=['age',  
#         'bill_amt_1', 'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
#         'pay_amt_1', 'pay_amt_2', 'pay_amt_3', 'pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
#          'limit_balance']).reset_index(drop=True), left_index=True, right_index=True)

X_test = X_test_scaled.join(x_test.drop(columns=['age', 
       'bill_amt_1', 'bill_amt_2', 'bill_amt_3','bill_amt_4', 'bill_amt_5', 'bill_amt_6', 
       'pay_amt_1', 'pay_amt_2', 'pay_amt_3','pay_amt_4', 'pay_amt_5', 'pay_amt_6', 
       'limit_balance']).reset_index(drop=True))

In [31]:
X_test.head()

Unnamed: 0,age,bill_amt_1,bill_amt_2,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6,limit_balance,education_level,pay_0,pay_2,pay_3,pay_4,pay_5,pay_6,sex,others,single
0,-1.069387,-0.669816,-0.690259,-0.685146,-0.701047,-0.677536,-0.618999,-0.174529,-0.316118,-0.257875,-0.278114,-0.053508,0.00504,-0.677296,3.0,0,0,0,0,0,0,0,0,1
1,-1.181447,0.529878,0.555763,0.59542,0.707323,0.745784,0.783411,-0.160034,-0.201492,-0.114393,-0.118358,-0.108354,-0.126936,-0.521743,2.0,0,0,0,0,0,0,1,0,1
2,-0.621148,-0.726695,-0.724196,-0.683525,-0.71503,-0.689791,-0.671155,-0.329468,-0.312724,-0.322275,-0.349943,-0.2181,-0.213862,-0.366191,1.0,0,0,0,0,0,0,0,0,0
3,-1.405566,-0.140866,-0.111371,-0.280651,-0.251998,-0.190407,-0.17751,-0.24326,-0.316118,-0.271194,-0.324452,-0.234397,-0.196285,-0.910624,2.0,0,0,0,0,0,0,0,0,0
4,0.72357,2.604362,2.571945,2.404252,2.51106,2.601686,2.431268,0.173286,0.09986,0.150625,0.294966,0.047505,-0.015585,1.500438,1.0,0,0,0,0,0,0,1,0,0


In [32]:
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 593 entries, 0 to 592
Data columns (total 24 columns):
age                593 non-null float64
bill_amt_1         593 non-null float64
bill_amt_2         593 non-null float64
bill_amt_3         593 non-null float64
bill_amt_4         593 non-null float64
bill_amt_5         593 non-null float64
bill_amt_6         593 non-null float64
pay_amt_1          593 non-null float64
pay_amt_2          593 non-null float64
pay_amt_3          593 non-null float64
pay_amt_4          593 non-null float64
pay_amt_5          593 non-null float64
pay_amt_6          593 non-null float64
limit_balance      593 non-null float64
education_level    593 non-null float64
pay_0              593 non-null int64
pay_2              593 non-null int64
pay_3              593 non-null int64
pay_4              593 non-null int64
pay_5              593 non-null int64
pay_6              593 non-null int64
sex                593 non-null int64
others             593 non-nu

In [None]:
# pickle column names to use later in Flask app

column_names = list(X_train.columns)

with open('column_names.pkl', 'wb') as f:
    pickle.dump(column_names, f)

# Modeling Round I

- KNN
- Logistic Regression
- SVM
- Naive Bayes
- Random Forest
- XGBoost ?

In [None]:
# Helper function for printing confusion matrices (see: https://gist.github.com/shaypal5/94c53d765083101efc0240d776a23823)

def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=18):
    """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.
    
    Arguments
    ---------
    confusion_matrix: numpy.ndarray
        The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 
        Similarly constructed ndarrays can also be used.
    class_names: list
        An ordered list of class names, in the order they index the given confusion matrix.
    figsize: tuple
        A 2-long tuple, the first value determining the horizontal size of the ouputted figure,
        the second determining the vertical size. Defaults to (10,7).
    fontsize: int
        Font size for axes labels. Defaults to 14.
        
    Returns
    -------
    matplotlib.figure.Figure
        The resulting confusion matrix figure
    """
    df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names, )
    fig = plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return fig

### Random Forest GridSearchCV

In [None]:
# Run CV with 5 folds (Random Forest)

# Create the parameter grid based on the results of random search 
param_grid = {
    'bootstrap': [True],
    'max_depth': [50, 100, None],
    'max_features': ['sqrt'], # what is this?
    'min_samples_leaf': [1, 2, 5, 10],
    'min_samples_split': [2, 3, 5, 10],
    'n_estimators': [100, 200, 400, 1000]
}

rf = RandomForestClassifier()
rf_grid = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='roc_auc', verbose=10, n_jobs=-1)
rf_grid.fit(X_train, y_train)

In [None]:
rf_grid.best_params_

In [None]:
rf_grid.best_estimator_

In [None]:
# rf_model with best params

rf_model = RandomForestClassifier(bootstrap = True, max_depth = None, max_features = 'sqrt', min_samples_leaf = 10, 
                                  min_samples_split = 10, n_estimators = 200)


In [None]:
# final model to pickle to use in Flask app

model = rf_model.fit(X_train, np.array(y_train))

In [None]:
# pickling final model for Flask app

filename = 'credit_card_default_model.sav'
pickle.dump(model, open(filename, 'wb'))



In [None]:
cm = print_confusion_matrix(confusion_matrix(y_train, rf_grid.predict(X_train)), ['Class 0', 'Class 1'], figsize=(5, 4), fontsize=15)

In [None]:
# Random Forest Feature Importances

rf2 = RandomForestRegressor(n_estimators=200, max_depth = None)
rf2.fit(X_train, y_train)
rf2.feature_importances_

In [None]:
# zip each feature importance weight with my columns

rf_feature_importances = pd.DataFrame(zip(list(rf2.feature_importances_), X_train.columns), columns=['feature_importances', 'feature_names'])
rf_feature_importances.sort_values(by='feature_importances', ascending=False)

### Scoring the models

#### ROC_AUC

In [None]:
print('Best ROC_AUC for rf: %0.4f' % rf_grid.best_score_)

#### F1

In [None]:
# score on F1

# from sklearn.metrics import f1_score

# y_true = 
# y_pred = 

# f1_score(y_true, y_pred, average='macro')  

# f1_score(y_true, y_pred, average='micro')  

# f1_score(y_true, y_pred, average='weighted')  

# f1_score(y_true, y_pred, average=None)


#### Best parameters

In [None]:
print('Best Params for rf: ', rf_grid.best_params_)

#### NOTE QUESTIONS IN BELOW COMMENTED CODE

In [None]:
# ROC for all the models

# ARE THE X/Y VARIABLES THE CORRECT ONES TO USE HERE?
# How does the ensembe work?

# How to add new models above into this? E.g., Linear SVC, 

model_list = [knn_grid.best_estimator_, 
              logistic_grid.best_estimator_, 
              svm_grid.best_estimator_, 
              gnb_best, 
              rf_grid.best_estimator_,
              'ensemble']
model_name = ['knn', 'logit', 'svm', 'n_bayes', 'random_forest', 'ensemble']

# Plot ROC curve for all my models
fig, ax = plt.subplots(figsize=(10,8))
for i, model in enumerate(model_list):
    if model == 'ensemble':
        w1 = 0.10
        w2 = 0.80
        y_pred = (w1*logistic_grid.best_estimator_.predict_proba(x_norm_test)[:,1] 
                  + w2*rf_grid.best_estimator_.predict_proba(x_norm_test)[:,1]
                  + (1-w1-w2)*gnb_best.predict_proba(x_norm_test)[:,1])
    else:
        y_pred = list(model.predict_proba(x_norm_test)[:,1])
    fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
    roc_auc = metrics.auc(fpr, tpr)
    plt.plot(fpr, tpr, label = (model_name[i] + ' AUC = %0.4f' % roc_auc))

plt.legend(loc = 'lower right')
plt.title('Receiver Operating Characteristic')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()