In [1]:
# import numpy, pandas, scipy, math, matplotlib
import numpy as np
import pandas as pd
import scipy 
import matplotlib.pyplot as plt

from math import sqrt
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
rawData = pd.read_csv('new_credit.csv')
# features
features = rawData.iloc[:,0:23]
# dependent variable
depVar = rawData['default payment next month']
# train and test sets
X_train, X_test, y_train, y_test = train_test_split(features, depVar, test_size=0.25)

** Random Forest Train **

In [3]:
modelRF = RandomForestClassifier(n_estimators=100)
modelRF.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [4]:
# accuracy
modelRF.score(X_train, y_train)

1.0

** Gradient Boosting Train **

In [5]:
GB = GradientBoostingClassifier()
modelGB = GB.fit(X_train, y_train)

In [6]:
# accuracy
modelGB.score(X_train, y_train)

0.8280888888888889

** SVM Train **

In [7]:
modelSVM = SVC()
modelSVM.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# accuracy
modelSVM.score(X_train, y_train)

0.9998666666666667

**Random Forest Cross Validation**

In [9]:
cross_val_score(RandomForestClassifier(n_estimators=25, random_state=0), X_train, y_train, cv=3)

array([0.81295827, 0.81626667, 0.8142419 ])

**Gradient Boosting Cross Validation**

In [10]:
GB_cv = GradientBoostingClassifier(n_estimators=25, random_state=0)
scores = cross_val_score(GB_cv, X_train, y_train, cv=3)
scores

array([0.82042394, 0.8248    , 0.82237632])

**SVM Cross Validation**

In [11]:
cross_val_score(SVC(random_state=0), X_train, y_train, cv=3)

array([0.77856286, 0.77853333, 0.77863715])

**Random Forest Test**

In [12]:
RF_cv = RandomForestClassifier(n_estimators=25, random_state=0)
modelRF_cv = RF_cv.fit(X_train, y_train)
predictions_RF_cv = cross_val_predict(modelRF_cv, X_test, y_test, cv=3)
confusion_matrix(y_test, predictions_RF_cv)

array([[5468,  377],
       [1095,  560]], dtype=int64)

In [13]:
reportRF = classification_report(y_test, predictions_RF_cv)
print(reportRF)

             precision    recall  f1-score   support

          0       0.83      0.94      0.88      5845
          1       0.60      0.34      0.43      1655

avg / total       0.78      0.80      0.78      7500



**Gradient Boosting Test**

In [14]:
predictions_GB = modelGB.predict(X_test)
confusion_matrix(y_test, predictions_GB)

array([[5540,  305],
       [1057,  598]], dtype=int64)

In [15]:
reportGB = classification_report(y_test, predictions_GB)
print(reportGB)

             precision    recall  f1-score   support

          0       0.84      0.95      0.89      5845
          1       0.66      0.36      0.47      1655

avg / total       0.80      0.82      0.80      7500



**SVM Test**

In [None]:
SVM_cv = SVC()
modelSVM_cv = SVM_cv.fit(X_train, y_train)
predictions_SVM_cv = cross_val_predict(modelSVM_cv, X_test, y_test, cv=3)
confusion_matrix(y_test, predictions_SVM_cv)

In [None]:
reportSVM = classification_report(y_test, predictions_SVM_cv)
print(reportSVM)