In [1]:
# import numpy, pandas, scipy, math, matplotlib
import numpy as np
import pandas as pd
import scipy 
import matplotlib.pyplot as plt

from math import sqrt
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
rawData = pd.read_csv('new_credit.csv')
# features
features = rawData.iloc[:,0:23]
# dependent variable
depVar = rawData['default payment next month']
# train and test sets
X_train, X_test, y_train, y_test = train_test_split(features, depVar, test_size=0.25)

** Random Forest Train **

In [3]:
modelRF = RandomForestClassifier(n_estimators=100)
modelRF.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [4]:
# accuracy
modelRF.score(X_train, y_train)

1.0

** Gradient Boosting Train **

In [5]:
GB = GradientBoostingClassifier()
modelGB = GB.fit(X_train, y_train)

In [6]:
# accuracy
modelGB.score(X_train, y_train)

0.8280888888888889

** SVM Train **

In [None]:
modelSVM = SVC()
modelSVM.fit(X_train, y_train)

In [None]:
# accuracy
modelSVM.score(X_train, y_train)

**Random Forest Cross Validation**

In [None]:
cross_val_score(RandomForestClassifier(n_estimators=25, random_state=0), X_train, y_train, cv=3)

**Gradient Boosting Cross Validation**

In [None]:
GB_cv = GradientBoostingClassifier(n_estimators=25, random_state=0)
scores = cross_val_score(GB_cv, X_train, y_train, cv=3)
scores

**SVM Cross Validation**

In [None]:
cross_val_score(SVC(random_state=0), X_train, y_train, cv=3)

**Random Forest Test**

In [None]:
RF_cv = RandomForestClassifier(n_estimators=25, random_state=0)
modelRF_cv = RF_cv.fit(X_train, y_train)
predictions_RF_cv = cross_val_predict(modelRF_cv, X_test, y_test, cv=3)
confusion_matrix(y_test, predictions_RF_cv)

In [None]:
reportRF = classification_report(y_test, predictions_RF_cv)
print(reportRF)

**Gradient Boosting Test**

In [None]:
predictions_GB = modelGB.predict(X_test)
confusion_matrix(y_test, predictions_GB)

In [None]:
reportGB = classification_report(y_test, predictions_GB)
print(reportGB)

**SVM Test**

In [None]:
SVM_cv = SVC()
modelSVM_cv = SVM_cv.fit(X_train, y_train)
predictions_SVM_cv = cross_val_predict(modelSVM_cv, X_test, y_test, cv=3)
confusion_matrix(y_test, predictions_SVM_cv)

In [None]:
reportSVM = classification_report(y_test, predictions_SVM_cv)
print(reportSVM)