# Models and Evaluation

In [None]:
import pandas as pd
import numpy as np
from IPython.display import display

# Toggle read by cross_validation: True evaluates models with SMOTE
# oversampling, False evaluates them on the data as loaded.
use_smote = False

# Pre-processed dataset; the path is relative to the working directory.
# It must contain a 'status' column, which get_target uses as the label.
data = pd.read_csv('data_processed/complete/data_selected.csv')

# Preview the first rows as a sanity check on the loaded columns.
display(data.head())

In [None]:
def get_features(df):
    """Return a copy of ``df`` holding every column except ``status``."""
    return df.drop(columns='status')
def get_target(df):
    """Return the ``status`` label column of ``df``."""
    return df.loc[:, 'status']

# Score arrays keyed by model name; cross_validation writes into this dict.
# Re-running this cell discards everything collected so far.
results = {}

In [None]:
def oversample(X_train, y_train):
    """Resample the given features and labels with SMOTE.

    The sampler is built with ``sampling_strategy=1.0`` and a fixed
    ``random_state=1``. Returns the resampled ``(features, labels)`` pair.
    """
    # Imported here so imbalanced-learn is only needed when this is called.
    from imblearn.over_sampling import SMOTE

    sampler = SMOTE(sampling_strategy=1.0, random_state=1)
    X_resampled, y_resampled = sampler.fit_resample(X_train, y_train)
    return X_resampled, y_resampled

In [None]:
# Oversample the whole dataset once with SMOTE, before any cross-validation
# split is made.
# NOTE(review): this runs even when use_smote is False, so imbalanced-learn
# has to be installed either way — confirm that is intended.
# NOTE(review): resampling before splitting means held-out folds drawn from
# this data can contain synthetic samples — verify this is acceptable.
smote_features, smote_targets = oversample(get_features(data), get_target(data))

# Wrap the resampled labels in a DataFrame.
smote_targets = pd.DataFrame(smote_targets)

# print('\nTrain shape:',smote_targets.shape)
# print('Status ratio:',smote_targets[smote_targets['status']==1].shape[0],'|',smote_targets[smote_targets['status']==-1].shape[0])

In [None]:
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold

# Shared splitter for every model: 5 stratified folds repeated 3 times with a
# fixed seed, i.e. 15 train/test splits (and 15 scores) per model.
stratified_kfold = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=1,
)

In [None]:
from sklearn.model_selection import cross_validate

def cross_validation(name, model, df=data, cv=stratified_kfold):
    """Cross-validate ``model`` on ``df`` and record its ROC AUC scores.

    Args:
        name: Key under which the scores are stored in the global ``results``.
        model: Unfitted scikit-learn compatible classifier.
        df: Dataset holding the feature columns plus the ``status`` label.
        cv: Cross-validation splitter handed to ``cross_validate``.

    When the global ``use_smote`` flag is true, the model is wrapped in an
    imbalanced-learn pipeline so SMOTE is fitted on each training fold only.
    Oversampling the full dataset before splitting would put synthetic
    samples (interpolated from validation rows) into the training folds and
    inflate the reported ROC AUC.

    Side effects: stores the per-split score array in ``results[name]`` and
    prints the name together with the mean and standard deviation.
    """
    estimator = model
    if use_smote:
        # Local imports, so imbalanced-learn is only required with SMOTE on.
        from imblearn.over_sampling import SMOTE
        from imblearn.pipeline import make_pipeline

        # Same sampler settings as oversample(); the pipeline applies the
        # sampler during fit only, never when scoring the held-out fold.
        estimator = make_pipeline(
            SMOTE(random_state=1, sampling_strategy=1.0),
            model,
        )

    scores = cross_validate(
        estimator,
        get_features(df),
        get_target(df),
        scoring='roc_auc',
        cv=cv,
    )['test_score']

    results[name] = scores
    print(name, scores.mean(), scores.std())

### Algorithms

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree with default hyper-parameters. Seeded like the CV splitter so
# the scores are reproducible from run to run.
cross_validation('DT',
    DecisionTreeClassifier(random_state=1),
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest neighbours with the library's default settings.
cross_validation(name='KNN', model=KNeighborsClassifier())

In [None]:
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with default hyper-parameters. Seeded like the CV
# splitter so weight initialisation does not change the scores between runs.
cross_validation('MLP',
    MLPClassifier(random_state=1),
)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with the library's default settings.
cross_validation(name='GNB', model=GaussianNB())

In [None]:
from sklearn.svm import SVC
# Support vector classifier with probability estimates enabled.
# NOTE(review): ROC AUC scoring may be able to use decision_function instead,
# in which case probability=True only adds fitting cost — confirm.
cross_validation(name='SVM', model=SVC(probability=True))

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the library's default settings.
cross_validation(name='LR', model=LogisticRegression())

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default hyper-parameters. Seeded like the CV splitter so
# bootstrap and feature sampling are reproducible between runs.
cross_validation('RF',
    RandomForestClassifier(random_state=1),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with default hyper-parameters. Seeded like the CV
# splitter so the scores are reproducible from run to run.
cross_validation('GB',
    GradientBoostingClassifier(random_state=1),
)

In [None]:
import seaborn as sb
import matplotlib.pyplot as plt

# One column per model, one row per cross-validation split.
scores_frame = pd.DataFrame(results)

# Box plot of each model's score distribution, with the individual split
# scores overlaid as black points.
plt.figure(figsize=(10, 6))
sb.boxplot(data=scores_frame)
sb.stripplot(data=scores_frame, color='black')
plt.show()