In [1]:
import pandas as pd
import ast
import os
from pprint import pprint
import matplotlib.pyplot as plt
from auxiliar_func import *
from plot_func import *

# Importing the models
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

In [2]:
# Registry mapping the short model name embedded in each results file name
# ('results_<name>.csv') to the estimator class that gets instantiated with
# the best hyper-parameters found for it.
models = {
    'nb': GaussianNB,
    'lda': LDA,
    'qda': QDA,
    'knn': KNN,
    'logreg': LogisticRegression,
    'svm': LinearSVC,
    'rf': RandomForestClassifier,
    'xgb': XGBClassifier,
    'catboost': CatBoostClassifier,
}

df_tr = pd.read_csv('../train.csv')


def _strip_xgb_chars(value):
    """Remove '[', ']' and '<' from string cells; return other values unchanged."""
    if isinstance(value, str):
        return value.replace('[', '').replace(']', '').replace('<', '')
    return value


# for xgboost
# NOTE(review): presumably these characters would otherwise reach XGBoost as
# part of feature names after preprocessing -- confirm against the helpers.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use whichever element-wise method the installed pandas provides.
if hasattr(pd.DataFrame, 'map'):
    df_tr = df_tr.map(_strip_xgb_chars)
else:
    df_tr = df_tr.applymap(_strip_xgb_chars)

TARGET_METRIC = 'f1_macro'
SEED = 42
CV_FOLDS = 20

RESULTS_DIR = './results'
RESULTS_PREFIX = 'results_'
RESULTS_SUFFIX = '.csv'
METRIC_COLUMNS = ['accuracy', 'f1_macro', 'precision_macro', 'recall_macro']

# One entry per evaluated model: model name -> Series of its CV scores.
scores_by_model = {}

# sorted() makes the evaluation order independent of the filesystem's
# arbitrary directory-listing order.
for file in sorted(os.listdir(RESULTS_DIR)):
    if not (file.startswith(RESULTS_PREFIX) and file.endswith(RESULTS_SUFFIX)):
        continue

    # Slice off the known prefix/suffix instead of splitting on '_', so a
    # model name that itself contains an underscore is still parsed correctly.
    mod_name = file[len(RESULTS_PREFIX):-len(RESULTS_SUFFIX)]
    if mod_name not in models:
        raise KeyError(
            "No model registered for '" + mod_name + "' (from file '" + file + "')"
        )

    prep_par, model_par = get_best_params(RESULTS_DIR + '/' + file)
    model = models[mod_name](**model_par)
    score, y_pred, y_true = cross_validation(
        model, df_tr, prep_par, cv=CV_FOLDS, random_state=SEED, return_predict=True
    )

    scores_by_model[mod_name] = pd.Series(score)

    plot_conf_matrix(y_true, y_pred, './figures/conf_matrix_' + mod_name + '.pdf', show=False)

# Build the comparison table once (rather than growing it row by row), keep
# only the reported metric columns, and rank models by the target metric.
results = (
    pd.DataFrame.from_dict(scores_by_model, orient='index')
    .reindex(columns=METRIC_COLUMNS)
    .astype(float)
    .rename_axis('model')
    .sort_values(by=TARGET_METRIC, ascending=False)
)

In [3]:
# Persist the ranked comparison table. The file name is derived from CV_FOLDS
# so it always reflects the fold count the scores were computed with
# (CV_FOLDS = 20 gives './results/cv20_results.csv'). The name does not start
# with 'results_', so the scan of ./results for per-model files skips it.
results.to_csv('./results/cv{}_results.csv'.format(CV_FOLDS))
results.head(10)

Unnamed: 0_level_0,accuracy,f1_macro,precision_macro,recall_macro
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
catboost,0.953826,0.799765,0.806904,0.793224
xgb,0.951348,0.792804,0.794161,0.791706
rf,0.945026,0.778345,0.765724,0.79292
logreg,0.945141,0.774553,0.76673,0.783255
svm,0.945499,0.772375,0.76864,0.776474
knn,0.944783,0.75869,0.767163,0.750965
lda,0.938761,0.757889,0.742147,0.776732
qda,0.930957,0.743015,0.717831,0.777516
nb,0.9161,0.708427,0.679047,0.756181
