In [None]:
import pandas as pd
import time
import sys
from pathlib import Path
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Go up one directory to get to master/ and make it importable.
# The membership check keeps this cell idempotent: re-running it does not
# append the same entry to sys.path again.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
def get_solutions_all_folds_rollOCT(name_dataset: str, depth=3, folds_available = 10, results_dir = "../results") -> dict:
    """Load the per-fold, per-depth classification CSVs from the ``pulp`` results folder.

    Files are read from
    ``<results_dir>/<name_dataset>/pulp/fold<i>/depth<j>_classification_<name_dataset>_<split>.csv``
    for every fold ``i`` in ``1..folds_available``, every depth ``j`` in
    ``2..depth`` (trees have a minimum depth of 2) and ``split`` in
    ``{'test', 'train'}``.

    Parameters
    ----------
    name_dataset : str
        Dataset name; used both as a sub-folder and inside the file names.
    depth : int, default 3
        Largest tree depth to load. Values below 2 yield empty per-fold dicts.
    folds_available : int, default 10
        Number of cross-validation folds (folders ``fold1`` .. ``fold<n>``).
    results_dir : str or path-like, default "../results"
        Root folder of the result files. The default reproduces the previously
        hard-coded location relative to the working directory.

    Returns
    -------
    dict
        ``sol_dict[fold][depth]['test' | 'train']`` -> ``pandas.DataFrame``
        with the content of the corresponding CSV file.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` if an expected file is missing.
    """
    pulp_dir = Path(results_dir) / name_dataset / "pulp"
    sol_dict = {}
    for fold in range(1, folds_available + 1):
        fold_dir = pulp_dir / f"fold{fold}"
        sol_dict[fold] = {
            tree_depth: {
                split: pd.read_csv(fold_dir / f"depth{tree_depth}_classification_{name_dataset}_{split}.csv")
                for split in ("test", "train")
            }
            for tree_depth in range(2, depth + 1)
        }
    return sol_dict

In [None]:
def get_solutions_all_folds_cart(name_dataset: str, depth=3, folds_available = 10, results_dir = "../results") -> dict:
    """Load the per-fold, per-depth classification CSVs from the ``cart`` results folder.

    Files are read from
    ``<results_dir>/<name_dataset>/cart/fold<i>/depth<j>_classification_<name_dataset>_<split>.csv``
    for every fold ``i`` in ``1..folds_available``, every depth ``j`` in
    ``2..depth`` (trees have a minimum depth of 2) and ``split`` in
    ``{'test', 'train'}``.

    Parameters
    ----------
    name_dataset : str
        Dataset name; used both as a sub-folder and inside the file names.
    depth : int, default 3
        Largest tree depth to load. Values below 2 yield empty per-fold dicts.
    folds_available : int, default 10
        Number of cross-validation folds (folders ``fold1`` .. ``fold<n>``).
    results_dir : str or path-like, default "../results"
        Root folder of the result files. The default reproduces the previously
        hard-coded location relative to the working directory.

    Returns
    -------
    dict
        ``sol_dict[fold][depth]['test' | 'train']`` -> ``pandas.DataFrame``
        with the content of the corresponding CSV file.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` if an expected file is missing.
    """
    cart_dir = Path(results_dir) / name_dataset / "cart"
    sol_dict = {}
    for fold in range(1, folds_available + 1):
        fold_dir = cart_dir / f"fold{fold}"
        sol_dict[fold] = {
            tree_depth: {
                split: pd.read_csv(fold_dir / f"depth{tree_depth}_classification_{name_dataset}_{split}.csv")
                for split in ("test", "train")
            }
            for tree_depth in range(2, depth + 1)
        }
    return sol_dict

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, matthews_corrcoef, roc_auc_score, f1_score

# Does not include any timing information.
# NOTE: the target variables have to be iterated over.
# We want result_dict[depth]['test'/'train'][target_var] holding sens, spec, prec, acc for all folds, so that it can be combined with the original target variable afterwards.
def _macro_ovr_auc_from_labels(y_true, y_pred):
    """Macro-averaged one-vs-rest ROC AUC computed from hard label predictions.

    ``roc_auc_score(..., multi_class='ovr')`` expects a per-class score matrix
    for multiclass targets, which is not available here (only predicted labels
    are). Each class present in ``y_true`` is therefore scored as a binary
    "class vs. rest" problem with the 0/1 prediction indicator as the score,
    and the per-class values are averaged. Classes that cover all of ``y_true``
    are skipped because their AUC is undefined; if no class can be scored the
    result is NaN.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    per_class_auc = []
    for label in np.unique(y_true):
        is_label = (y_true == label).astype(int)
        if is_label.min() == is_label.max():
            # Only one class present in the binarised target -> AUC undefined.
            continue
        per_class_auc.append(roc_auc_score(is_label, (y_pred == label).astype(int)))
    return float(np.mean(per_class_auc)) if per_class_auc else float("nan")


def _classification_metrics(y_true, y_pred):
    """Return all metrics of one fold/split as a ``{metric_name: value}`` dict."""
    return {
        'acc': accuracy_score(y_true, y_pred),
        # average='macro' is required for multiclass targets (the default
        # 'binary' raises) and matches the macro averaging used for ROC AUC.
        'prec': precision_score(y_true, y_pred, average='macro'),
        'recall': recall_score(y_true, y_pred, average='macro'),
        'mcc': matthews_corrcoef(y_true, y_pred),
        'roc_auc': _macro_ovr_auc_from_labels(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='macro'),
    }


def solutions_all_depths_all_folds_multiclass(dataset_name, list_target_vars, max_tree_depth, folds_available, cart = False):
    """Average the cross-validation metrics over all folds for every tree depth.

    The stored classifications are loaded with ``get_solutions_all_folds_cart``
    (``cart`` truthy) or ``get_solutions_all_folds_rollOCT`` (otherwise). For
    each depth in ``2..max_tree_depth`` and each split, the columns ``'y'``
    (true label) and ``'prediction'`` of every fold are scored and the
    per-fold values are averaged.

    Parameters
    ----------
    dataset_name : str
        Dataset name forwarded to the loader.
    list_target_vars
        Currently unused; kept so existing callers keep working.
    max_tree_depth : int
        Largest tree depth to evaluate (smallest is 2).
    folds_available : int
        Number of cross-validation folds to average over.
    cart : bool, default False
        Selects which loader (and therefore which results folder) is used.

    Returns
    -------
    dict
        ``result_dict[depth]['test' | 'train'][metric]`` -> mean over folds,
        with ``metric`` in ``'acc'``, ``'prec'``, ``'recall'``, ``'mcc'``,
        ``'roc_auc'``, ``'f1'``.
    """
    metric_names = ('acc', 'prec', 'recall', 'mcc', 'roc_auc', 'f1')

    load_solutions = get_solutions_all_folds_cart if cart else get_solutions_all_folds_rollOCT
    sol_dict = load_solutions(name_dataset=dataset_name, depth=max_tree_depth, folds_available=folds_available)

    result_dict = {}
    for depth in range(2, max_tree_depth + 1):
        result_dict[depth] = {}
        for split in ('test', 'train'):
            # Each split is scored on its own data (train metrics used to be
            # computed from the test arrays by mistake).
            per_fold = []
            for fold in range(1, folds_available + 1):
                classified = sol_dict[fold][depth][split]
                per_fold.append(_classification_metrics(classified['y'], classified['prediction']))

            result_dict[depth][split] = {
                metric: float(np.mean([fold_metrics[metric] for fold_metrics in per_fold]))
                for metric in metric_names
            }

    return result_dict