In [None]:
from tensorflow import keras
import csv
import warnings
import numpy as np
import pandas as pd
from imblearn.metrics import geometric_mean_score
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# notebook can read and write files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Project root on the mounted Drive; the model, dataset and result paths
# used by the helper functions are built by appending to this prefix.
path_drive = '/content/drive/MyDrive/PIBIC'

In [None]:
# Names of the optimizers whose saved models are evaluated.
clf_name = ['adamax', 'rmsprop', 'adam']
# One (initially empty) list of models per optimizer name.
clf_models = {optimizer: [] for optimizer in clf_name}

def get_clf(data_num, resampling, n_folds=6):
  """Load the saved Keras models of every optimizer for one dataset.

  Models are read from
  ``<path_drive>/hyper/data_<data_num>/<resampling>(<optimizer>-<fold>)``
  with ``fold`` running from 1 to ``n_folds``.

  Args:
    data_num: dataset number used in the ``data_<n>`` folder name.
    resampling: resampling name used as the model file prefix.
    n_folds: how many fold models to load per optimizer (default 6, the
      value that used to be hard-coded).

  Returns:
    Dict mapping each name in ``clf_name`` to the list of its loaded
    models, ordered by fold.
  """
  base_dir = path_drive +'/hyper/data_'+str(data_num)+'/'
  # Keys come from clf_name itself, so adding an optimizer there cannot
  # raise a KeyError here.
  models_by_optimizer = {}
  for clf in clf_name:
    models_by_optimizer[clf] = [
        keras.models.load_model(base_dir+resampling+'('+clf+'-'+str(fold)+')')
        for fold in range(1, n_folds + 1)
    ]
  return models_by_optimizer

In [None]:
def get_data(data_num, resampling, num):
    """Read the train and test CSV files of one fold and return them as arrays.

    Args:
        data_num: dataset number used in the ``data_<n>`` folder name.
        resampling: resampling name prefixed to the training file names.
        num: fold number that appears in parentheses in the file names.

    Returns:
        Tuple ``(train_tfidf, train_class, test_tfidf, test_class)``; the
        class arrays hold the ``'Class'`` column of their CSV files.
    """
    dataset_dir = f'{path_drive}/datasets/data_{data_num}'
    fold_suffix = f'({num}).csv'

    # Training files carry the resampling prefix, test files do not.
    train_features = pd.read_csv(f'{dataset_dir}/train/{resampling}_tfidf_train{fold_suffix}')
    train_labels = pd.read_csv(f'{dataset_dir}/train/{resampling}_class_train{fold_suffix}')
    test_features = pd.read_csv(f'{dataset_dir}/test/tfidf_test{fold_suffix}')
    test_labels = pd.read_csv(f'{dataset_dir}/test/class_test{fold_suffix}')

    return (
        np.array(train_features),
        np.array(train_labels['Class']),
        np.array(test_features),
        np.array(test_labels['Class']),
    )

def gravacaoCSV(nomeArquivo, dicio):
    """Append one row to ``<path_drive>/results/<nomeArquivo>.csv``.

    If the file does not exist yet it is created with a header row taken
    from the keys of ``dicio``; the values of ``dicio`` are then written as
    a data row.

    Args:
        nomeArquivo: file name (without extension) relative to the results folder.
        dicio: mapping of column name to value; its iteration order is the column order.

    Raises:
        OSError: if the file cannot be opened (for example when the target
            folder is missing).
    """
    path = path_drive +'/results/' + nomeArquivo + '.csv'

    # Probe for the file inside a ``with`` so the handle is closed again.
    # Only "file not found" means "new file"; any other error propagates
    # instead of silently overwriting existing results.
    try:
        with open(path, 'r'):
            is_new_file = False
    except FileNotFoundError:
        is_new_file = True

    # newline='' is what the csv module expects, so the writer alone
    # controls the line terminator.
    with open(path, 'a', newline='') as arq:
        writer = csv.writer(arq)
        if is_new_file:
            writer.writerow(dicio.keys())
        writer.writerow(dicio.values())

def calcularMedia(divisor, dicio):
    """Divide every metric of every algorithm by ``divisor``.

    ``dicio`` maps algorithm -> {metric: value}. It is updated in place and
    the same object is returned.
    """
    for metricas in dicio.values():
        for nome in list(metricas):
            metricas[nome] = metricas[nome] / divisor
    return dicio

def get_structure_results():
    """Return a fresh accumulator: optimizer -> {metric: 0.0}."""
    optimizers = ('adamax', 'rmsprop', 'adam')
    metrics = ('Precision', 'Recall', 'F1', 'AUC', 'Kappa')
    # Each optimizer gets its own inner dict so totals never alias each other.
    return {optimizer: dict.fromkeys(metrics, 0.0) for optimizer in optimizers}

def get_results(y_pred, y_test, y_proba):
  """Compute the evaluation metrics for one set of predictions.

  Args:
    y_pred: predicted class label of every sample.
    y_test: true class label of every sample.
    y_proba: accepted for interface compatibility; not used by the
      computation (see the review note on the AUC line).

  Returns:
    Tuple ``(precision, recall, f1, auc, kappa)`` where precision, recall
    and F1 are macro averages over the binarized labels.
  """
  kappa = cohen_kappa_score(y_test, y_pred)

  # Fit the binarizer on the labels of BOTH vectors. Fitting on y_pred alone
  # drops every true class the model never predicts: its rows are encoded as
  # all zeros, so the class disappears from the macro averages.
  lb = LabelBinarizer().fit(list(y_test) + list(y_pred))
  y_pred = lb.transform(y_pred)
  y_test = lb.transform(y_test)

  precisao = precision_score(y_test, y_pred, average='macro')
  recall = recall_score(y_test, y_pred, average='macro')
  f1 = f1_score(y_test, y_pred, average='macro')
  # NOTE(review): the AUC is computed from the hard (binarized) predictions;
  # y_proba is ignored. Confirm whether the class scores were meant to be
  # used here before changing it, since that alters the reported values.
  a = roc_auc_score(y_test, y_pred, multi_class='ovr')

  return precisao, recall, f1, a, kappa

In [None]:
class Evaluate:
    """Score the saved models of every optimizer for one resampling strategy.

    ``run`` evaluates each dataset and fold, and writes three kinds of CSV
    reports through ``gravacaoCSV``: per-fold scores, per-dataset
    cross-validation means, and the mean over all datasets.
    """

    # Metric names in the order get_results() returns them; this is also the
    # column order of the rows written by run().
    METRICS = ('Precision', 'Recall', 'F1', 'AUC', 'Kappa')

    def __init__(self, interaction, k_fold, resampling):
        """Store the evaluation settings.

        Args:
            interaction: sequence of dataset numbers to evaluate.
            k_fold: number of folds evaluated per dataset.
            resampling: resampling name used in model, dataset and result
                file names.
        """
        self.resampling = resampling
        self.k_fold = k_fold
        self.interaction = interaction

    def run(self):
        """Evaluate every dataset/fold/optimizer and append the results to the CSV reports."""
        # Silences all warnings for the rest of the session.
        warnings.filterwarnings('ignore')
        final_mean = get_structure_results()
        print('-- '+self.resampling+' --')

        for index in self.interaction:

            cv_mean = get_structure_results()
            clfs = get_clf(index, self.resampling)
            print(repr(index) + ' interaction')

            for i in range(self.k_fold):
                # Only the test split is scored; the training arrays are not used.
                _, _, test_tfidf, test_class = get_data(index, self.resampling, i+1)
                print(repr(i + 1) + ' dobra')

                for model in clfs:
                    model_i = clfs[model][i]

                    # predict() already returns the per-class scores. The
                    # former predict_proba() call is gone from current Keras
                    # and only repeated the same forward pass.
                    y_pred_proba = model_i.predict(test_tfidf)
                    y_pred_bool = np.argmax(y_pred_proba, axis=1)

                    results = dict(zip(self.METRICS,
                                       get_results(y_pred_bool, test_class, y_pred_proba)))

                    # NOTE(review): the 'interaction' column holds the fold
                    # number (i+1), not the dataset number.
                    score = {'Optimizer': model, 'interaction': str(i+1), **results}

                    for metric in self.METRICS:
                        cv_mean[model][metric] += results[metric]
                    path_results = 'data_' + str(index) + '/score(' + self.resampling + ')'
                    gravacaoCSV(path_results, score)

            cv_mean = calcularMedia(self.k_fold, cv_mean)
            for clf in cv_mean:
                means = {metric: cv_mean[clf][metric] for metric in self.METRICS}

                for metric in self.METRICS:
                    final_mean[clf][metric] += means[metric]

                helper_cv_mean = {'Optimizer': clf, **means}
                helper_general_cv_mean = {'Optimizer': clf, 'Interaction': index, **means}

                path_mean_cv = 'data_' + str(index) + '/cv_mean(' + self.resampling + ')'
                gravacaoCSV(path_mean_cv, helper_cv_mean)

                path_general = 'general_cv_mean(' + self.resampling + ')'
                gravacaoCSV(path_general, helper_general_cv_mean)

        n_datasets = len(self.interaction)
        if n_datasets == 0:
            # Nothing was evaluated; skip the final report instead of dividing by zero.
            return

        final_mean = calcularMedia(n_datasets, final_mean)
        path_final = 'final_mean(' + self.resampling +')'
        for clf in final_mean:
            helper_final_mean = {'Optimizer': clf,
                                 **{metric: final_mean[clf][metric] for metric in self.METRICS}}
            gravacaoCSV(path_final, helper_final_mean)

In [None]:
# Resampling strategies to evaluate; each name is used in the file names of
# the saved models, the training data and the result reports.
resamplings = ['origin', 'tomek', 'adasyn', 'smote', 'bdsmote', 'smotetomek']
# Dataset numbers to evaluate.
# NOTE(review): the output stored with this cell lists datasets 5-9 only, so
# it appears to come from a run with a different list -- confirm before
# relying on it.
data_num = [1,2,3,4,5,6,7,8,9,10]

for res in resamplings:
  # Named `evaluator` so the builtin `eval` is not shadowed for the rest of the session.
  evaluator = Evaluate(interaction=data_num, k_fold=6, resampling=res)
  evaluator.run()

-- origin --
5 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
6 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
7 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
8 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
9 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
-- tomek --
5 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
6 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
7 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
8 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
9 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
-- adasyn --
5 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
6 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
7 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
8 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
9 interaction
1 dobra
2 dobra
3 dobra
4 dobra
5 dobra
6 dobra
-- smote --
5 interaction
1 dobr