In [None]:
# Keras e tensorflow
import keras
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam, Adamax, SGD, RMSprop, Nadam, Ftrl

# Keras tuner
!pip install keras-tuner -q
import kerastuner as kt
from kerastuner import HyperModel
from kerastuner.tuners import BayesianOptimization

# others
import pandas as pd
import numpy as np
import json
import shutil
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from contextlib import redirect_stdout

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package).
# The dataset and output paths used further down in this notebook all live
# under /content/drive/MyDrive, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
class KerasHyperModel(HyperModel):
    """Search space for a one-hidden-layer dense softmax classifier.

    The optimizer family is fixed per instance; the tuner searches over the
    dropout rate, hidden activation, learning rate and hidden-layer width.

    Args:
        input_size: Number of input features fed to the first Dense layer.
        output_size: Number of units in the softmax output layer.
        optimizer_name: One of ``SUPPORTED_OPTIMIZERS``. ``'sgd'`` is plain
            SGD (momentum 0.0) and ``'sgdm'`` is SGD with momentum 0.9.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported name.
    """

    SUPPORTED_OPTIMIZERS = ('adam', 'adamax', 'nadam', 'rmsprop', 'sgd', 'sgdm')

    def __init__(self,
                 input_size,
                 output_size,
                 optimizer_name):
        # Let the keras-tuner base class initialise its own state.
        super().__init__()

        # Fail at construction time instead of in the middle of a search.
        if optimizer_name not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(
                'Invalid optimizer name %r; expected one of %s'
                % (optimizer_name, ', '.join(self.SUPPORTED_OPTIMIZERS)))

        # Kept under the historical attribute name `vocab_size`.
        self.vocab_size = input_size
        self.output_size = output_size
        self.optimizer_name = optimizer_name

    def _make_optimizer(self, lr):
        """Return a fresh optimizer of the configured family using rate `lr`."""
        factories = {
            'adam': lambda: Adam(learning_rate=lr),
            'adamax': lambda: Adamax(learning_rate=lr),
            'nadam': lambda: Nadam(learning_rate=lr),
            'rmsprop': lambda: RMSprop(learning_rate=lr),
            'sgd': lambda: SGD(learning_rate=lr, momentum=0.0),
            'sgdm': lambda: SGD(learning_rate=lr, momentum=0.9),
        }
        try:
            factory = factories[self.optimizer_name]
        except KeyError:
            # Reachable only if `optimizer_name` was reassigned after __init__.
            # Previously this case printed a message and compiled the model
            # with `optimizer=None`.
            raise ValueError(
                'Invalid optimizer name %r' % (self.optimizer_name,)) from None
        return factory()

    def build(self, hp):
        """Build and compile one candidate model from the hyperparameters `hp`.

        Declared hyperparameters:
            dropout: float in [0, 0.5], step 0.1, default 0.5.
            act: hidden activation, one of relu / tanh / elu / sigmoid.
            lr: float in [1e-3, 0.1], step 1e-4.
            units: int in [32, 256], step 5.

        Returns:
            A compiled ``Sequential`` model (categorical cross-entropy loss,
            accuracy metric).
        """
        dropout = hp.Float('dropout', 0, 0.5, step=0.1, default=0.5)
        act = hp.Choice(name='act', values=['relu', 'tanh', 'elu', 'sigmoid'])
        lr = hp.Float('lr', 1e-3, 0.1, step=0.0001)
        # L1/L2 regularisation search is currently disabled:
        # l1 = hp.Choice('l1',values=[0.0, 0.01, 0.001, 0.0001])
        # l2 = hp.Choice('l2',values=[0.0, 0.01, 0.001, 0.0001])
        units = hp.Int('units', min_value=32, max_value=256, step=5)

        opt = self._make_optimizer(lr)

        model = Sequential()
        model.add(Dense(units=units,
                        input_dim=self.vocab_size,
                        activation=act))
        model.add(Dropout(dropout))
        model.add(Dense(self.output_size, activation='softmax'))

        model.compile(optimizer=opt,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

In [None]:
def get_data(data_num, resampling, num,
             base_dir='/content/drive/MyDrive/PIBIC/datasets'):
    """Load one train/test split (TF-IDF features and class labels) from CSV.

    Files are read from::

        {base_dir}/data_{data_num}/train/{resampling}_tfidf_train({num}).csv
        {base_dir}/data_{data_num}/train/{resampling}_class_train({num}).csv
        {base_dir}/data_{data_num}/test/tfidf_test({num}).csv
        {base_dir}/data_{data_num}/test/class_test({num}).csv

    Args:
        data_num: Dataset identifier used in the ``data_<n>`` folder name.
        resampling: Prefix of the training files (e.g. ``'smote'``).
        num: Split index that appears in parentheses in the file names.
        base_dir: Root folder containing the ``data_<n>`` folders. Defaults to
            the Google Drive location this notebook has always used.

    Returns:
        Tuple ``(train_tfidf, train_class, test_tfidf, test_class)`` of NumPy
        arrays. The feature arrays hold every column of the corresponding CSV;
        the label arrays hold the ``'Class'`` column of the class CSVs.
    """
    dataset_dir = f'{base_dir}/data_{data_num}'
    train_prefix = f'{dataset_dir}/train/{resampling}'
    test_dir = f'{dataset_dir}/test'

    train_tfidf = pd.read_csv(f'{train_prefix}_tfidf_train({num}).csv').to_numpy()
    train_class = pd.read_csv(f'{train_prefix}_class_train({num}).csv')['Class'].to_numpy()

    # Test files are shared by all resampling strategies, hence no prefix.
    test_tfidf = pd.read_csv(f'{test_dir}/tfidf_test({num}).csv').to_numpy()
    test_class = pd.read_csv(f'{test_dir}/class_test({num}).csv')['Class'].to_numpy()

    return train_tfidf, train_class, test_tfidf, test_class

In [None]:
# Experiment grid: every dataset x resampling strategy x optimizer x split.
data_num = [1,2,3,4,5,6,7,8,9,10]
resampling = ['origin', 'tomek', 'adasyn', 'smote', 'bdsmote', 'smotetomek']
index = [1,2,3,4,5,6]
opt_name = ['adamax', 'rmsprop', 'adam']

# Width of the softmax output layer passed to every hypermodel.
NUM_CLASSES = 11
# Root folder for everything this search writes.
HYPER_ROOT = '/content/drive/MyDrive/PIBIC/hyperparametrization'

for dn in data_num:
    for res in resampling:
        for opt in opt_name:
            for i in index:
                # One tag identifies this run in all output paths.
                run_tag = res+'-data_'+str(dn)+'('+opt+'-'+str(i)+')'
                detail_dir = HYPER_ROOT+'/detail/'+run_tag
                path_model_best = HYPER_ROOT+'/hyper_models/data_'+str(dn)+'/'+res+'('+opt+'-'+str(i)+')'

                # The test split is loaded but not used by the search itself.
                train_tfidf, train_class, test_tfidf, test_class = get_data(dn, res, i)
                # One-hot encode labels for the categorical cross-entropy loss.
                train_class = LabelBinarizer().fit_transform(train_class)

                hypermodel = KerasHyperModel(input_size=train_tfidf.shape[1],
                                             output_size=NUM_CLASSES,
                                             optimizer_name=opt)

                tuner = BayesianOptimization(
                    hypermodel,
                    objective='val_loss',
                    max_trials=100,
                    directory=detail_dir,
                    project_name=opt+'-hyper',
                    executions_per_trial=2
                )

                es = EarlyStopping(monitor='val_loss', mode='min', verbose=False, patience=5)
                tuner.search(train_tfidf, train_class, epochs=60, validation_split=0.125, verbose=1, callbacks=[es])

                best_model = tuner.get_best_models(num_models=1)[0]

                # The original code wrote an undefined variable `log` here and
                # raised NameError after the search finished. The tuner's
                # results summary is captured into the log file instead.
                # NOTE(review): confirm this is the content the log was meant
                # to hold.
                with open(detail_dir+'.txt', 'w') as f:
                    with redirect_stdout(f):
                        tuner.results_summary()
                    f.write('\n')

                # Save the best model, its hyperparameters and its architecture.
                best_hp = tuner.get_best_hyperparameters()[0]
                best_model.save(path_model_best)

                with open(path_model_best+'/config.json', 'w') as json_file:
                    json.dump(best_hp.get_config()['values'], json_file)

                with open(path_model_best+'/model_summary.txt', 'w') as f:
                    with redirect_stdout(f):
                        best_model.summary()

                # Delete the tuner's working directory now that the results
                # are saved. Best-effort: a failure is reported, not raised.
                try:
                    shutil.rmtree(detail_dir)
                except OSError as e:
                    print ("Error: %s - %s." % (e.filename, e.strerror))