## Optimizer: Finding best hyper-parameters

In [None]:
# Mount Google Drive so the project files referenced under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install "numpy<1.24.0"

In [None]:
pip install transformers datasets scikit-optimize

In [None]:
pip install accelerate -U

In [None]:
pip install transformers[torch]

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from transformers import ElectraTokenizer, ElectraForSequenceClassification, TrainingArguments, Trainer
import torch
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from transformers import TrainerCallback
import os
import shutil
import re
import time
from pathlib import Path

from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
import numpy as np
from skopt import gp_minimize

In [7]:
# Hyperparameter search space handed to the optimizer.
# Every dimension's `name` is forwarded to `objective` as a keyword argument,
# so the names here must match that function's parameter names.
search_space = [
    Real(low=1e-5, high=5e-5, name='learning_rate'),
    Real(low=0.01, high=0.2, name='weight_decay'),
    Integer(low=3, high=5, name='num_train_epochs'),
    Integer(low=8, high=32, prior='log-uniform', name='per_device_train_batch_size'),
    Integer(low=8, high=64, prior='log-uniform', name='per_device_eval_batch_size'),
]

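# Best parameters reported by a previous run, in search_space order
# (learning_rate, weight_decay, num_train_epochs, train batch size, eval batch size):
# [5e-05, 0.135161130336292, 5, 13, 13]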
@use_named_args(search_space)
def objective(learning_rate, num_train_epochs, per_device_train_batch_size, per_device_eval_batch_size, weight_decay):
    """Fine-tune ELECTRA once with the given hyperparameters and score the run.

    The function is wrapped by ``use_named_args(search_space)``, so the
    optimizer passes each value as a keyword named after a search-space
    dimension.

    Args:
        learning_rate: Learning rate forwarded to ``TrainingArguments``.
        num_train_epochs: Number of training epochs (cast to ``int``).
        per_device_train_batch_size: Training batch size (cast to ``int``).
        per_device_eval_batch_size: Evaluation batch size (cast to ``int``).
        weight_decay: Weight decay forwarded to ``TrainingArguments``.

    Returns:
        The negated accuracy measured by ``trainer.evaluate()`` on the 20%
        stratified hold-out split (negated so that a minimiser maximises
        accuracy).

    Side effects:
        Deletes and recreates the ``<file_name>_optimizer`` output directory
        on every call, writes checkpoints/logs into its ``Dump`` sub-folder
        while training, and removes that sub-folder again before returning
        (also when training fails).

    NOTE(review): the same hold-out split is used for checkpoint selection
    and for the score returned to the optimizer; keep a separate test set for
    final reporting.
    """
    # Location of this notebook on the mounted Drive; the project root is derived from it.
    notebook_path = Path("/content/drive/MyDrive/FinalProject/Models/ELECTRA/ELECTRA_optimizer.ipynb")
    file_name = "dataset_balanced_4000"
    ext = "xlsx"
    path_type = "Balanced"

    # Two levels above the notebook's folder, i.e. the "FinalProject" directory.
    path_to_project = notebook_path.parent.parents[1]

    df = pd.read_excel(f"{path_to_project}/Data/Datasets/{path_type}/{file_name}.{ext}")

    results_dir = f"{path_to_project}/Models/ELECTRA/Output/{path_type}/{file_name}_optimizer"
    dump_dir = results_dir + "/Dump"

    # Start every trial from an empty output directory.
    if os.path.isdir(results_dir):
        shutil.rmtree(results_dir)
    # makedirs also creates missing parent folders (os.mkdir would raise instead).
    os.makedirs(dump_dir)

    # Work on an explicit copy so the column assignment below does not write
    # into a view of the original frame (avoids the chained-assignment warning).
    df = df[df['review'].notna()].copy()
    # Strip everything outside printable ASCII, then drop reviews that are
    # empty *after* cleaning so the tokenizer never receives an empty text.
    df['review'] = df['review'].astype(str).str.replace('[^\x20-\x7E]', '', regex=True)
    df = df[df['review'] != '']

    # Select the text and label columns
    X = df['review'].values
    y = df['label'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Encode the labels to a numeric format (fitted on the training labels only)
    label_encoder = LabelEncoder()
    y_train_encoded = label_encoder.fit_transform(y_train)
    y_test_encoded = label_encoder.transform(y_test)

    # Initialize the tokenizer for ELECTRA
    tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')

    def tokenize_function(texts):
        """Tokenize a list of texts, padded/truncated to 128 tokens."""
        return tokenizer(texts, padding="max_length", truncation=True, max_length=128)

    def compute_metrics(eval_pred):
        """Return the accuracy of the arg-max predictions against the labels."""
        predicted = eval_pred.predictions.argmax(-1)
        # accuracy_score expects (y_true, y_pred)
        return {"accuracy": accuracy_score(eval_pred.label_ids, predicted)}

    # Tokenize the data
    train_encodings = tokenize_function(X_train.tolist())
    val_encodings = tokenize_function(X_test.tolist())

    # Create dataset objects
    train_dataset = ReviewDataset(train_encodings, y_train_encoded)
    val_dataset = ReviewDataset(val_encodings, y_test_encoded)

    # Load a fresh pre-trained model for this trial
    model = ElectraForSequenceClassification.from_pretrained('google/electra-base-discriminator', num_labels=len(label_encoder.classes_))

    # Training arguments for this trial; the searched values are cast to plain
    # Python numbers because the optimizer may hand over NumPy scalars.
    training_args = TrainingArguments(
        output_dir=f"{dump_dir}/res",
        num_train_epochs=int(num_train_epochs),
        per_device_train_batch_size=int(per_device_train_batch_size),
        per_device_eval_batch_size=int(per_device_eval_batch_size),
        # NOTE(review): a fixed 500 warmup steps can exceed the total number of
        # optimizer steps for large batch sizes / few epochs, in which case the
        # target learning rate is never reached — consider `warmup_ratio`; confirm.
        warmup_steps=500,
        weight_decay=float(weight_decay),
        logging_dir=f"{dump_dir}/logs",
        logging_strategy="epoch",
        # NOTE(review): newer transformers releases rename this argument to
        # `eval_strategy` — confirm against the installed version.
        evaluation_strategy="epoch",
        learning_rate=float(learning_rate),
        max_grad_norm=1.0,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        save_strategy="epoch",
        save_total_limit=2,
        lr_scheduler_type='linear'
    )

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    try:
        # Train
        trainer.train()
        # Evaluate
        results = trainer.evaluate()
    finally:
        # Always remove the checkpoints/logs of this trial, even if training failed.
        shutil.rmtree(dump_dir)

    neg_accuracy = -results['eval_accuracy']

    print(results['eval_accuracy'])
    print(neg_accuracy)

    return neg_accuracy

# Custom dataset class
class ReviewDataset(torch.utils.data.Dataset):
    """Dataset that pairs tokenizer encodings with their labels.

    ``encodings`` is a mapping from field name to an indexable collection of
    per-sample values; ``labels`` is an indexable collection with one label
    per sample.
    """

    def __init__(self, encodings, labels):
        """Store the encodings and labels without copying them."""
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, including a 'labels' entry."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Run the optimisation: 20 evaluations of `objective`, each of which
# fine-tunes a fresh model. `random_state` fixes the optimizer's own sampling.
result = gp_minimize(objective, search_space, n_calls=20, random_state=0)

print("Best parameters:")
print(result.x)
# The raw list is positional (search_space order); also show each value next
# to its dimension name so the result can be read without the definition.
print({dim.name: value for dim, value in zip(search_space, result.x)})
# `objective` returns the negated accuracy, so flip the sign back for reporting.
print(f"Best accuracy: {-result.fun}")




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3598,1.262976,0.59125
2,1.0209,0.826621,0.70625
3,0.7034,0.697653,0.74625
4,0.516,0.705546,0.765
5,0.329,0.727951,0.76875


0.76875
-0.76875


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2967,0.978256,0.67875
2,0.8219,0.774553,0.71125
3,0.5403,0.687962,0.7475


0.7475
-0.7475


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3486,1.168134,0.6025
2,0.9557,0.77145,0.72125
3,0.6564,0.717127,0.73875
4,0.4666,0.767273,0.74625


0.74625
-0.74625


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1661,0.825108,0.705
2,0.7332,0.699907,0.75375
3,0.5151,0.761367,0.765
4,0.2877,0.93404,0.7425
5,0.1656,0.95766,0.775


0.775
-0.775


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.363,1.266669,0.5875
2,1.0012,0.772118,0.73
3,0.6883,0.730964,0.7325
4,0.462,0.659422,0.76375


0.76375
-0.76375


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1568,0.817764,0.70375
2,0.7256,0.686743,0.755
3,0.4539,0.812534,0.76
4,0.2639,0.975024,0.7725
5,0.1586,1.027178,0.7775


0.7775
-0.7775


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3006,0.972921,0.6675
2,0.8182,0.713272,0.74625
3,0.5427,0.702637,0.7525


0.7525
-0.7525


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3823,1.356138,0.4675
2,1.2544,1.065211,0.66375
3,0.9436,0.820003,0.7125
4,0.7083,0.752704,0.71875


0.71875
-0.71875


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3696,1.296959,0.5
2,1.0972,0.883818,0.69375
3,0.7772,0.735307,0.73


0.73
-0.73


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3709,1.307223,0.53875
2,1.0869,0.877356,0.695
3,0.751,0.699329,0.74125
4,0.5204,0.675853,0.75875
5,0.3633,0.704297,0.7625


0.7625
-0.7625


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2542,0.882162,0.7075
2,0.7807,0.817874,0.69375
3,0.6031,0.727251,0.76625
4,0.3628,0.749047,0.7775
5,0.2103,0.857962,0.775


0.7775
-0.7775


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1247,0.814074,0.70875
2,0.7295,0.688636,0.765
3,0.4596,0.839827,0.76375
4,0.2451,0.94311,0.77


0.77
-0.77


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3051,1.017356,0.66875
2,0.8431,0.71041,0.745
3,0.5939,0.713178,0.75375
4,0.452,0.743526,0.755
5,0.362,0.78217,0.7525


0.755
-0.755


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2052,0.810049,0.71125
2,0.7484,0.764448,0.725
3,0.5477,0.753833,0.7625
4,0.3199,0.839057,0.78125
5,0.1745,0.964219,0.755


0.78125
-0.78125


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1052,0.793085,0.7075
2,0.7144,0.764558,0.7275
3,0.4628,0.837149,0.77375
4,0.2436,1.000365,0.76375


0.77375
-0.77375


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0893,0.757577,0.715
2,0.7003,0.703041,0.7675
3,0.443,0.846468,0.75625
4,0.2532,1.017705,0.7675
5,0.1292,1.117656,0.77


0.77
-0.77


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3185,1.043171,0.65875
2,0.8467,0.741165,0.725
3,0.5822,0.72518,0.7375
4,0.4295,0.748362,0.7525
5,0.3434,0.783565,0.75875


0.75875
-0.75875


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1093,0.829137,0.695
2,0.7267,0.724496,0.73125
3,0.4527,0.823611,0.76125
4,0.2539,1.12903,0.74375
5,0.1229,1.143897,0.7675


0.7675
-0.7675


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.3371,1.105265,0.65
2,0.8864,0.755886,0.7275
3,0.6278,0.707579,0.74
4,0.3839,0.757788,0.7625
5,0.2311,0.827642,0.76125


0.7625
-0.7625


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.253,0.888831,0.69375
2,0.7894,0.779546,0.7075
3,0.6,0.680628,0.77
4,0.3565,0.738279,0.775
5,0.1798,0.905266,0.7675


0.775
-0.775
Best parameters:
[5e-05, 0.135161130336292, 5, 13, 13]
