## Optimizer: Finding the best hyperparameters

In [None]:
# Colab-only: mount Google Drive at /content/drive. The objective function in
# this notebook reads its dataset from, and writes its output under, this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install "numpy<1.24.0"

In [None]:
pip install transformers datasets scikit-optimize

In [None]:
pip install accelerate -U

In [None]:
pip install transformers[torch]

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from transformers import TrainerCallback
import os
import shutil
import re
import time
from pathlib import Path

from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
import numpy as np
from skopt import gp_minimize

In [8]:
# Hyperparameter search space handed to the optimizer.
# Each dimension's `name` is matched against the keyword arguments of
# `objective` by `use_named_args`, so the list order here only determines the
# order of values in the optimizer's result.
# NOTE(review): the evaluation batch size is searched as well; it presumably
# has no real effect on accuracy — confirm whether it should be a dimension.
search_space = [
    Real(low=1e-5, high=5e-5, name='learning_rate'),
    Real(low=0.01, high=0.2, name='weight_decay'),
    Integer(low=3, high=5, name='num_train_epochs'),
    Integer(low=8, high=32, prior='log-uniform', name='per_device_train_batch_size'),
    Integer(low=8, high=64, prior='log-uniform', name='per_device_eval_batch_size'),
]


@use_named_args(search_space)
def objective(learning_rate, num_train_epochs, per_device_train_batch_size, per_device_eval_batch_size, weight_decay):
    """Fine-tune ERNIE 2.0 with one hyperparameter configuration and score it.

    Reads the balanced review dataset from Google Drive, makes a stratified
    80/20 split, fine-tunes ``nghuyong/ernie-2.0-base-en`` on the 80% part and
    evaluates on the remaining 20% after every epoch. The checkpoint with the
    best accuracy is reloaded at the end of training and evaluated once more.

    Args:
        learning_rate: Learning rate passed to ``TrainingArguments``.
        num_train_epochs: Number of training epochs (cast to ``int``).
        per_device_train_batch_size: Training batch size (cast to ``int``).
        per_device_eval_batch_size: Evaluation batch size (cast to ``int``).
        weight_decay: Weight decay passed to ``TrainingArguments``.

    Returns:
        float: The negated evaluation accuracy, so that a minimizer ends up
        maximizing accuracy.

    Side effects:
        Deletes and recreates the results directory for this dataset on every
        call, and removes its ``Dump`` sub-directory (checkpoints and logs)
        again when the trial finishes, whether it succeeded or failed.
    """
    import gc  # local import: only needed for the per-trial cleanup below

    # A notebook has no usable __file__, so its Drive location is spelled out
    # here (under a regular local name rather than by shadowing the dunder).
    notebook_path = Path("/content/drive/MyDrive/FinalProject/Models/ERNIE/ERNIE_optimizer.ipynb")
    file_name = "dataset_balanced_4000"
    ext = "xlsx"
    path_type = "Balanced"

    # <project>/Models/ERNIE/<notebook>  ->  project root is two levels above
    # the notebook's directory.
    project_root = notebook_path.parent.parents[1]

    dataset_path = project_root / "Data" / "Datasets" / path_type / f"{file_name}.{ext}"
    df = pd.read_excel(dataset_path)

    results_dir = project_root / "Models" / "ERNIE" / "Output" / path_type / file_name
    dump_dir = results_dir / "Dump"

    # Start every trial from an empty results directory. parents=True also
    # creates missing intermediate folders (e.g. Output/<path_type>), which a
    # plain os.mkdir would fail on.
    if results_dir.is_dir():
        shutil.rmtree(results_dir)
    dump_dir.mkdir(parents=True)

    # Keep only rows that actually contain review text. The explicit copy
    # avoids assigning into a view of the original frame below.
    has_text = df['review'].notna() & (df['review'] != '')
    df = df.loc[has_text].copy()
    # Strip every character outside the printable ASCII range.
    df['review'] = df['review'].str.replace('[^\x20-\x7E]', '', regex=True)

    texts = df['review'].values
    labels = df['label'].values

    train_texts, eval_texts, train_labels, eval_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Encode the labels to integers; the encoder is fitted on the training
    # labels only and then applied to the held-out labels.
    label_encoder = LabelEncoder()
    train_labels_encoded = label_encoder.fit_transform(train_labels)
    eval_labels_encoded = label_encoder.transform(eval_labels)

    # Tokenizer for the ERNIE 2.0 checkpoint that is fine-tuned below.
    tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-base-en")

    def tokenize_function(texts):
        # Pad/truncate every review to exactly 128 tokens.
        return tokenizer(texts, padding="max_length", truncation=True, max_length=128)

    train_encodings = tokenize_function(train_texts.tolist())
    val_encodings = tokenize_function(eval_texts.tolist())

    train_dataset = ReviewDataset(train_encodings, train_labels_encoded)
    val_dataset = ReviewDataset(val_encodings, eval_labels_encoded)

    # A fresh model per trial, with one output per distinct label.
    model = AutoModelForSequenceClassification.from_pretrained(
        "nghuyong/ernie-2.0-base-en", num_labels=len(label_encoder.classes_)
    )

    # NOTE(review): warmup_steps is fixed at 500 regardless of batch size and
    # epoch count; for configurations with few optimizer steps the warmup may
    # never finish, so the requested learning rate would not be reached —
    # confirm this is intended.
    # NOTE(review): newer transformers releases reportedly rename
    # `evaluation_strategy` to `eval_strategy` — verify against the installed
    # version.
    training_args = TrainingArguments(
        output_dir=f"{dump_dir}/res",
        num_train_epochs=int(num_train_epochs),
        per_device_train_batch_size=int(per_device_train_batch_size),
        per_device_eval_batch_size=int(per_device_eval_batch_size),
        warmup_steps=500,
        weight_decay=weight_decay,
        logging_dir=f"{dump_dir}/logs",
        logging_strategy="epoch",
        evaluation_strategy="epoch",
        learning_rate=learning_rate,
        max_grad_norm=1.0,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        save_strategy="epoch",
        save_total_limit=2,
        lr_scheduler_type='linear'
    )

    def compute_metrics(eval_pred):
        # accuracy_score expects (y_true, y_pred).
        predicted = eval_pred.predictions.argmax(-1)
        return {"accuracy": accuracy_score(eval_pred.label_ids, predicted)}

    # NOTE(review): the same 20% split drives checkpoint selection and the
    # value returned to the optimizer; there is no separate untouched test set
    # in this function.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    try:
        trainer.train()
        results = trainer.evaluate()
    finally:
        # Best-effort cleanup that also runs when training fails: drop the
        # checkpoints/logs and release the model before the next trial so
        # memory does not pile up across optimizer calls.
        shutil.rmtree(dump_dir, ignore_errors=True)
        del trainer, model
        gc.collect()
        torch.cuda.empty_cache()

    # The optimizer minimizes, so hand back the negated accuracy.
    neg_accuracy = -float(results['eval_accuracy'])

    print(results['eval_accuracy'])
    print(neg_accuracy)

    return neg_accuracy

# Custom dataset class
class ReviewDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    ``encodings`` is a mapping from field name to a per-sample indexable
    collection; ``labels`` is an indexable collection of the same length.
    Indexing returns one sample as a dict of tensors: every encoding field
    plus a ``'labels'`` entry.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Run the Bayesian search: 20 evaluations of `objective`, with a fixed
# random_state for the optimizer.
result = gp_minimize(objective, search_space, n_calls=20, random_state=0)

print("Best parameters:")
print(result.x)

# result.x follows the order of `search_space`, which is not the parameter
# order of `objective`; label each value so it cannot be misread.
best_params = {dim.name: value for dim, value in zip(search_space, result.x)}
print(best_params)

# The objective returns negated accuracy, so negate the minimum found.
print(f"Best validation accuracy: {-result.fun}")




Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1234,0.781327,0.705
2,0.6803,0.693591,0.7325
3,0.5348,0.679082,0.74125
4,0.3835,0.790912,0.73125
5,0.2008,0.797438,0.75


0.75
-0.75


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9856,0.737532,0.71375
2,0.6429,0.68928,0.7425
3,0.3837,0.738532,0.7525


0.7525
-0.7525


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1161,0.760477,0.7025
2,0.6604,0.698233,0.74375
3,0.4989,0.706954,0.75625
4,0.3501,0.808252,0.745


0.75625
-0.75625


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9522,0.781283,0.70625
2,0.6591,0.75037,0.745
3,0.4055,0.826662,0.745
4,0.2089,1.089399,0.7675
5,0.1102,1.166088,0.775


0.775
-0.775


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0954,0.755583,0.7075
2,0.6714,0.680237,0.7375
3,0.5175,0.687983,0.735
4,0.3278,0.80509,0.7375


0.7375
-0.7375


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9509,0.774461,0.73
2,0.6596,0.671834,0.7725
3,0.403,0.887968,0.755
4,0.2235,1.13507,0.76125
5,0.1087,1.253366,0.75375


0.7725
-0.7725


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9977,0.762457,0.71
2,0.6392,0.679926,0.74875
3,0.3718,0.751638,0.7525


0.7525
-0.7525


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2868,0.970001,0.67375
2,0.7999,0.726653,0.7225
3,0.6011,0.706674,0.74875
4,0.4625,0.731831,0.7325


0.74875
-0.74875


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.2118,0.828546,0.7
2,0.7232,0.709533,0.7425
3,0.5568,0.740155,0.74125


0.7425
-0.7425


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1902,0.819959,0.6925
2,0.7046,0.682325,0.73125
3,0.5455,0.683043,0.74
4,0.3677,0.736869,0.7425
5,0.1924,0.7915,0.73375


0.7425
-0.7425


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.925,0.797097,0.72375
2,0.6718,0.909661,0.7125
3,0.4255,0.802465,0.765
4,0.2056,1.154131,0.75625
5,0.0927,1.146166,0.76375


0.765
-0.765


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9321,0.775497,0.7175
2,0.6706,0.740044,0.7425
3,0.3949,0.792171,0.755
4,0.1844,1.056821,0.77


0.77
-0.77


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9698,0.828352,0.69
2,0.6617,0.765289,0.73875
3,0.4702,0.799634,0.73125
4,0.238,0.999154,0.7425
5,0.0994,1.171735,0.755


0.755
-0.755


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1017,0.766769,0.72
2,0.6678,0.669169,0.73625
3,0.5063,0.676641,0.76
4,0.3587,0.766057,0.73
5,0.2331,0.858595,0.75125


0.76
-0.76


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9186,0.880779,0.6975
2,0.6662,0.768078,0.73375
3,0.342,0.904452,0.7525


0.7525
-0.7525


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0429,0.754606,0.72375
2,0.6377,0.707066,0.73625
3,0.4024,0.798138,0.745
4,0.2547,0.942314,0.73625
5,0.1718,1.019336,0.7425


0.745
-0.745


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9228,0.786249,0.71875
2,0.6509,0.815026,0.70375
3,0.4157,0.863655,0.76
4,0.2071,1.169498,0.76
5,0.0982,1.26035,0.755


0.76
-0.76


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9196,0.796094,0.70625
2,0.6836,0.709544,0.75125
3,0.4319,0.861222,0.73875
4,0.2536,1.04907,0.75875
5,0.1109,1.141709,0.76625


0.76625
-0.76625


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0105,0.756128,0.70125
2,0.6366,0.655472,0.75875
3,0.4737,0.792423,0.72125
4,0.2944,0.936455,0.73125
5,0.1224,0.974524,0.75125


0.75875
-0.75875


Some weights of ErnieForSequenceClassification were not initialized from the model checkpoint at nghuyong/ernie-2.0-base-en and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9407,0.789843,0.7125
2,0.6534,0.686732,0.75875
3,0.386,0.867341,0.7525
4,0.2141,1.098068,0.75875
5,0.1009,1.190189,0.76125


0.76125
-0.76125
Best parameters:
[3.592687488204789e-05, 0.07996589256970411, 5, 10, 49]
