In [1]:
import os

# USE_TF has to be in the environment BEFORE `transformers` is imported: the
# library decides at import time whether to load its TensorFlow backend, so
# assigning the variable after the imports (as this cell used to) has no effect.
os.environ["USE_TF"] = "0"

import json

import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
from util import get_dataframe_from_json
from sklearn.metrics import classification_report
from bert_util import bert_tokenize_data, tensor_train_test_split, train_bert_model, model_predict, get_data_loader, \
    train_pipeline, randomized_cv_rasearch

  from .autonotebook import tqdm as notebook_tqdm
2025-05-24 17:29:22.126647: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-24 17:29:22.133489: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748100562.141807    9985 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748100562.144399    9985 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748100562.150802    9985 computation_placer.cc:177] computation placer already r

In [2]:
# Read the train and test splits from the parent directory; both files are
# parsed as regular JSON documents (lines=False), not JSON-lines.
train_df, test_df = (
    pd.read_json(dataset_path, lines=False)
    for dataset_path in ('../train_dataset.json', '../test_dataset.json')
)
train_df

Unnamed: 0,turn,utterance,emotion,act,hat
0,3,"I'll take one, too.",happiness,inform,0
1,8,"You know, we are superior to other clothes com...",no_emotion,inform,3
2,5,"Her new boyfriend, right?",no_emotion,commissive,1
3,9,How about recommending him to use the storage ...,no_emotion,directive,4
4,1,"Oh, a bouquet of flowers. It's very kind of you.",surprise,commissive,1
...,...,...,...,...,...
808,0,I prefer potatoes to eggplants.,no_emotion,inform,0
809,0,"Mr. Smith, I would like to get right to the po...",no_emotion,question,1
810,4,Yeah?,no_emotion,question,1
811,0,I am so bored all day.,no_emotion,inform,0


In [11]:
# Tokenise the training utterances with the uncased BERT vocabulary.
# NOTE(review): no max_length is passed here, while the test-set cells call
# bert_tokenize_data(..., max_length=64) — confirm the helper's default matches.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
token_ids, attention_masks = bert_tokenize_data(
    tokenizer,
    train_df['utterance'].values,
)

# Build the train/validation loaders from the `hat` labels and the encoded text.
# Presumably test_size=0.1 holds out 10% of the rows for validation — TODO confirm
# against bert_util.tensor_train_test_split.
train_dataloader, val_dataloader = tensor_train_test_split(
    torch.tensor(train_df['hat'].values),
    token_ids,
    attention_masks,
    test_size=0.1,
)

In [12]:
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# NOTE(review): the following cell repeats these exact statements before
# training, so whatever is built here is replaced there; this cell looks
# redundant and is a candidate for deletion.
epochs = 10
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=5,  # one output per hat class
)
optimizer = AdamW(model.parameters(), lr=2e-5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Fine-tune a fresh bert-base-uncased classifier (5 output labels) on the
# loaders produced by the train/validation split cell.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
epochs = 10
optimizer = AdamW(model.parameters(), lr=2e-5)

# Linear learning-rate schedule with no warmup, sized as
# epochs * number of training batches (assumes one scheduler step per batch —
# TODO confirm against bert_util.train_bert_model).
num_training_steps = epochs * len(train_dataloader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# The former `loss_fn = torch.nn.CrossEntropyLoss()` was removed: it was never
# passed to train_bert_model nor used anywhere else in the notebook, so it only
# suggested a custom loss that was not actually in play.
model = train_bert_model(model, optimizer, scheduler, train_dataloader, val_dataloader, epochs)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



-------------------- Epoch 1 --------------------
Training:
---------
Start Time:       2025-05-24 16:15:32.720315
Average Training Loss: 1.233990160667378
Time Taken:            0:00:07.294805

Validation:
-----------
Start Time:       2025-05-24 16:15:40.015512
Average Validation Loss:     1.2737085927616467
Average Validation Accuracy: 0.5340909090909091
Time Taken:                  0:00:00.238261

-------------------- Epoch 2 --------------------
Training:
---------
Start Time:       2025-05-24 16:15:40.255083
Average Training Loss: 0.99287924753583
Time Taken:            0:00:07.275139

Validation:
-----------
Start Time:       2025-05-24 16:15:47.530589
Average Validation Loss:     1.220288959416476
Average Validation Accuracy: 0.48863636363636365
Time Taken:                  0:00:00.247728

-------------------- Epoch 3 --------------------
Training:
---------
Start Time:       2025-05-24 16:15:47.779455
Average Training Loss: 0.6303189602559027
Time Taken:            0:00:07.27

In [14]:
# Encode the held-out test utterances (truncated/padded to 64 tokens by the helper call).
test_texts = test_df['utterance'].values
serie = pd.Series(test_texts)
tids, amids = bert_tokenize_data(tokenizer, serie, max_length=64)

# Keep the original row order (shuffle=False) so predictions line up with the labels.
dl = get_data_loader(tids, amids, batch_size=5, shuffle=False)
preds, confidences = model_predict(model, dl)

# Accuracy = fraction of predictions that equal the gold `hat` label.
labels_flat = test_df['hat'].values.flatten()
accuracy = np.mean(preds == labels_flat)
accuracy

np.float64(0.5784313725490197)

In [15]:
# Integer `hat` label -> hat colour name, used to label the report rows.
hat_map = {
    0: "red",
    1: "white",
    2: "black",
    3: "yellow",
    4: "green",
}
preds_array = np.array(preds)

# Pass `labels` explicitly so each target name is tied to its integer id.
# Without it, scikit-learn pairs the names positionally with whichever labels
# happen to occur in y_true/y_pred and raises if fewer than five occur.
print(classification_report(
    labels_flat,
    preds_array,
    labels=list(hat_map.keys()),
    target_names=list(hat_map.values()),
))

              precision    recall  f1-score   support

         red       0.50      0.55      0.52        40
       white       0.72      0.71      0.72       110
       black       0.30      0.35      0.32        23
      yellow       0.50      0.32      0.39        19
       green       0.31      0.33      0.32        12

    accuracy                           0.58       204
   macro avg       0.47      0.45      0.45       204
weighted avg       0.59      0.58      0.58       204



In [9]:
# Load the alternative training set used by the next experiment.
# NOTE(review): the "eda_" prefix presumably refers to EDA-style text augmentation — confirm.
augmented_train_df = pd.read_json('../eda_train_dataset.json', lines=False)

In [10]:


from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Same fine-tuning recipe as for the original training set, applied to
# augmented_train_df. This cell only trains: the test-set evaluation that used
# to be appended here was a verbatim copy of the following cell, so it ran
# twice; it now lives in that cell alone.

# Encode the augmented utterances and build train/validation loaders.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
token_ids, attention_masks = bert_tokenize_data(tokenizer, augmented_train_df['utterance'].values)
train_dataloader, val_dataloader = tensor_train_test_split(
    torch.tensor(augmented_train_df['hat'].values),
    token_ids,
    attention_masks,
    test_size=0.1,
)

# Fresh classifier (5 output labels), AdamW, and a linear schedule without warmup
# sized as epochs * number of training batches.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
epochs = 10
optimizer = AdamW(model.parameters(), lr=2e-5)
num_training_steps = epochs * len(train_dataloader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# The unused `loss_fn = torch.nn.CrossEntropyLoss()` was removed; it was never
# passed to train_bert_model or referenced afterwards.
model = train_bert_model(model, optimizer, scheduler, train_dataloader, val_dataloader, epochs)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


-------------------- Epoch 1 --------------------

Training:
---------
Start Time:       2025-05-21 18:49:45.599930
Average Loss:     0.8352723453822158
Time Taken:       0:00:49.121369

Validation:
---------
Start Time:       2025-05-21 18:50:34.721732
Average Loss:     1.6427994484597064
Average Accuracy: 0.4376899696048632
Time Taken:       0:00:01.687571

-------------------- Epoch 2 --------------------

Training:
---------
Start Time:       2025-05-21 18:50:36.409734
Average Loss:     0.19497602208099393
Time Taken:       0:00:49.416321

Validation:
---------
Start Time:       2025-05-21 18:51:25.826465
Average Loss:     1.0135578555054963
Average Accuracy: 0.7815349544072948
Time Taken:       0:00:01.698944

-------------------- Epoch 3 --------------------

Training:
---------
Start Time:       2025-05-21 18:51:27.525819
Average Loss:     0.07621283072581728
Time Taken:       0:00:49.632151

Validation:
---------
Start Time:       2025-05-21 18:52:17.158519
Average Loss:     1.

In [23]:
# Evaluate the current `model` on the held-out test set.
test_texts = test_df['utterance'].values
serie = pd.Series(test_texts)
tids, amids = bert_tokenize_data(tokenizer, serie, max_length=64)

# shuffle=False keeps predictions aligned with the label order in test_df.
dl = get_data_loader(tids, amids, batch_size=5, shuffle=False)
preds, confidences = model_predict(model, dl)
labels_flat = test_df['hat'].values.flatten()

preds_array = np.array(preds)

# `labels` is passed explicitly so every name in hat_map is bound to its
# integer id; otherwise scikit-learn matches names by position to the labels
# that happen to occur and raises when the counts differ.
print(classification_report(
    labels_flat,
    preds_array,
    labels=list(hat_map.keys()),
    target_names=list(hat_map.values()),
))

              precision    recall  f1-score   support

         red       0.42      0.53      0.47        40
       white       0.76      0.66      0.71       110
       black       0.36      0.43      0.39        23
      yellow       0.45      0.26      0.33        19
       green       0.26      0.42      0.32        12

    accuracy                           0.56       204
   macro avg       0.45      0.46      0.44       204
weighted avg       0.59      0.56      0.57       204



# Randomized Search for Hyperparameter Tuning usage example

In [3]:

# Run the randomized hyperparameter search helper for two sampled configurations,
# without LoRA.
# NOTE(review): the test split is handed to the search and the next cell scores
# best_model on that same split. If the helper uses these arrays to pick the
# best configuration, the reported test score is optimistically biased —
# confirm in bert_util and consider a separate validation split for selection.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
best_model, best_config, best_score = randomized_cv_rasearch(
    model,
    tokenizer,
    train_df['utterance'].values,
    train_df['hat'].values,
    test_df['utterance'].values,
    test_df['hat'].values,
    num_samples=2,
    use_lora=False,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trying configuration 1/2: {'epochs': 10, 'model_dropout': 0.1, 'optimizer_lr': 3e-05, 'scheduler_warmup_steps': 0, 'lora_r': 4, 'lora_alpha': 16, 'lora_dropout': 0.2, 'tokenizer_max_length': 16, 'dataloader_batch_size': 32, 'clip_grad_norm': 5.0, 'early_stopping_patience': 10, 'early_stopping_delta': 0.01, 'scheduler_type': 'constant', 'optimizer_type': 'Adafactor', 'weight_decay': 0.01}

-------------------- Epoch 1 --------------------
Training:
---------
Start Time:       2025-05-24 17:30:22.254312
Average Training Loss: 1.774469180804927
Time Taken:            0:00:03.686337

Validation:
-----------
Start Time:       2025-05-24 17:30:25.940996
Average Validation Loss:     1.6343998568398612
Average Validation Accuracy: 0.1765873015873016
Time Taken:                  0:00:00.071323

-------------------- Epoch 2 --------------------
Training:
---------
Start Time:       2025-05-24 17:30:26.013083
Average Training Loss: 1.5592435830976905
Time Taken:            0:00:03.330103

Validat

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



-------------------- Epoch 1 --------------------
Training:
---------
Start Time:       2025-05-24 17:30:57.092997
Average Training Loss: 1.17313210339081
Time Taken:            0:00:03.468951

Validation:
-----------
Start Time:       2025-05-24 17:31:00.562367
Average Validation Loss:     1.3038132474535988
Average Validation Accuracy: 0.5079365079365079
Time Taken:                  0:00:00.141896

-------------------- Epoch 2 --------------------
Training:
---------
Start Time:       2025-05-24 17:31:00.705119
Average Training Loss: 0.9270630849570762
Time Taken:            0:00:03.451904

Validation:
-----------
Start Time:       2025-05-24 17:31:04.157410
Average Validation Loss:     1.2575146499134244
Average Validation Accuracy: 0.5218253968253969
Time Taken:                  0:00:00.142290
No improvement for 1 epoch(s).

-------------------- Epoch 3 --------------------
Training:
---------
Start Time:       2025-05-24 17:31:04.300063
Average Training Loss: 0.6803209082019038
T

In [4]:
from bert_util import scoring_fn

# Re-encode the test utterances with the tokenizer length chosen by the search.
tids, amids = bert_tokenize_data(
    tokenizer,
    pd.Series(test_df['utterance'].values),
    max_length=best_config["tokenizer_max_length"],
)

# Stored as `test_dataloader` (previously `val_dataloader`): this loader holds
# the TEST set, and reusing the validation name silently replaced the loader
# that the training cells pass to train_bert_model, a hidden-state trap when
# cells are re-run.
test_dataloader = get_data_loader(
    tids,
    amids,
    batch_size=best_config["dataloader_batch_size"],
    shuffle=False,  # keep row order aligned with test_df['hat']
)

score = scoring_fn(best_model, test_dataloader, test_df['hat'].values)

              precision    recall  f1-score   support

         red       0.46      0.45      0.46        40
       white       0.69      0.78      0.74       110
       black       0.21      0.13      0.16        23
      yellow       0.40      0.32      0.35        19
       green       0.25      0.25      0.25        12

    accuracy                           0.57       204
   macro avg       0.40      0.39      0.39       204
weighted avg       0.54      0.57      0.55       204

