In [1]:
import os

# This must run BEFORE `transformers` is imported: the library reads USE_TF
# once, at import time, to decide whether to load its TensorFlow backend.
# Assigning it after the imports (as this cell previously did) has no effect.
os.environ["USE_TF"] = "0"

import json

import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
from bert_util import bert_tokenize_data, tensor_train_test_split, train_bert_model, model_predict, get_data_loader, \
    calculate_accuracy
from util import get_dataframe_from_json
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm
2025-05-20 22:25:28.949224: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-20 22:25:28.956161: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747772728.965427   54178 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747772728.968258   54178 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747772728.975980   54178 computation_placer.cc:177] computation placer already r

In [2]:
# Load the hand-labelled utterances into a DataFrame and display it.
hand_labelled_path = './hand_labeled/hand_labelled_dataset.json'
hld = get_dataframe_from_json(hand_labelled_path)
hld

Unnamed: 0,turn,utterance,emotion,act,hat
0,0,I'm so angry . I feel like killing someone .,anger,inform,red
1,1,Calm down . __eou__,no_emotion,inform,red
2,0,I was just about to go to bed when the telepho...,no_emotion,inform,white
3,1,Who was it ?,no_emotion,question,white
4,2,Kate . She said she was too excited to go to s...,no_emotion,inform,white
...,...,...,...,...,...
1012,6,I want to live abroad and learn to speak a dif...,no_emotion,inform,white
1013,7,I'm really sorry . But I understand .,no_emotion,commissive,white
1014,8,"Thank you , manager . __eou__",no_emotion,inform,white
1015,0,I fired Mr . Li today .,no_emotion,inform,white


In [3]:
# Integer class id -> hat name. The insertion order is also the order used for
# `target_names` when the classification reports are printed.
hat_map = {
    0: "red",
    1: "white",
    2: "black",
    3: "yellow",
    4: "green",
}

# Hat name -> integer class id (inverse of hat_map).
reverse_hat_map = {v: k for k, v in hat_map.items()}


def _encode_hat(value):
    """Return the integer class id for one entry of the `hat` column.

    Values that are already class ids are returned unchanged, so re-running
    this cell on an already-encoded column is a no-op instead of a KeyError.
    Unknown labels still raise KeyError, as before.
    """
    if value in hat_map:
        return value
    return reverse_hat_map[value]


# Replace the textual hat label with its integer class id.
hld['hat'] = hld['hat'].map(_encode_hat)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out part of the hand-labelled data as a test set (test_size=0.2),
# stratified on the encoded hat label and with a fixed random_state so the
# split is repeatable.
train_df, test_df = train_test_split(
    hld,
    test_size=0.2,
    random_state=42,
    stratify=hld['hat'],
)


In [6]:
# Tokenise the training utterances with the pretrained uncased BERT vocabulary.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
token_ids, attention_masks = bert_tokenize_data(tokenizer, train_df['utterance'].values)

# Build the train / validation loaders from the encoded hat labels.
# test_size=0.1 is presumably the fraction held out for validation - confirm in bert_util.
train_labels = torch.tensor(train_df['hat'].values)
train_dataloader, val_dataloader = tensor_train_test_split(
    train_labels,
    token_ids,
    attention_masks,
    test_size=0.1,
)

In [7]:
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Baseline run: fine-tune bert-base-uncased on the un-augmented training split.
epochs = 20

# num_labels=5 matches the five entries of hat_map.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
optimizer = AdamW(model.parameters(), lr=2e-5)
# NOTE(review): loss_fn is created here but is not passed to train_bert_model below.
loss_fn = torch.nn.CrossEntropyLoss()

# One scheduler step per batch per epoch, with no warm-up steps.
num_training_steps = epochs * len(train_dataloader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

model = train_bert_model(model, optimizer, scheduler, train_dataloader, val_dataloader, epochs)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


-------------------- Epoch 1 --------------------

Training:
---------
Start Time:       2025-05-20 21:45:49.853438
Average Loss:     1.289921478084896
Time Taken:       0:00:07.659268

Validation:
---------
Start Time:       2025-05-20 21:45:57.513115
Average Loss:     1.3286631865934893
Average Accuracy: 0.45454545454545453
Time Taken:       0:00:00.231185

-------------------- Epoch 2 --------------------

Training:
---------
Start Time:       2025-05-20 21:45:57.744682
Average Loss:     1.0797201783760735
Time Taken:       0:00:07.201838

Validation:
---------
Start Time:       2025-05-20 21:46:04.946889
Average Loss:     1.2672974846579812
Average Accuracy: 0.5
Time Taken:       0:00:00.237452

-------------------- Epoch 3 --------------------

Training:
---------
Start Time:       2025-05-20 21:46:05.184793
Average Loss:     0.8025512677495894
Time Taken:       0:00:07.271905

Validation:
---------
Start Time:       2025-05-20 21:46:12.457073
Average Loss:     1.3192306729880245


In [11]:
# Tokenise the held-out test utterances and wrap them in a loader.
# NOTE(review): max_length=64 is given here, while the training cell relies on
# the helper's default - confirm the two agree.
test_texts = test_df['utterance'].values
serie = pd.Series(test_texts)
tids, amids = bert_tokenize_data(tokenizer, serie, max_length=64)
# shuffle=False so predictions can be compared position-by-position with the labels.
dl = get_data_loader(tids, amids, batch_size=5, shuffle=False)

preds, confidences = model_predict(model, dl)
labels_flat = test_df['hat'].values.flatten()

# Accuracy = number of positions where prediction equals label / number of labels.
n_correct = np.sum(preds == labels_flat)
accuracy = n_correct / len(labels_flat)
accuracy

np.float64(0.553921568627451)

In [17]:

from sklearn.metrics import classification_report

# Build the prediction array in this cell. No earlier cell assigns
# `preds_array`, so on a fresh kernel (Restart & Run All) the report call
# raised NameError. `preds` comes from the prediction cell directly above.
preds_array = np.array(preds)

# Per-class precision / recall / F1, with rows named in hat_map order.
print(classification_report(labels_flat, preds_array, target_names=list(hat_map.values())))

              precision    recall  f1-score   support

         red       0.53      0.40      0.46        40
       white       0.71      0.75      0.73       110
       black       0.23      0.26      0.24        23
      yellow       0.47      0.42      0.44        19
       green       0.07      0.08      0.07        12

    accuracy                           0.55       204
   macro avg       0.40      0.38      0.39       204
weighted avg       0.56      0.55      0.55       204



In [7]:
from util import eda_augment_dataset

# Recreate the same stratified split as the baseline (same random_state).
train_df, test_df = train_test_split(hld, test_size=0.2, random_state=42, stratify=hld['hat'])

# One shared set of EDA settings for every augmented class.
eda_kwargs = dict(num_aug=10, alpha_sr=0.05, alpha_ri=0.05, alpha_rs=0.05, p_rd=0.1)

# Augment only the training rows of hats 4 (green), 3 (yellow) and 2 (black);
# the test split is left untouched.
green_augmented = eda_augment_dataset(train_df[train_df['hat'] == 4], **eda_kwargs)
yellow_augmented = eda_augment_dataset(train_df[train_df['hat'] == 3], **eda_kwargs)
black_augmented = eda_augment_dataset(train_df[train_df['hat'] == 2], **eda_kwargs)

# Original training rows first, then the augmented rows, with a fresh index.
augmented_frames = [train_df, green_augmented, yellow_augmented, black_augmented]
augmented_train_df = pd.concat(augmented_frames, ignore_index=True)
augmented_train_df

Unnamed: 0,turn,utterance,emotion,act,hat
0,3,"I'll take one , too . __eou__",happiness,inform,0
1,8,"You know , we are superior to other clothes co...",no_emotion,inform,3
2,5,"Her new boyfriend , right ?",no_emotion,commissive,1
3,9,How about recommending him to use the storage ...,no_emotion,directive,4
4,1,"Oh , a bouquet of flowers . It's very kind of ...",surprise,commissive,1
...,...,...,...,...,...
2938,2,what seems to be the problem,no_emotion,question,2
2939,2,what seems to be the trouble,no_emotion,question,2
2940,2,what seems to be seem the problem,no_emotion,question,2
2941,2,seems what to be the problem,no_emotion,question,2


In [8]:


from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# --- Data: tokenise the augmented training frame and build the loaders ---
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
token_ids, attention_masks = bert_tokenize_data(tokenizer, augmented_train_df['utterance'].values)
# NOTE(review): the validation loader is drawn from the augmented frame, so it
# may contain augmented variants of utterances that are also in the training
# loader - confirm how tensor_train_test_split partitions the rows before
# trusting the validation accuracy.
train_dataloader, val_dataloader = tensor_train_test_split(
    torch.tensor(augmented_train_df['hat'].values),
    token_ids,
    attention_masks,
    test_size=0.1,
)

# --- Model: fresh bert-base-uncased classifier, same optimiser settings as the baseline ---
epochs = 10
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
optimizer = AdamW(model.parameters(), lr=2e-5)
# NOTE(review): loss_fn is created here but is not passed to train_bert_model below.
loss_fn = torch.nn.CrossEntropyLoss()
num_training_steps = epochs * len(train_dataloader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

model = train_bert_model(model, optimizer, scheduler, train_dataloader, val_dataloader, epochs)

# --- Evaluation on the untouched test split ---
test_texts = test_df['utterance'].values
serie = pd.Series(test_texts)
tids, amids = bert_tokenize_data(tokenizer, serie, max_length=64)
# shuffle=False keeps predictions aligned with labels_flat.
dl = get_data_loader(tids, amids, batch_size=5, shuffle=False)
preds, confidences = model_predict(model, dl)
labels_flat = test_df['hat'].values.flatten()

preds_array = np.array(preds)
report = classification_report(labels_flat, preds_array, target_names=list(hat_map.values()))
print(report)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


-------------------- Epoch 1 --------------------

Training:
---------
Start Time:       2025-05-20 22:36:30.704541
Average Loss:     0.8054862971842469
Time Taken:       0:00:26.048986

Validation:
---------
Start Time:       2025-05-20 22:36:56.754114
Average Loss:     0.307436011228207
Average Accuracy: 0.9324324324324325
Time Taken:       0:00:00.822701

-------------------- Epoch 2 --------------------

Training:
---------
Start Time:       2025-05-20 22:36:57.577185
Average Loss:     0.25033063978783987
Time Taken:       0:00:26.196718

Validation:
---------
Start Time:       2025-05-20 22:37:23.774278
Average Loss:     0.9572705428141195
Average Accuracy: 0.7693050193050194
Time Taken:       0:00:00.822414

-------------------- Epoch 3 --------------------

Training:
---------
Start Time:       2025-05-20 22:37:24.597094
Average Loss:     0.13844849159652678
Time Taken:       0:00:26.161896

Validation:
---------
Start Time:       2025-05-20 22:37:50.759325
Average Loss:     0.3