In [None]:
# Dataset download page: https://lit.eecs.umich.edu/downloads.html#Twitter%20Optimism%20Dataset

# Dataset paper: https://aclanthology.org/2022.lrec-1.218.pdf#page=8&zoom=100,402,744

# Starting paper: https://aclanthology.org/2022.lrec-1.218.pdf#page=8&zoom=100,402,309

In [1]:
import os

# transformers checks USE_TF while it is being imported, so the flag has to be
# in the environment *before* the first `import transformers` (directly or via
# bert_util). Setting it afterwards does not stop TensorFlow from being loaded.
os.environ["USE_TF"] = "0"

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Project-local helpers for tokenizing, splitting, training and predicting
from bert_util import bert_tokenize_data, tensor_train_test_split, train_bert_model, model_predict, get_data_loader


  from .autonotebook import tqdm as notebook_tqdm
2025-05-20 11:22:18.082442: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-20 11:22:18.374670: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747732938.506304   30478 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747732938.543772   30478 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747732938.834153   30478 computation_placer.cc:177] computation placer already r

### Preprocess the tweet dataset

In [2]:
# Load the annotated tweets. The cells below read the 'Tweet' and
# 'AverageAnnotation' columns of this file.
df = pd.read_csv("tweets_annotation.csv")


def _score_to_class(score):
    """Bucket an averaged annotation score into a class id.

    Returns 2 when the score is above 2, 0 when it is below -2, and 1 for
    everything in between (including exactly 2 and -2).
    """
    if score > 2:
        return 2
    if score < -2:
        return 0
    return 1


# Overwrite the averaged score with its 3-way class id.
# NOTE(review): every row in the displayed preview ends up in class 1 --
# confirm that the +/-2 cut-offs give the intended class balance.
df['AverageAnnotation'] = df['AverageAnnotation'].astype(float).apply(_score_to_class)

# Make sure every tweet is a string
df['Tweet'] = df['Tweet'].astype(str)


def remove_word(text, word):
    """Blank out every space-separated token of ``text`` that equals ``word``.

    A token matches when, after stripping surrounding whitespace, it is equal
    to ``word``. Only whole tokens are removed; occurrences of ``word`` inside
    longer tokens (e.g. "the" inside "theme") are left untouched. The
    separating spaces are kept, so a removed token leaves a double space that
    the caller can collapse afterwards.

    Args:
        text: The string to clean.
        word: The token to remove.

    Returns:
        ``text`` with each matching token replaced by an empty string.
    """
    # Cheap exit: nothing to do when the word does not occur at all
    if word not in text:
        return text
    return ' '.join(
        '' if token.strip() == word else token
        for token in text.split(' ')
    )

def preprocess_tweet(text):
    """Remove @mentions and #hashtags from a tweet.

    The text is split on single spaces and every token that starts with '@'
    or '#' is replaced by an empty string. Only the matching tokens themselves
    are removed: other tokens that merely contain the same characters
    (e.g. "@abc" when "@a" is a mention) are left intact. The separating
    spaces are kept, so removed tokens leave extra spaces behind for the
    caller to collapse.

    Args:
        text: The raw tweet.

    Returns:
        The tweet with mention and hashtag tokens blanked out.
    """
    # Cheap exit: no marker character means nothing can match
    if '@' not in text and '#' not in text:
        return text
    return ' '.join(
        '' if token.startswith(('@', '#')) else token
        for token in text.split(' ')
    )

# Remove @mentions and #hashtags from every tweet
df['Tweet'] = df['Tweet'].apply(preprocess_tweet)
# Collapse the whitespace runs left behind by the removals and trim the ends.
# The result must be assigned back: the string methods return a new Series
# and leave the column untouched otherwise.
df['Tweet'] = df['Tweet'].str.replace(r'\s+', ' ', regex=True).str.strip()
df

Unnamed: 0,Tweet,Username,AverageAnnotation
0,did i just hear clive anderson say peter cooke...,The_Bounder,1
1,anyways that's my evening thesis. write don't ...,Satori_Paris,1
2,when teachers ask why im late. (vine by,Sizzurp_713,1
3,hurrion in goal. haha. watch the bounce si....,NathElCuchillo,1
4,on the other hand. the both of you on the wo...,Aethien,1
...,...,...,...
7470,1. you're beautiful,Yadi_cheers15,1
7471,in 1915 women fought for some rights.,YoItsLilBlue,1
7472,this is beautiful do i have permission to fra...,yzxnd,1
7473,just played with my ear for awhile,bellzuh_,1


### Tokenize

In [3]:
# Encode the cleaned tweets with the project's BERT tokenizing helper
token_ids, attention_masks = bert_tokenize_data(df['Tweet'].values)

# Class ids as a tensor, then split everything into two data loaders
# (test_size=0.1 -- presumably the fraction held out for validation; confirm in bert_util)
labels = torch.tensor(df['AverageAnnotation'].values)
train_dataloader, val_dataloader = tensor_train_test_split(
    labels,
    token_ids,
    attention_masks,
    test_size=0.1,
)

### Build the model

In [4]:
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Pretrained bert-base-uncased with a 3-class classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Training length: one scheduler step per batch per epoch
epochs = 3
optimizer = AdamW(model.parameters(), lr=2e-5)
# NOTE(review): loss_fn is created here but is not passed to train_bert_model
# below -- confirm whether it is still needed.
loss_fn = torch.nn.CrossEntropyLoss()
num_training_steps = len(train_dataloader) * epochs

# Learning-rate schedule with zero warmup steps, spanning all training steps
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

# Fine-tune; the helper returns the trained model
model = train_bert_model(model, optimizer, scheduler, train_dataloader, val_dataloader, epochs)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


-------------------- Epoch 1 --------------------

Training:
---------
Start Time:       2025-05-18 17:04:34.303543
Average Loss:     0.19271763889550914
Time Taken:       0:01:06.790031

Validation:
---------
Start Time:       2025-05-18 17:05:41.093926
Average Loss:     0.10313056009831502
Average Accuracy: 0.9800531914893617
Time Taken:       0:00:01.944072

-------------------- Epoch 2 --------------------

Training:
---------
Start Time:       2025-05-18 17:05:43.038786
Average Loss:     0.11790456349906643
Time Taken:       0:01:06.771402

Validation:
---------
Start Time:       2025-05-18 17:06:49.810742
Average Loss:     0.1053737268022737
Average Accuracy: 0.973404255319149
Time Taken:       0:00:01.957827

-------------------- Epoch 3 --------------------

Training:
---------
Start Time:       2025-05-18 17:06:51.768968
Average Loss:     0.04802651970950288
Time Taken:       0:01:07.165965

Validation:
---------
Start Time:       2025-05-18 17:07:58.935320
Average Loss:     0

### Test the model

In [27]:
# A handful of hand-written sentences for a quick sanity check of the classifier
test_texts = [
    "I encourage her to go her own way, but she's now having a change of heart.",
    "I really enjoyed this movie",
    "This product is terrible",
    "all i can say is wow beautiful as always. have an awesome week",
    "I am very angry about this situation",
]

# Tokenize them and serve all of them as a single, unshuffled batch
serie = pd.Series(test_texts)
tids, amids = bert_tokenize_data(serie, max_length=64)
dl = get_data_loader(tids, amids, batch_size=len(test_texts), shuffle=False)

In [28]:
# Run the fine-tuned model on the test sentences and display the predicted
# class ids together with their confidence values
preds, confidences = model_predict(model, dl)
preds, confidences

([1, 1, 1, 2, 1],
 [0.9995620846748352,
  0.9993290901184082,
  0.9996789693832397,
  0.9918040037155151,
  0.9932570457458496])

### Load the dataset with bloom labels

In [41]:
# Read the dialogue utterances that already carry bloom labels
# (lines=True: the file holds one JSON record per line)
dialogues_df = pd.read_json("../Black_White_Hat/dataset_with_bloom.json", lines=True)
dialogues_df

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score
0,0,I'm so angry . I feel like killing someone .,anger,inform,,Evaluation,0.792142
1,1,Calm down . __eou__,no_emotion,inform,,Comprehension,0.512506
2,0,I was just about to go to bed when the telepho...,no_emotion,inform,,Analysis,0.946143
3,1,Who was it ?,no_emotion,question,,Knowledge,0.999966
4,2,Kate . She said she was too excited to go to s...,no_emotion,inform,,Evaluation,0.957235
...,...,...,...,...,...,...,...
50922,5,I felt that the food was pretty mediocre .,no_emotion,inform,,Evaluation,0.999984
50923,6,"The service wasn't that great , either .",no_emotion,inform,,Evaluation,0.999745
50924,7,I agree . The service was not good .,no_emotion,inform,,Evaluation,0.999773
50925,8,Do you think that you want to try this restaur...,no_emotion,question,,Evaluation,0.996227


### Predict the optimism labels over the dataset

In [None]:
# Tokenize every utterance and classify it with the fine-tuned model.
# NOTE(review): some utterances in the preview still end with an "__eou__"
# marker -- confirm whether it should be stripped before tokenizing.
tids, amids = bert_tokenize_data(dialogues_df['utterance'], max_length=128)
# shuffle=False: the next cell assigns the predictions to the frame by position
dl = get_data_loader(tids, amids, batch_size=512, shuffle=False)
preds, confidences = model_predict(model, dl)

### Add the predictions to the dataframe

In [34]:
def _class_name(class_id):
    """Translate a predicted class id: 2 -> optimist, 0 -> pessimist, else neutral."""
    if class_id == 2:
        return 'optimist'
    if class_id == 0:
        return 'pessimist'
    return 'neutral'


# Store the readable label and the confidence next to each utterance
dialogues_df['optimism_label'] = [_class_name(class_id) for class_id in preds]
dialogues_df['optimism_score'] = confidences
dialogues_df

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
0,0,I'm so angry . I feel like killing someone .,anger,inform,,Evaluation,0.792142,pessimist,0.495951
1,1,Calm down . __eou__,no_emotion,inform,,Comprehension,0.512506,neutral,0.999543
2,0,I was just about to go to bed when the telepho...,no_emotion,inform,,Analysis,0.946143,neutral,0.999105
3,1,Who was it ?,no_emotion,question,,Knowledge,0.999966,neutral,0.999600
4,2,Kate . She said she was too excited to go to s...,no_emotion,inform,,Evaluation,0.957235,neutral,0.999474
...,...,...,...,...,...,...,...,...,...
50922,5,I felt that the food was pretty mediocre .,no_emotion,inform,,Evaluation,0.999984,neutral,0.999589
50923,6,"The service wasn't that great , either .",no_emotion,inform,,Evaluation,0.999745,neutral,0.999557
50924,7,I agree . The service was not good .,no_emotion,inform,,Evaluation,0.999773,neutral,0.999572
50925,8,Do you think that you want to try this restaur...,no_emotion,question,,Evaluation,0.996227,neutral,0.999233


In [35]:
# Show only the utterances labelled as optimist
dialogues_df.query("optimism_label == 'optimist'")

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
278,0,Your ring is very beautiful .,happiness,inform,,Evaluation,0.840383,optimist,0.802760
392,1,Oh ! What a beautiful dress ! It fits you so w...,happiness,inform,,Evaluation,0.999254,optimist,0.985101
467,2,"Oh , you look stunning . Your dress really goe...",happiness,inform,,Evaluation,0.999926,optimist,0.651362
477,0,You're my hero .,happiness,inform,,Knowledge,0.649432,optimist,0.867752
510,1,Thanks ! I'm so happy . It's like a dream come...,happiness,inform,,Comprehension,0.869269,optimist,0.982325
...,...,...,...,...,...,...,...,...,...
49350,7,"You look great , you are absolutely glowing .",happiness,inform,,Evaluation,0.573625,optimist,0.988734
49429,2,You're so kind . __eou__,happiness,commissive,,Evaluation,0.998598,optimist,0.972702
49683,0,It's such a nice day .,no_emotion,inform,,Knowledge,0.881944,optimist,0.987821
50523,0,You look really nice today .,happiness,inform,,Evaluation,0.996557,optimist,0.576937


### Save the dataframe to json

In [36]:
# Write the enriched dataframe to disk as a single JSON array of records
# (lines=False, unlike the JSON Lines input file read earlier)
dialogues_df.to_json('dataset_with_bloom_optimism.json', orient='records', lines=False)

In [4]:
# Reload the saved file so the cells below can run without repeating the prediction step
dialogues_df = pd.read_json("./dataset_with_bloom_optimism.json", lines=False)
dialogues_df

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
0,0,I'm so angry . I feel like killing someone .,anger,inform,,Evaluation,0.792142,pessimist,0.495951
1,1,Calm down . __eou__,no_emotion,inform,,Comprehension,0.512506,neutral,0.999543
2,0,I was just about to go to bed when the telepho...,no_emotion,inform,,Analysis,0.946143,neutral,0.999105
3,1,Who was it ?,no_emotion,question,,Knowledge,0.999966,neutral,0.999600
4,2,Kate . She said she was too excited to go to s...,no_emotion,inform,,Evaluation,0.957235,neutral,0.999474
...,...,...,...,...,...,...,...,...,...
50922,5,I felt that the food was pretty mediocre .,no_emotion,inform,,Evaluation,0.999984,neutral,0.999589
50923,6,"The service wasn't that great , either .",no_emotion,inform,,Evaluation,0.999745,neutral,0.999557
50924,7,I agree . The service was not good .,no_emotion,inform,,Evaluation,0.999773,neutral,0.999572
50925,8,Do you think that you want to try this restaur...,no_emotion,question,,Evaluation,0.996227,neutral,0.999233


In [19]:
# Bloom label 'Analysis' with score above 0.8, excluding fear, anger and sadness
dialogues_df.query("bloom_label == 'Analysis' and bloom_score>0.8 and (emotion != 'fear' and emotion != 'anger' and emotion != 'sadness')")

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
2,0,I was just about to go to bed when the telepho...,no_emotion,inform,,Analysis,0.946143,neutral,0.999105
23,2,But I work hard all the time for long hours . ...,no_emotion,inform,,Analysis,0.944474,neutral,0.999331
26,5,How about going for a walk instead of sitting ...,no_emotion,directive,,Analysis,0.974937,neutral,0.999547
28,7,"I know , I know.Have you got a portable radio ...",no_emotion,directive,,Analysis,0.999376,neutral,0.998995
82,0,I prefer potatoes to eggplants .,no_emotion,inform,,Analysis,0.919683,neutral,0.998892
...,...,...,...,...,...,...,...,...,...
50738,7,I can eat my bacon and eggs while you are exch...,no_emotion,inform,,Analysis,0.991499,neutral,0.995055
50754,1,We have a dinner reservation for four at 7:00 ...,no_emotion,directive,,Analysis,0.968431,neutral,0.999233
50781,6,Their appointment is at eleven o'clock in the ...,no_emotion,inform,,Analysis,0.971374,neutral,0.999522
50868,7,"Oh yes , I've heard of that one . Everyone I'v...",happiness,inform,,Analysis,0.943736,neutral,0.997230


In [12]:
# Optimist utterances whose optimism score is above 0.8
dialogues_df.query("optimism_label == 'optimist' and optimism_score>0.8")

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
278,0,Your ring is very beautiful .,happiness,inform,,Evaluation,0.840383,optimist,0.802760
392,1,Oh ! What a beautiful dress ! It fits you so w...,happiness,inform,,Evaluation,0.999254,optimist,0.985101
477,0,You're my hero .,happiness,inform,,Knowledge,0.649432,optimist,0.867752
510,1,Thanks ! I'm so happy . It's like a dream come...,happiness,inform,,Comprehension,0.869269,optimist,0.982325
667,2,I think luck played a very small part in your ...,no_emotion,inform,,Analysis,0.719604,optimist,0.988791
...,...,...,...,...,...,...,...,...,...
49312,5,"I don't know how , but I love you more than th...",happiness,inform,,Analysis,0.441687,optimist,0.987404
49348,5,"Yes , you are always popular with everyone . a...",happiness,inform,,Evaluation,0.997935,optimist,0.902398
49350,7,"You look great , you are absolutely glowing .",happiness,inform,,Evaluation,0.573625,optimist,0.988734
49429,2,You're so kind . __eou__,happiness,commissive,,Evaluation,0.998598,optimist,0.972702


In [16]:
# Bloom label 'Knowledge' or 'Evaluation' with score above 0.8
dialogues_df.query("(bloom_label == 'Knowledge' or bloom_label == 'Evaluation') and bloom_score>0.8")

Unnamed: 0,turn,utterance,emotion,act,hat,bloom_label,bloom_score,optimism_label,optimism_score
3,1,Who was it ?,no_emotion,question,,Knowledge,0.999966,neutral,0.999600
4,2,Kate . She said she was too excited to go to s...,no_emotion,inform,,Evaluation,0.957235,neutral,0.999474
7,5,"Her new boyfriend , right ?",no_emotion,commissive,,Knowledge,0.923408,neutral,0.999651
8,6,"Yeah , how do you know about that ?",surprise,question,,Evaluation,0.841434,neutral,0.999707
9,7,I was also interrupted by her last week becaus...,no_emotion,inform,,Evaluation,0.966702,neutral,0.999413
...,...,...,...,...,...,...,...,...,...
50922,5,I felt that the food was pretty mediocre .,no_emotion,inform,,Evaluation,0.999984,neutral,0.999589
50923,6,"The service wasn't that great , either .",no_emotion,inform,,Evaluation,0.999745,neutral,0.999557
50924,7,I agree . The service was not good .,no_emotion,inform,,Evaluation,0.999773,neutral,0.999572
50925,8,Do you think that you want to try this restaur...,no_emotion,question,,Evaluation,0.996227,neutral,0.999233
