# Transformer model

In [None]:
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel, TFAutoModel 
import tensorflow as tf
import tf_keras
from tf_keras.preprocessing.text import Tokenizer
from tf_keras.preprocessing.sequence import pad_sequences
from tf_keras.layers import *
from tf_keras.models import Model, Sequential
from tf_keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint
from tf_keras.utils import to_categorical
from tf_keras.models import load_model

# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# from tensorflow.keras.layers import *
# from tensorflow.keras.models import Model, Sequential
# from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint
# from tensorflow.keras.utils import to_categorical
# from keras.models import load_model

# Load pre-trained embeddings
import gensim.downloader as api
glove_vectors = api.load("glove-wiki-gigaword-300")

from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV

2025-04-01 19:11:36.164758: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743505896.250367   24454 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743505896.272921   24454 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743505896.445051   24454 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743505896.445086   24454 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743505896.445088   24454 computation_placer.cc:177] computation placer alr

## Check GPU

In [2]:
# Check if TensorFlow can see GPU
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

# More detailed GPU information
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        print("GPU name:", gpu.name)
        print("GPU details:", tf.config.experimental.get_device_details(gpu))
else:
    print("No GPU detected. TensorFlow is running on CPU.")

# Simple test to confirm GPU operation
if gpus:
    with tf.device('/GPU:0'):
        a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        c = tf.matmul(a, b)
        print("Matrix multiplication result:", c)
        print("Executed on GPU")

TensorFlow version: 2.19.0
Num GPUs Available: 1
GPU name: /physical_device:GPU:0
GPU details: {'compute_capability': (7, 5), 'device_name': 'NVIDIA GeForce GTX 1650 Ti'}
Matrix multiplication result: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)
Executed on GPU


I0000 00:00:1743505968.543756   24454 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2608 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5


## Load and split data

In [3]:
train_df = pd.read_csv("data/train_preprocessed.csv", encoding="ISO-8859-1")
train_set, validation_set = train_test_split(train_df, test_size=0.2, random_state=20250310)

print("Train set size: " + str(len(train_set)))
train_set.head()

Train set size: 21984


Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (KmÂ²),Density (P/KmÂ²),expanded_text,clean_text,text_without_stopwords,tokens,lemmatized_tokens,lemmatized_sentence,label
25721,269a1d9936,good morning...did you break 5 digits yet?,good morning.,positive,morning,0-20,New Zealand,4822233,263310.0,18,good morning...did you break 5 digits yet?,good morningdid you break digits yet,good morningdid break digits yet,"['good', 'morningdid', 'you', 'break', 'digits...","['good', 'morningdid', 'you', 'break', 'digit'...",good morningdid you break digit yet,2
27411,878acf5421,i wish you were in my class last year,i wish you were in my class last year,positive,noon,60-70,Albania,2877797,27400.0,105,i wish you were in my class last year,i wish you were in my class last year,wish class last year,"['i', 'wish', 'you', 'were', 'in', 'my', 'clas...","['i', 'wish', 'you', 'be', 'in', 'my', 'class'...",i wish you be in my class last year,2
24261,958eed7410,Have been rolling on the bed for the past 2 ho...,Have been rolling on the bed for the past 2 ho...,neutral,noon,60-70,Seychelles,98347,460.0,214,Have been rolling on the bed for the past 2 ho...,have been rolling on the bed for the past hour...,rolling bed past hours still unable sleep,"['have', 'been', 'rolling', 'on', 'the', 'bed'...","['have', 'be', 'roll', 'on', 'the', 'bed', 'fo...",have be roll on the bed for the past hour stil...,1
16672,09430f45ef,sadly isn`t me. It`s some1 w/ 1 follower 1 u...,sadly isn`t me.,negative,night,70-100,Saint Lucia,183627,610.0,301,sadly is not me. It is some1 w/ 1 follower 1...,sadly is not me it is some w follower update h...,sadly w follower update used twitter since may...,"['sadly', 'is', 'not', 'me', 'it', 'is', 'some...","['sadly', 'be', 'not', 'me', 'it', 'be', 'some...",sadly be not me it be some w follower update h...,0
6925,9849f2c929,LOL how many stalkers today?you just love get...,love,positive,night,31-45,"Eswatini (fmr. ""Swaziland"")",1160164,17200.0,67,LOL how many stalkers today?you just love get...,lol how many stalkers todayyou just love getti...,lol many stalkers todayyou love getting creepy...,"['lol', 'how', 'many', 'stalkers', 'todayyou',...","['lol', 'how', 'many', 'stalker', 'todayyou', ...",lol how many stalker todayyou just love get cr...,2


In [4]:
print("Validation set size: " + str(len(validation_set)))
validation_set.head()

Validation set size: 5496


Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (KmÂ²),Density (P/KmÂ²),expanded_text,clean_text,text_without_stopwords,tokens,lemmatized_tokens,lemmatized_sentence,label
7096,d4d73f8bff,"yeah. No vomiting tonight, bit sniffly but ok...","yeah. No vomiting tonight, bit sniffly but ok....",neutral,night,70-100,Canada,37742154,9093510.0,4,"yeah. No vomiting tonight, bit sniffly but ok...",yeah no vomiting tonight bit sniffly but ok be...,yeah vomiting tonight bit sniffly ok bens tryi...,"['yeah', 'no', 'vomiting', 'tonight', 'bit', '...","['yeah', 'no', 'vomit', 'tonight', 'bit', 'sni...",yeah no vomit tonight bit sniffly but ok ben t...,1
17178,86c5e1e0c5,"I feel sick..like don`t wanna get out of bed, ...",sick..,negative,noon,21-30,Paraguay,7132538,397300.0,18,I feel sick..like do not want to get out of be...,i feel sicklike do not want to get out of bed ...,feel sicklike want get bed bothered go work tu...,"['i', 'feel', 'sicklike', 'do', 'not', 'want',...","['i', 'feel', 'sicklike', 'do', 'not', 'want',...",i feel sicklike do not want to get out of bed ...,0
1963,2be8fc0d5c,I`m on my way to see `17 Again`. I`ve wanted ...,I`m on my way to see `17 Again`. I`ve wanted ...,neutral,night,31-45,Italy,60461826,294140.0,206,I am on my way to see '17 Again'. I have want...,i am on my way to see again i have wanted to s...,way see wanted see looong time,"['i', 'am', 'on', 'my', 'way', 'to', 'see', 'a...","['i', 'be', 'on', 'my', 'way', 'to', 'see', 'a...",i be on my way to see again i have want to see...,1
19500,55f9af0b57,Ugh have to report again monday,Ugh,negative,noon,21-30,Gabon,2225734,257670.0,9,Ugh have to report again monday,ugh have to report again monday,ugh report monday,"['ugh', 'have', 'to', 'report', 'again', 'mond...","['ugh', 'have', 'to', 'report', 'again', 'mond...",ugh have to report again monday,0
22302,352e2cdec2,work Yay!,Yay,positive,noon,21-30,Iceland,341243,100250.0,3,work Yay!,work yay,work yay,"['work', 'yay']","['work', 'yay']",work yay,2


## Feature extraction

In [None]:
# Create tokenizer and model
model_name = "roberta-base"  # You can use other models like "roberta-base" or "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
label_encoder = LabelEncoder()

# Tokenize data
def tokenize_data(texts, max_length=128):
    # Make sure texts is a list of strings
    if not isinstance(texts, list):
        texts = list(texts)
    
    # Check for any non-string entries and convert them
    for i, text in enumerate(texts):
        if not isinstance(text, str):
            texts[i] = str(text)
    
    encodings = tokenizer(
        texts, 
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='tf'
    )
    return encodings['input_ids'], encodings['attention_mask']

# Tokenize train and validation data
X_train_inputs, X_train_masks = tokenize_data(train_set["lemmatized_sentence"].tolist())
X_val_inputs, X_val_masks = tokenize_data(validation_set["lemmatized_sentence"].tolist())
y_train = label_encoder.fit_transform(train_set["sentiment"])
y_val = label_encoder.transform(validation_set["sentiment"])

# Convert labels to one-hot if needed (depends on your loss function)
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.int32)
y_val_tf = tf.convert_to_tensor(y_val, dtype=tf.int32)

# Create TensorFlow datasets
batch_size = 16
train_dataset = tf.data.Dataset.from_tensor_slices(((X_train_inputs, X_train_masks), y_train_tf))
train_dataset = train_dataset.shuffle(len(y_train)).batch(batch_size)

val_dataset = tf.data.Dataset.from_tensor_slices(((X_val_inputs, X_val_masks), y_val_tf))
val_dataset = val_dataset.batch(batch_size)

## Transformer model architecture

In [None]:
# Define a transformer-based model for sentiment analysis using TensorFlow
class TransformerSentimentClassifier(tf_keras.Model):
    def __init__(self, model_name, num_classes=3):
        super(TransformerSentimentClassifier, self).__init__()
        self.transformer = TFAutoModel.from_pretrained(model_name)
        self.dropout = tf_keras.layers.Dropout(0.1)
        self.classifier = tf_keras.layers.Dense(num_classes, activation=None)
        
    def call(self, inputs, training=False):
        # Get transformer outputs
        input_ids, attention_mask = inputs
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        
        # Use the [CLS] token representation (first token)
        pooled_output = outputs.last_hidden_state[:, 0, :]
        
        # Apply dropout and the classification layer
        x = self.dropout(pooled_output, training=training)
        logits = self.classifier(x)
        
        return logits

model = TransformerSentimentClassifier(model_name)

# Compile the model
# Enable mixed precision training
from tf_keras.mixed_precision import Policy, set_global_policy
policy = Policy('mixed_float16')
set_global_policy(policy)

optimizer = tf_keras.optimizers.Adam(learning_rate=2e-5)
loss = tf_keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy']
)

class WarmupScheduler(tf_keras.callbacks.Callback):
    def __init__(self, warmup_steps, total_steps, initial_lr=2e-5, min_lr=0):
        super(WarmupScheduler, self).__init__()
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.initial_lr = initial_lr
        self.min_lr = min_lr
        self.global_step = 0
        
    def on_batch_begin(self, batch, logs=None):
        self.global_step += 1
        if self.global_step < self.warmup_steps:
            lr = self.global_step / self.warmup_steps * self.initial_lr
        else:
            decay_steps = self.total_steps - self.warmup_steps
            decay_rate = (self.min_lr - self.initial_lr) / decay_steps
            lr = self.initial_lr + decay_rate * (self.global_step - self.warmup_steps)
            lr = max(lr, self.min_lr)
        
        tf_keras.backend.set_value(self.model.optimizer.lr, lr)

# Training parameters
epochs = 3
steps_per_epoch = len(train_dataset)
total_steps = steps_per_epoch * epochs
warmup_steps = int(0.1 * total_steps)  # 10% warmup

# Callbacks
lr_scheduler = WarmupScheduler(warmup_steps, total_steps)
early_stopping = tf_keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)

# Train the model
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=val_dataset,
    callbacks=[lr_scheduler, early_stopping]
)

# Alternatively, if you prefer a simpler approach without the custom learning rate scheduler:
# history = model.fit(
#     train_dataset,
#     epochs=epochs,
#     validation_data=val_dataset
# )

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce GTX 1650 Ti, compute capability 7.5
Epoch 1/3


2025-04-01 19:23:16.231217: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:501] Allocator (GPU_0_bfc) ran out of memory trying to allocate 12.00MiB (rounded to 12582912)requested by op transformer_sentiment_classifier_2/tf_bert_model_2/bert/encoder/layer_._0/attention/self/Softmax
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2025-04-01 19:23:16.231264: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1058] BFCAllocator dump for GPU_0_bfc
2025-04-01 19:23:16.231270: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1065] Bin (256): 	Total Chunks: 79, Chunks in use: 78. 19.8KiB allocated for chunks. 19.5KiB in use in bin. 637B client-requested in use in bin.
2025-04-01 19:23:16.231274: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1065] Bin (512): 	Total Chunks: 2, Chunks in use: 2. 1.2KiB allocate

ResourceExhaustedError: Graph execution error:

Detected at node transformer_sentiment_classifier_2/tf_bert_model_2/bert/encoder/layer_._0/attention/self/Softmax defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/nix/store/0flj08i381bfdfbrly8sk6vs36lfrhnb-python3-3.11.11/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/nix/store/0flj08i381bfdfbrly8sk6vs36lfrhnb-python3-3.11.11/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/nix/store/0flj08i381bfdfbrly8sk6vs36lfrhnb-python3-3.11.11/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3077, in run_cell

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3132, in _run_cell

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3336, in run_cell_async

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3519, in run_ast_nodes

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code

  File "/tmp/nix-shell.Xlz3Bv/ipykernel_24454/2777327274.py", line 72, in <module>

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1804, in fit

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1398, in train_function

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1381, in step_function

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1370, in run_step

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 1147, in train_step

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 588, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/tmp/nix-shell.Xlz3Bv/ipykernel_24454/2777327274.py", line 12, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/training.py", line 588, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/modeling_tf_utils.py", line 1182, in run_call_with_unpacked_inputs

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 1209, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/modeling_tf_utils.py", line 1182, in run_call_with_unpacked_inputs

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 969, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 603, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 609, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 502, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 386, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/engine/base_layer.py", line 1142, in __call__

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py", line 96, in error_handler

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py", line 299, in call

  File "/home/nixosuser/cs4248-tweet-sentiment-analysis/venv/lib/python3.11/site-packages/transformers/tf_utils.py", line 72, in stable_softmax

OOM when allocating tensor with shape[16,12,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node transformer_sentiment_classifier_2/tf_bert_model_2/bert/encoder/layer_._0/attention/self/Softmax}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_64919]

ze 3072 next 1083
2025-04-01 19:23:16.233823: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd64e81800 of size 3072 next 1084
2025-04-01 19:23:16.233825: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd64e82400 of size 3072 next 1085
2025-04-01 19:23:16.233827: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd64e83000 of size 3072 next 1086
2025-04-01 19:23:16.233829: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd64e83c00 of size 3072 next 1087
2025-04-01 19:23:16.233831: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd64e84800 of size 2359296 next 1088
2025-04-01 19:23:16.233833: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd650c4800 of size 2359296 next 1089
2025-04-01 19:23:16.233835: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1114] InUse at 7ffd65304800 of size 3072 next 1090
2025-04-01 19:23:16.233837: I e

## Saving and loading the model

In [None]:
# Save the model after training
# 1. Save the entire model (including optimizer state)
model.save_weights('transformer_sentiment_model_weights.h5')

# 2. Save the model architecture as JSON (optional)
model_json = model.to_json()
with open("transformer_sentiment_model.json", "w") as json_file:
    json_file.write(model_json)

print("Model saved successfully")

# Later, to load the model:
def load_trained_model(model_name, num_classes=3):
    # Recreate the model architecture
    loaded_model = TransformerSentimentClassifier(model_name=model_name, num_classes=num_classes)
    
    # Compile the model to initialize weights
    loaded_model.compile(
        optimizer=tf_keras.optimizers.Adam(),
        loss=tf_keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )
    
    # Build the model with a sample input
    sample_input_ids = tf.ones((1, 128), dtype=tf.int32)
    sample_masks = tf.ones((1, 128), dtype=tf.int32)
    _ = loaded_model([sample_input_ids, sample_masks])
    
    # Load the weights
    loaded_model.load_weights('transformer_sentiment_model_weights.h5')
    return loaded_model

# Example of loading the model
# loaded_model = load_trained_model()

## Saving and loading in TensorFlow format

In [None]:
# Save the entire model in SavedModel format
model.save('transformer_sentiment_model_saved', save_format='tf')

# Later, to load:
# loaded_model = tf_keras.models.load_model('transformer_sentiment_model_saved')

## Load test data

In [None]:
test_df = pd.read_csv("data/test_preprocessed.csv")
print("Test set size: " + str(len(test_df)))

X_test_inputs, X_test_masks = tokenize_data(test_df["lemmatized_sentence"].tolist())

Test set size: 3534


Processing test data: 100%|██████████| 3534/3534 [00:00<00:00, 26214.82it/s]


## Evaluate models

In [None]:
reverse_label_map = {
    0: "negative",
    1: "neutral",
    2: "positive"
}

# Make predictions
predictions = model.predict((X_test_inputs, X_test_masks))
predictions = np.argmax(predictions, axis=1)
mapped_predictions = np.array([reverse_label_map[prediction] for prediction in predictions])

test_df["predicted_sentiment"] = mapped_predictions
test_df.to_csv("data/test_predictions.csv", index=False)

test_df.head()

Unnamed: 0,textID,text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²),expanded_text,clean_text,text_without_stopwords,tokens,lemmatized_tokens,lemmatized_sentence,label,predicted_sentiment
0,f87dea47db,Last session of the day http://twitpic.com/67ezh,neutral,morning,0-20,Afghanistan,38928346.0,652860.0,60.0,Last session of the day http://twitpic.com/67ezh,last session of the day httptwitpiccom ezh,last session day httptwitpiccom ezh,"['last', 'session', 'of', 'the', 'day', 'httpt...","['last', 'session', 'of', 'the', 'day', 'httpt...",last session of the day httptwitpiccom ezh,1,neutral
1,96d74cb729,Shanghai is also really exciting (precisely -...,positive,noon,21-30,Albania,2877797.0,27400.0,105.0,Shanghai is also really exciting (precisely -...,shanghai is also really exciting precisely sky...,shanghai also really exciting precisely skyscr...,"['shanghai', 'is', 'also', 'really', 'exciting...","['shanghai', 'be', 'also', 'really', 'excite',...",shanghai be also really excite precisely skysc...,2,positive
2,eee518ae67,"Recession hit Veronique Branquinho, she has to...",negative,night,31-45,Algeria,43851044.0,2381740.0,18.0,"Recession hit Veronique Branquinho, she has to...",recession hit veronique branquinho she has to ...,recession hit veronique branquinho quit compan...,"['recession', 'hit', 'veronique', 'branquinho'...","['recession', 'hit', 'veronique', 'branquinho'...",recession hit veronique branquinho she have to...,0,negative
3,01082688c6,happy bday!,positive,morning,46-60,Andorra,77265.0,470.0,164.0,happy birthday!,happy birthday,happy birthday,"['happy', 'birthday']","['happy', 'birthday']",happy birthday,2,positive
4,33987a8ee5,http://twitpic.com/4w75p - I like it!!,positive,noon,60-70,Angola,32866272.0,1246700.0,26.0,http://twitpic.com/4w75p - I like it!!,httptwitpiccom w p i like it,httptwitpiccom w p like,"['httptwitpiccom', 'w', 'p', 'i', 'like', 'it']","['httptwitpiccom', 'w', 'p', 'i', 'like', 'it']",httptwitpiccom w p i like it,2,neutral


In [None]:
print(classification_report(test_df["sentiment"], test_df["predicted_sentiment"]))