# Alejandro Paredes, Parameter tuning of BERT

https://arunm8489.medium.com/understanding-distil-bert-in-depth-5f2ca92cf1ed

In [1]:
#from google.colab import drive
#drive.mount('/content/gdrive')

In [1]:
import torch

if torch.cuda.is_available():
    print("CUDA is available!")
else:
    print("CUDA is not available.")

CUDA is available!


In [None]:
#!pip install transformers datasets peft evaluate datasets contractions tweet-preprocessor



In [108]:
from datasets import load_dataset, DatasetDict, Dataset
from transformers import (
    AutoTokenizer,
    DistilBertModel,
    DistilBertTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer
)
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np

from tqdm import tqdm

import re
import contractions
import numpy as np

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

import preprocessor as p

# Setting up the device for GPU usage
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

device

'cuda'

In [109]:
from datasets import load_dataset

from transformers import BertTokenizerFast

from torch.utils.data import DataLoader

df = load_dataset("csv", data_files="/content/gdrive/MyDrive/ColabNotebooks/NLP Project/2017_2.csv")['train'].filter(lambda example: example['headline'] is not None and example['headline'] != '').train_test_split(test_size=0.1)
df

DatasetDict({
    train: Dataset({
        features: ['id', 'date_publish', 'outlet', 'headline', 'lead', 'body', 'authors', 'domain', 'url', 'political_leaning'],
        num_rows: 161568
    })
    test: Dataset({
        features: ['id', 'date_publish', 'outlet', 'headline', 'lead', 'body', 'authors', 'domain', 'url', 'political_leaning'],
        num_rows: 17952
    })
})

In [139]:
model_checkpoint = 'distilbert-base-uncased'

#Define label maps
id2label = {0:"UNDEFINED" ,1:"LEFT",2:"RIGHT",3:"CENTER"}
label2id = {"UNDEFINED": 0, "LEFT": 1, "RIGHT": 2, "CENTER": 3}

tokenizer =  DistilBertTokenizer.from_pretrained(model_checkpoint, add_prefix=True)

In [111]:
#lemmatization and removing stopwords
#nltk.download('punkt')
#nltk.download('wordnet')
#nltk.download('stopwords')

#lemmatizer = WordNetLemmatizer()
#stop_words = set(stopwords.words("english"))

p.set_options(p.OPT.URL, p.OPT.EMOJI, p.OPT.SMILEY)

def preprocess(text):
    def is_english_word(word):
        """Function to filter out non-English words."""
        return bool(re.match(r'^[a-zA-Z]+$', word))
    text = text.lower()
    text = contractions.fix(text)
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    text = p.clean(text)
    return text

In [112]:
for i in range(5):
    print('Original Text: ', df['train']['headline'][i], '\n')
    print('Tokenized Text: ', tokenizer.tokenize(preprocess(df['train']['headline'][i])), '\n')
    print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(df['train']['headline'][i])))

#for i in range(2):
    #print('Original Text: ', df['train']['body'][i], '\n')
    #print('Tokenized Text: ', tokenizer.tokenize(preprocess(df['train']['body'][i])), '\n')
    #print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(df['train']['body'][i])))


Original Text:  Red Cross suspends work in vast region of South Sudan 

Tokenized Text:  ['red', 'cross', 'suspend', '##s', 'work', 'in', 'vast', 'region', 'of', 'south', 'sudan'] 

Token IDs:  [2417, 2892, 28324, 2015, 2147, 1999, 6565, 2555, 1997, 2148, 10411]
Original Text:  Corrections officer severely hurt in Kansas City jail attack 

Tokenized Text:  ['corrections', 'officer', 'severely', 'hurt', 'in', 'kansas', 'city', 'jail', 'attack'] 

Token IDs:  [20983, 2961, 8949, 3480, 1999, 5111, 2103, 7173, 2886]
Original Text:  Sheriff: Inmate caught after 2nd jail escape within 3 weeks 

Tokenized Text:  ['sheriff', ':', 'inmate', 'caught', 'after', '2nd', 'jail', 'escape', 'within', '3', 'weeks'] 

Token IDs:  [6458, 1024, 24467, 3236, 2044, 3416, 7173, 4019, 2306, 1017, 3134]
Original Text:  Chicago stabbing death: One donation, two suspects at large, lots of mystery 

Tokenized Text:  ['chicago', 'stabbing', 'death', ':', 'one', 'donation', ',', 'two', 'suspects', 'at', 'large', ',

In [113]:
texts = df['train']['headline']

# Handle None or missing values by filtering out None entries
text_lengths = [len(text.split(' ')) if text is not None else 0 for text in texts]

print(min(text_lengths))
print(max(text_lengths))

# Count how many texts have 300 or more words
print(sum([1 for length in text_lengths if length >= 300]))

# Repeat for the 'body' column
texts = df['train']['body']

# Handle None or missing values by filtering out None entries
text_lengths = [len(text.split()) if text is not None else 0 for text in texts]

print(min(text_lengths))
print(max(text_lengths))

# Count how many texts have 300 or more words
print(sum([1 for length in text_lengths if length >= 300]))


1
36
0
15
16927
110914


# **Creating a custom model**

In [193]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model.

class DistillBERTClass(torch.nn.Module):
    def __init__(self):
        super(DistillBERTClass, self).__init__()
        self.l1 = DistilBertModel.from_pretrained(model_checkpoint, num_labels=8)

        # Freeze DistilBERT parameters
        for param in self.l1.parameters():
            param.requires_grad = False

        self.dropout = torch.nn.Dropout(0.3)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.fc1 = torch.nn.Linear(768, 1024)  # Input dimension is 768 for BERT
        self.fc2 = torch.nn.Linear(1024, 512)
        self.classifier = torch.nn.Linear(512, 5)
        self.relu = torch.nn.ReLU()
        self.relu = torch.nn.ReLU()
        #self.softmax = torch.nn.Softmax(dim=1)


    def forward(self, input_ids, attention_mask):
        output = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = self.dropout(pooler)
        pooler = self.fc1(pooler)
        pooler = self.relu(pooler)
        pooler = self.dropout(pooler)
        pooler = self.fc2(pooler)
        pooler = self.relu(pooler)
        pooler = self.dropout(pooler)
        #pooler = self.fc3(pooler)
        #pooler = self.softmax(pooler)
        output = self.classifier(pooler)
        return output

In [194]:
# Defining some key variables that will be used later on in the training
MAX_LEN = 512
TRAIN_BATCH_SIZE = 10
VALID_BATCH_SIZE = 10
EPOCHS = 10
LEARNING_RATE = 1e-04



model = DistillBERTClass()
model.to(device)

# Creating the loss function and optimizer
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

model

DistillBERTClass(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(

In [195]:
for name, param in model.named_parameters():
    print(f"{name}: requires_grad={param.requires_grad}")

l1.embeddings.word_embeddings.weight: requires_grad=False
l1.embeddings.position_embeddings.weight: requires_grad=False
l1.embeddings.LayerNorm.weight: requires_grad=False
l1.embeddings.LayerNorm.bias: requires_grad=False
l1.transformer.layer.0.attention.q_lin.weight: requires_grad=False
l1.transformer.layer.0.attention.q_lin.bias: requires_grad=False
l1.transformer.layer.0.attention.k_lin.weight: requires_grad=False
l1.transformer.layer.0.attention.k_lin.bias: requires_grad=False
l1.transformer.layer.0.attention.v_lin.weight: requires_grad=False
l1.transformer.layer.0.attention.v_lin.bias: requires_grad=False
l1.transformer.layer.0.attention.out_lin.weight: requires_grad=False
l1.transformer.layer.0.attention.out_lin.bias: requires_grad=False
l1.transformer.layer.0.sa_layer_norm.weight: requires_grad=False
l1.transformer.layer.0.sa_layer_norm.bias: requires_grad=False
l1.transformer.layer.0.ffn.lin1.weight: requires_grad=False
l1.transformer.layer.0.ffn.lin1.bias: requires_grad=False


In [196]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token': '[PAD]'})
  model.resize_token_embeddings(len(tokenizer))

In [197]:
def tokenize_function(examples):
    #text = examples["body"]
    text = examples["headline"]
    labels = examples["political_leaning"]

    tokenizer.truncation_side = "left"
    tokenized_inputs = tokenizer(
        text,#[preprocess(t) for t in text] ,
        return_tensors = "np",
        padding = True,
        truncation = True,
        max_length = 512
        )

    tokenized_inputs["labels"] = [label2id[label] for label in labels]
    return tokenized_inputs

#tokenized_dataset = df.map(tokenize_function, batched=True)
#tokenized_dataset

In [198]:
# Define split ratio for validation
train_test_split = df["train"].train_test_split(test_size=0.1)  # 10% for validation
datasets = DatasetDict({
    "train": train_test_split["train"],
    "validation": train_test_split["test"],  # This is your validation set
    "test": df["test"],       # Keep the original test set
})

In [199]:
import re
import contractions
from torch.utils.data import Dataset

# Define the mapping for political leaning categories to numeric values
category_mapping = {
    'LEFT': 0,
    'CENTER': 1,
    'RIGHT': 2,
    'UNDEFINED': 3
}

# Preprocessing function
def preprocess(text):
    """ Preprocess the text to clean it for tokenization """
    def is_english_word(word):
        """Function to filter out non-English words."""
        return bool(re.match(r'^[a-zA-Z]+$', word))

    text = text.lower()  # Convert to lowercase
    text = contractions.fix(text)  # Expand contractions (e.g., "don't" -> "do not")
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Remove non-ASCII characters
    text = p.clean(text)  # Clean text using the clean-text library
    return text

class Triage(Dataset):
    def __init__(self, dataset, tokenizer, max_length):
        self.texts = dataset['headline']  # Assuming 'text' column contains the raw text
        self.labels = dataset['political_leaning']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __getitem__(self, index):
        # Get raw text and label for the current index
        text = self.texts[index]
        label = self.labels[index]
        tokenizer.truncation_side = "left"
        #tokenized_inputs = self.tokenizer(
        tokenized_inputs = self.tokenizer.encode_plus(
            preprocess(text),
            None,
            #return_tensors="pt",
            #padding=True,
            #truncation=True,
            #max_length=self.max_length
            add_special_tokens=True,
            max_length=self.max_length,
            pad_to_max_length=True
        )

        #encoding = tokenize_function({"text": [text], "labels": [label]}, self.tokenizer, self.max_length)
        input_ids = tokenized_inputs['input_ids']  # Remove the batch dimension
        attention_mask = tokenized_inputs['attention_mask']  # Remove the batch dimension

        return {
            'input_ids': torch.tensor(input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
            'labels': torch.tensor(category_mapping[self.labels[index]], dtype=torch.float)
        }

    def __len__(self):
        return len(self.texts)


In [200]:
train_dataset = Triage(datasets['train'], tokenizer, max_length=512)
val_dataset = Triage(datasets['validation'], tokenizer, max_length=512)
test_dataset = Triage(datasets['test'], tokenizer, max_length=512)

In [201]:
# Training DataLoader
training_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    collate_fn=data_collator,
)

# Validation DataLoader
val_loader = DataLoader(
    val_dataset,
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    collate_fn=data_collator,
)

# Test DataLoader
test_loader = DataLoader(
    test_dataset,
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    collate_fn=data_collator,
)

### Training the model

In [202]:
# Defining the training function on the 80% of the dataset for tuning the distilbert model
def calculate_accuracy(preds, targets):
    n_correct = (preds==targets).sum().item()
    return n_correct

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    for _,data in tqdm(enumerate(training_loader, 0)):
        ids = data['input_ids'].to(device, dtype = torch.long)
        mask = data['attention_mask'].to(device, dtype = torch.long)
        #token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['labels'].to(device, dtype = torch.long)

        outputs = model(ids, mask)#, token_type_ids)
        loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.data, dim=1)
        n_correct += calculate_accuracy(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples+=targets.size(0)

        if _%5000==0:
            loss_step = tr_loss/nb_tr_steps
            accu_step = (n_correct*100)/nb_tr_examples
            print(f"Training Loss per 5000 steps: {loss_step}")
            print(f"Training Accuracy per 5000 steps: {accu_step}")

        optimizer.zero_grad()
        loss.backward()
        # # When using GPU
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

def valid(model, testing_loader):
    model.eval()
    n_correct = 0; n_wrong = 0; total = 0; tr_loss=0; nb_tr_steps=0; nb_tr_examples=0
    with torch.no_grad():
        for _, data in tqdm(enumerate(testing_loader, 0)):
            ids = data['input_ids'].to(device, dtype = torch.long)
            mask = data['attention_mask'].to(device, dtype = torch.long)
            #token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['labels'].to(device, dtype = torch.long)
            outputs = model(ids, mask).squeeze()
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.data, dim=1)
            n_correct += calculate_accuracy(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples+=targets.size(0)

            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                print(f"Validation Accuracy per 100 steps: {accu_step}")
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")

    return epoch_loss, epoch_accu


In [204]:
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    print("-" * 30)

    train_loss = train(epoch)
    val_loss, val_accuracy = valid(model, val_loader)

    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_model.pt")
        print("Saved Best Model!")


Epoch 1/10
------------------------------


2it [00:00,  5.26it/s]

Training Loss per 5000 steps: 1.0981061458587646
Training Accuracy per 5000 steps: 50.0


5002it [11:55,  6.99it/s]

Training Loss per 5000 steps: 1.2158478236250867
Training Accuracy per 5000 steps: 45.250949810037994


10002it [23:50,  7.07it/s]

Training Loss per 5000 steps: 1.2069132186605769
Training Accuracy per 5000 steps: 45.752424757524246


14542it [34:39,  6.99it/s]


The Total Accuracy for Epoch 0: 46.19939344341212
Training Loss Epoch: 1.2015696653755277
Training Accuracy Epoch: 46.19939344341212


1it [00:00,  7.56it/s]

Validation Loss per 100 steps: 1.2763311862945557
Validation Accuracy per 100 steps: 50.0


1616it [03:46,  7.13it/s]


Validation Loss Epoch: 1.143046498815022
Validation Accuracy Epoch: 48.542427430834934
Saved Best Model!

Epoch 2/10
------------------------------


1it [00:00,  7.10it/s]

Training Loss per 5000 steps: 0.96269690990448
Training Accuracy per 5000 steps: 70.0


5002it [11:54,  7.05it/s]

Training Loss per 5000 steps: 1.1856764811702882
Training Accuracy per 5000 steps: 47.228554289142174


10002it [23:49,  7.07it/s]

Training Loss per 5000 steps: 1.1855292908460686
Training Accuracy per 5000 steps: 47.24127587241276


14542it [34:39,  6.99it/s]


The Total Accuracy for Epoch 1: 47.38981232506482
Training Loss Epoch: 1.1839389795655972
Training Accuracy Epoch: 47.38981232506482


1it [00:00,  7.69it/s]

Validation Loss per 100 steps: 1.1015052795410156
Validation Accuracy per 100 steps: 70.0


1616it [03:46,  7.13it/s]


Validation Loss Epoch: 1.1397270067862355
Validation Accuracy Epoch: 50.6839140929628
Saved Best Model!

Epoch 3/10
------------------------------


1it [00:00,  7.07it/s]

Training Loss per 5000 steps: 0.9601763486862183
Training Accuracy per 5000 steps: 60.0


5002it [11:55,  7.06it/s]

Training Loss per 5000 steps: 1.1904155022977376
Training Accuracy per 5000 steps: 47.07658468306339


10002it [23:50,  6.94it/s]

Training Loss per 5000 steps: 1.1904257130007805
Training Accuracy per 5000 steps: 46.92530746925308


14542it [34:43,  6.98it/s]


The Total Accuracy for Epoch 2: 47.0528364429101
Training Loss Epoch: 1.189491287187375
Training Accuracy Epoch: 47.0528364429101


1it [00:00,  7.52it/s]

Validation Loss per 100 steps: 1.0833522081375122
Validation Accuracy per 100 steps: 70.0


1616it [03:46,  7.12it/s]


Validation Loss Epoch: 1.155599923011395
Validation Accuracy Epoch: 47.52738751005756

Epoch 4/10
------------------------------


1it [00:00,  7.07it/s]

Training Loss per 5000 steps: 1.1276814937591553
Training Accuracy per 5000 steps: 40.0


5002it [11:57,  7.04it/s]

Training Loss per 5000 steps: 1.1779514262018813
Training Accuracy per 5000 steps: 47.58248350329934


10002it [23:54,  7.00it/s]

Training Loss per 5000 steps: 1.179525209908342
Training Accuracy per 5000 steps: 47.77422257774223


14542it [34:47,  6.96it/s]


The Total Accuracy for Epoch 3: 47.57067897201725
Training Loss Epoch: 1.1833471740957073
Training Accuracy Epoch: 47.57067897201725


1it [00:00,  7.65it/s]

Validation Loss per 100 steps: 1.2244033813476562
Validation Accuracy per 100 steps: 50.0


1616it [03:47,  7.09it/s]


Validation Loss Epoch: 1.1378429900891711
Validation Accuracy Epoch: 51.036702358111036
Saved Best Model!

Epoch 5/10
------------------------------


2it [00:00,  5.87it/s]

Training Loss per 5000 steps: 1.1576762199401855
Training Accuracy per 5000 steps: 60.0


5002it [11:58,  7.01it/s]

Training Loss per 5000 steps: 1.1894061499608801
Training Accuracy per 5000 steps: 47.150569886022794


10002it [23:55,  7.08it/s]

Training Loss per 5000 steps: 1.1925137128344585
Training Accuracy per 5000 steps: 46.923307669233076


14542it [34:48,  6.96it/s]


The Total Accuracy for Epoch 4: 46.95655761943732
Training Loss Epoch: 1.1913354651826462
Training Accuracy Epoch: 46.95655761943732


1it [00:00,  7.58it/s]

Validation Loss per 100 steps: 1.1051081418991089
Validation Accuracy per 100 steps: 50.0


1616it [03:46,  7.13it/s]


Validation Loss Epoch: 1.1976400574687684
Validation Accuracy Epoch: 48.00396113139816

Epoch 6/10
------------------------------


1it [00:00,  6.92it/s]

Training Loss per 5000 steps: 1.2208364009857178
Training Accuracy per 5000 steps: 30.0


242it [00:34,  6.97it/s]


KeyboardInterrupt: 

In [205]:
!cp best_model.pt '/content/gdrive/MyDrive/ColabNotebooks/NLP Project/distilBERT/'

In [206]:
model.load_state_dict(torch.load("best_model.pt"))
model.to(device)

  model.load_state_dict(torch.load("best_model.pt"))


DistillBERTClass(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(

In [208]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Test function
def test_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Testing"):
            # Move batch to GPU/CPU
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Collect predictions and true labels
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="weighted")

    print("\nTest Results")
    print("-" * 30)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")

    return accuracy, precision, recall, f1

# After training and validation, evaluate on the test set
print("\nEvaluating on Test Set")
test_accuracy, test_precision, test_recall, test_f1 = test_model(model, test_loader, device)


Evaluating on Test Set


Testing: 100%|██████████| 1796/1796 [04:12<00:00,  7.12it/s]



Test Results
------------------------------
Accuracy: 0.5100
Precision: 0.5181
Recall: 0.5100
F1-score: 0.4948


In [207]:
import matplotlib.pyplot as plt
import numpy as np

# Plot function for metrics
def plot_metrics(metrics, metric_names, title):
    fig, ax = plt.subplots(figsize=(8, 6))
    bars = ax.bar(metric_names, metrics, color=['skyblue', 'orange', 'green', 'red'])

    # Add value annotations on bars
    for bar in bars:
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                f"{bar.get_height():.4f}", ha='center', fontsize=10)

    ax.set_ylim(0, 1)
    ax.set_title(title, fontsize=16)
    ax.set_ylabel("Score", fontsize=14)
    ax.set_xlabel("Metrics", fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.show()

# After testing, plot the metrics
print("\nEvaluating on Test Set")
test_accuracy, test_precision, test_recall, test_f1 = test_model(model, test_loader, device)

# Metrics and their names
metrics = [test_accuracy, test_precision, test_recall, test_f1]
metric_names = ["Accuracy", "Precision", "Recall", "F1-Score"]

# Plot the test results
plot_metrics(metrics, metric_names, title="Test Metrics Overview")



Evaluating on Test Set


NameError: name 'test_model' is not defined

### Other form of training

In [None]:
'''
accuracy = evaluate.load("accuracy")

def compute_metrics(p):
  predictions, labels = p
  predictions = np.argmax(predictions, axis=1)
  return {"accuracy": accuracy.compute(predictions=predictions
                                       , references=labels)}
'''

In [None]:
'''
lr = 1e-3
batch_size = 10
num_epochs = 10

training_args = TrainingArguments(
    output_dir=""+model_checkpoint+"lora-txt",
    learning_rate = lr,
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    num_train_epochs = num_epochs,
    weight_decay = 0.01,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    load_best_model_at_end = True,
)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_dataset["train"],
    eval_dataset = tokenized_dataset["test"],
    tokenizer = tokenizer,
    data_collator = data_collator,
    compute_metrics = compute_metrics
)
'''

In [None]:
#trainer.train()

### Load pretrained model

In [None]:
# Load model and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state_dict = torch.load("trained_model_gral_imbd.pth", map_location=device)

text_list = ['''President-elect Trump announced on Tuesday night that he intends to appoint Linda McMahon, former CEO of World Wrestling Entertainment (WWE), to lead the Department of Education. His announcement, which was posted on Truth Social, came hours after two sources told Fox News that McMahon was likely to be picked. "It is my great honor to announce that Linda McMahon, former Administrator of the Small Business Administration, will be the United States Secretary of Education," Trump's statement read.
"As Secretary of Education, Linda will fight tirelessly to expand Choice to every State in America, and empower parents to make the best Education decisions for their families," the press release added. "Linda served for two years on the Connecticut Board of Education, where she was one of fifteen members overseeing all Public Education in the State, including its Technical High School system."''',
             '''Donald Trump believes presidents have almost absolute power. In his second term, there will be few political or legal restraints to check him. The president-elects sweeping victory over Vice President Kamala Harris suddenly turned the theoretical notion that he will indulge his autocratic instincts into a genuine possibility.When Trump returns to the White House in January as one of the most powerful presidents in history, hell be able to take advantage of his own filleting of guardrails during his first presidency, which he continued through legal maneuverings out of office.''',
             '''Nearly 100 Democrats, including Salud Carbajal, requested the Ethics Committee release its report on former Congressman Matt Gaetz's misconduct allegations. The letter, led by Rep. Sean Casten, emphasized that the Senate needs information for Gaetz's attorney general nomination. House Speaker Mike Johnson opposed releasing the report, stating Gaetz is now a "private citizen" and outside the panel's jurisdiction.'''
             , ''' A South Dakota judge dismissed a lawsuit from the anti-abortion group Life Defense targeting an abortion rights measure that voters later rejected.
Judge John Pekas dismissed the lawsuit at the request of Life Defense, which had challenged the ballot measure's petitions.
Voters in nine states, including South Dakota, rejected abortion rights measures during the November election. '''
             ]
model.to('cuda')
print('Trained model predictions')
for text in text_list:
  inputs = tokenizer.encode(text, return_tensors='pt').to('cuda')

  logits = model(inputs).logits
  predictions = torch.max(logits,1).indices

  #print(f'{text} - {id2label[predictions.tolist()[0]]}')
  print(f'{id2label[predictions.tolist()[0]]}')