In [3]:
import torch
import random
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
seed = 42
np.random.seed(seed)
random.seed(seed)
import sklearn
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print(f"Random seed set as {seed}")

torch.cuda.empty_cache()

import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
import json
import pandas as pd

Random seed set as 42


In [4]:
# Function to efficiently load and pre-process data with sample size reduction
def pass_data():
    df = pd.read_csv("train_40k.csv", encoding="latin-1")

    # Reduce sample size if needed
    sample_size=5000
    
    if len(df) > sample_size:
        df = df.sample(n=sample_size, random_state=42)

    df["text"] = df["Text"]
    df["label"] = df["Cat1"]
    return df

def get_data(test_size=0.2):
    df = pass_data()
    X_train, X_test, y_train, y_test = train_test_split(
        df["text"], df["label"], test_size=test_size, random_state=42
    )
    # Encode labels
    label_encoder = LabelEncoder()
    train_text = X_train.to_numpy()
    train_labels = label_encoder.fit_transform(y_train.to_numpy())
    test_text = X_test.to_numpy()
    test_labels = label_encoder.fit_transform(y_test.to_numpy())
    return (train_text, train_labels), (test_text, test_labels)

(train_text, train_labels), (test_text, test_labels) = get_data()

In [5]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
num_labels = len(np.unique(train_labels))
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
tokenized_texts = []
from tqdm import tqdm

max_seq_length = 512  # Maximum sequence length for BERT

for text in tqdm(train_text):
    tokenized_texts.append(tokenizer(text, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt'))

100%|██████████| 4000/4000 [00:05<00:00, 681.36it/s]


In [7]:
# Tokenize the texts and convert them to tensors
from sklearn.metrics import accuracy_score, f1_score, classification_report

input_ids = torch.cat([t['input_ids'] for t in tokenized_texts], dim=0)
attention_mask = torch.cat([t['attention_mask'] for t in tokenized_texts], dim=0)
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
labels = torch.tensor(train_labels_encoded)

# Create a dataset and data loader
dataset = TensorDataset(input_ids, attention_mask, labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 16
lr = 1e-5

train_loader = DataLoader(train_dataset, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

optimizer = AdamW(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()



In [8]:
def train_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    for batch in tqdm(dataloader):
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, attention_mask, labels = batch
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=1)
            correct_predictions += torch.sum(predictions == labels).item()
            print(labels.cpu())
            total_samples += labels.size(0)


    print(classification_report(predictions.cpu().numpy(), labels.cpu().numpy()))
    return total_loss / len(dataloader), correct_predictions / total_samples


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [10]:
# Train the model with 3 epochs
num_epochs = 3
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)

    print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val Accuracy: {val_accuracy:.2%}")
    model.save_pretrained(f"fine_tuned_bert_epoch_{epoch+1}_lr_{lr}")

# Save the fine-tuned model
model.save_pretrained(f"fine_tuned_bert")

100%|██████████| 200/200 [08:18<00:00,  2.49s/it]
  2%|▏         | 1/50 [00:00<00:28,  1.73it/s]

tensor([0, 1, 0, 5, 4, 1, 1, 4, 5, 3, 2, 2, 0, 5, 0, 5])


  4%|▍         | 2/50 [00:01<00:27,  1.76it/s]

tensor([0, 5, 5, 5, 1, 3, 1, 1, 4, 4, 0, 1, 1, 5, 0, 5])


  6%|▌         | 3/50 [00:01<00:26,  1.78it/s]

tensor([3, 3, 3, 0, 1, 1, 0, 3, 1, 3, 4, 3, 3, 3, 5, 4])


  8%|▊         | 4/50 [00:02<00:25,  1.77it/s]

tensor([5, 5, 0, 2, 3, 0, 1, 3, 5, 3, 3, 2, 3, 4, 1, 5])


 10%|█         | 5/50 [00:02<00:25,  1.75it/s]

tensor([5, 3, 5, 1, 5, 4, 1, 5, 5, 2, 3, 4, 3, 0, 4, 2])


 12%|█▏        | 6/50 [00:03<00:25,  1.72it/s]

tensor([1, 5, 5, 5, 1, 5, 0, 5, 1, 4, 0, 5, 3, 2, 3, 3])


 14%|█▍        | 7/50 [00:04<00:25,  1.71it/s]

tensor([2, 3, 0, 2, 3, 5, 1, 0, 2, 3, 2, 1, 0, 5, 3, 3])


 16%|█▌        | 8/50 [00:04<00:25,  1.62it/s]

tensor([1, 3, 4, 1, 1, 1, 5, 5, 5, 4, 5, 3, 1, 5, 0, 3])


 18%|█▊        | 9/50 [00:05<00:25,  1.59it/s]

tensor([4, 2, 4, 5, 1, 3, 4, 4, 0, 1, 0, 0, 2, 2, 5, 5])


 20%|██        | 10/50 [00:06<00:25,  1.59it/s]

tensor([5, 0, 2, 2, 2, 5, 4, 1, 3, 1, 5, 0, 4, 3, 3, 1])


 22%|██▏       | 11/50 [00:06<00:24,  1.58it/s]

tensor([3, 3, 0, 3, 0, 3, 5, 3, 4, 5, 3, 5, 5, 5, 5, 2])


 24%|██▍       | 12/50 [00:07<00:23,  1.59it/s]

tensor([0, 5, 5, 4, 5, 3, 1, 2, 5, 3, 2, 5, 3, 5, 0, 3])


 26%|██▌       | 13/50 [00:07<00:23,  1.59it/s]

tensor([2, 5, 1, 0, 1, 4, 3, 0, 5, 4, 5, 1, 1, 0, 5, 2])


 28%|██▊       | 14/50 [00:08<00:22,  1.57it/s]

tensor([3, 4, 4, 3, 2, 5, 3, 3, 4, 3, 5, 2, 0, 2, 4, 0])


 30%|███       | 15/50 [00:09<00:22,  1.54it/s]

tensor([5, 1, 3, 5, 5, 1, 3, 2, 1, 2, 5, 4, 2, 2, 2, 1])


 32%|███▏      | 16/50 [00:09<00:21,  1.56it/s]

tensor([0, 3, 4, 5, 3, 1, 3, 1, 4, 3, 3, 0, 1, 5, 3, 1])


 34%|███▍      | 17/50 [00:10<00:20,  1.58it/s]

tensor([0, 5, 3, 4, 3, 1, 5, 4, 1, 0, 1, 2, 5, 0, 4, 2])


 36%|███▌      | 18/50 [00:11<00:20,  1.58it/s]

tensor([2, 1, 5, 2, 0, 0, 5, 2, 3, 3, 3, 1, 3, 5, 5, 5])


 38%|███▊      | 19/50 [00:11<00:19,  1.60it/s]

tensor([5, 2, 3, 5, 3, 0, 0, 5, 3, 5, 4, 4, 5, 0, 3, 5])


 40%|████      | 20/50 [00:12<00:18,  1.61it/s]

tensor([3, 0, 1, 3, 5, 0, 0, 3, 1, 0, 5, 3, 5, 3, 3, 5])


 42%|████▏     | 21/50 [00:13<00:18,  1.56it/s]

tensor([3, 0, 3, 2, 3, 5, 0, 5, 4, 1, 3, 0, 1, 3, 1, 3])


 44%|████▍     | 22/50 [00:13<00:18,  1.55it/s]

tensor([2, 4, 0, 5, 0, 0, 3, 0, 1, 3, 5, 5, 1, 3, 0, 5])


 46%|████▌     | 23/50 [00:14<00:17,  1.56it/s]

tensor([3, 1, 1, 0, 5, 1, 5, 1, 4, 0, 1, 0, 5, 3, 0, 1])


 48%|████▊     | 24/50 [00:14<00:16,  1.55it/s]

tensor([3, 3, 1, 1, 5, 4, 3, 3, 3, 0, 3, 0, 2, 4, 3, 1])


 50%|█████     | 25/50 [00:15<00:15,  1.57it/s]

tensor([2, 0, 5, 3, 0, 3, 5, 4, 4, 2, 1, 4, 2, 0, 4, 2])


 52%|█████▏    | 26/50 [00:16<00:15,  1.58it/s]

tensor([2, 0, 5, 5, 0, 2, 3, 5, 5, 1, 1, 3, 3, 3, 3, 3])


 54%|█████▍    | 27/50 [00:16<00:14,  1.55it/s]

tensor([2, 5, 3, 3, 5, 5, 5, 2, 3, 5, 0, 1, 2, 2, 0, 0])


 56%|█████▌    | 28/50 [00:17<00:14,  1.52it/s]

tensor([3, 5, 0, 2, 4, 1, 5, 3, 5, 5, 2, 2, 4, 0, 0, 1])


 58%|█████▊    | 29/50 [00:18<00:13,  1.52it/s]

tensor([5, 1, 5, 1, 3, 2, 4, 3, 1, 5, 2, 3, 5, 4, 3, 3])


 60%|██████    | 30/50 [00:18<00:13,  1.53it/s]

tensor([1, 5, 5, 1, 4, 5, 3, 0, 2, 2, 3, 1, 5, 3, 5, 1])


 62%|██████▏   | 31/50 [00:19<00:12,  1.56it/s]

tensor([5, 5, 1, 3, 3, 3, 4, 3, 3, 3, 3, 0, 5, 0, 3, 1])


 64%|██████▍   | 32/50 [00:20<00:11,  1.58it/s]

tensor([4, 3, 5, 3, 0, 5, 5, 5, 3, 1, 2, 1, 3, 1, 2, 5])


 66%|██████▌   | 33/50 [00:20<00:10,  1.55it/s]

tensor([5, 0, 3, 3, 0, 3, 3, 0, 3, 4, 0, 4, 3, 1, 1, 4])


 68%|██████▊   | 34/50 [00:21<00:10,  1.55it/s]

tensor([5, 3, 3, 0, 5, 5, 3, 0, 4, 5, 4, 2, 1, 5, 4, 2])


 70%|███████   | 35/50 [00:22<00:09,  1.54it/s]

tensor([5, 3, 5, 3, 2, 0, 3, 3, 5, 4, 0, 3, 0, 5, 1, 4])


 72%|███████▏  | 36/50 [00:22<00:09,  1.55it/s]

tensor([0, 5, 5, 2, 0, 0, 0, 1, 3, 0, 0, 3, 3, 1, 5, 3])


 74%|███████▍  | 37/50 [00:23<00:08,  1.56it/s]

tensor([5, 4, 1, 5, 2, 5, 2, 3, 3, 4, 5, 5, 3, 3, 5, 3])


 76%|███████▌  | 38/50 [00:23<00:07,  1.59it/s]

tensor([4, 5, 5, 5, 5, 1, 5, 0, 4, 3, 2, 5, 5, 5, 5, 4])


 78%|███████▊  | 39/50 [00:24<00:07,  1.56it/s]

tensor([3, 4, 2, 4, 3, 3, 4, 3, 1, 5, 4, 3, 3, 0, 0, 0])


 80%|████████  | 40/50 [00:25<00:06,  1.56it/s]

tensor([1, 5, 5, 1, 3, 4, 3, 3, 3, 0, 5, 2, 4, 4, 2, 5])


 82%|████████▏ | 41/50 [00:25<00:05,  1.56it/s]

tensor([2, 5, 5, 3, 3, 1, 1, 5, 3, 2, 3, 5, 0, 5, 5, 4])


 84%|████████▍ | 42/50 [00:26<00:05,  1.57it/s]

tensor([0, 5, 5, 3, 3, 3, 2, 0, 1, 2, 1, 0, 0, 1, 5, 4])


 86%|████████▌ | 43/50 [00:27<00:04,  1.57it/s]

tensor([4, 4, 5, 0, 3, 1, 3, 5, 1, 3, 1, 3, 4, 0, 1, 4])


 88%|████████▊ | 44/50 [00:27<00:03,  1.58it/s]

tensor([1, 4, 1, 5, 5, 1, 3, 1, 1, 3, 1, 5, 5, 2, 0, 5])


 90%|█████████ | 45/50 [00:28<00:03,  1.57it/s]

tensor([5, 1, 3, 3, 5, 5, 0, 4, 2, 5, 1, 4, 0, 3, 5, 4])


 92%|█████████▏| 46/50 [00:29<00:02,  1.54it/s]

tensor([2, 3, 1, 3, 5, 4, 3, 3, 3, 1, 5, 5, 0, 3, 2, 3])


 94%|█████████▍| 47/50 [00:29<00:01,  1.54it/s]

tensor([1, 0, 0, 1, 5, 3, 3, 2, 3, 4, 2, 5, 5, 3, 3, 5])


 96%|█████████▌| 48/50 [00:30<00:01,  1.61it/s]

tensor([4, 5, 1, 3, 4, 3, 1, 1, 5, 1, 0, 3, 3, 3, 3, 4])


 98%|█████████▊| 49/50 [00:30<00:00,  1.65it/s]

tensor([5, 3, 3, 0, 4, 5, 3, 1, 5, 1, 0, 5, 4, 3, 3, 2])


100%|██████████| 50/50 [00:31<00:00,  1.59it/s]

tensor([0, 5, 0, 0, 0, 0, 3, 3, 3, 0, 5, 3, 5, 3, 3, 3])
              precision    recall  f1-score   support

           0       1.00      0.86      0.92         7
           3       0.86      1.00      0.92         6
           5       1.00      1.00      1.00         3

    accuracy                           0.94        16
   macro avg       0.95      0.95      0.95        16
weighted avg       0.95      0.94      0.94        16

Epoch 1/3 - Train Loss: 1.2544 - Val Loss: 0.7242 - Val Accuracy: 78.50%



100%|██████████| 200/200 [08:20<00:00,  2.50s/it]
  2%|▏         | 1/50 [00:00<00:28,  1.69it/s]

tensor([0, 1, 0, 5, 4, 1, 1, 4, 5, 3, 2, 2, 0, 5, 0, 5])


  4%|▍         | 2/50 [00:01<00:27,  1.74it/s]

tensor([0, 5, 5, 5, 1, 3, 1, 1, 4, 4, 0, 1, 1, 5, 0, 5])


  6%|▌         | 3/50 [00:01<00:26,  1.75it/s]

tensor([3, 3, 3, 0, 1, 1, 0, 3, 1, 3, 4, 3, 3, 3, 5, 4])


  8%|▊         | 4/50 [00:02<00:26,  1.74it/s]

tensor([5, 5, 0, 2, 3, 0, 1, 3, 5, 3, 3, 2, 3, 4, 1, 5])


 10%|█         | 5/50 [00:02<00:25,  1.75it/s]

tensor([5, 3, 5, 1, 5, 4, 1, 5, 5, 2, 3, 4, 3, 0, 4, 2])


 12%|█▏        | 6/50 [00:03<00:24,  1.76it/s]

tensor([1, 5, 5, 5, 1, 5, 0, 5, 1, 4, 0, 5, 3, 2, 3, 3])


 14%|█▍        | 7/50 [00:03<00:24,  1.76it/s]

tensor([2, 3, 0, 2, 3, 5, 1, 0, 2, 3, 2, 1, 0, 5, 3, 3])


 16%|█▌        | 8/50 [00:04<00:23,  1.77it/s]

tensor([1, 3, 4, 1, 1, 1, 5, 5, 5, 4, 5, 3, 1, 5, 0, 3])


 18%|█▊        | 9/50 [00:05<00:23,  1.77it/s]

tensor([4, 2, 4, 5, 1, 3, 4, 4, 0, 1, 0, 0, 2, 2, 5, 5])


 20%|██        | 10/50 [00:06<00:28,  1.43it/s]

tensor([5, 0, 2, 2, 2, 5, 4, 1, 3, 1, 5, 0, 4, 3, 3, 1])


 22%|██▏       | 11/50 [00:06<00:25,  1.52it/s]

tensor([3, 3, 0, 3, 0, 3, 5, 3, 4, 5, 3, 5, 5, 5, 5, 2])


 24%|██▍       | 12/50 [00:07<00:24,  1.58it/s]

tensor([0, 5, 5, 4, 5, 3, 1, 2, 5, 3, 2, 5, 3, 5, 0, 3])


 26%|██▌       | 13/50 [00:07<00:22,  1.63it/s]

tensor([2, 5, 1, 0, 1, 4, 3, 0, 5, 4, 5, 1, 1, 0, 5, 2])


 28%|██▊       | 14/50 [00:08<00:21,  1.66it/s]

tensor([3, 4, 4, 3, 2, 5, 3, 3, 4, 3, 5, 2, 0, 2, 4, 0])


 30%|███       | 15/50 [00:08<00:20,  1.69it/s]

tensor([5, 1, 3, 5, 5, 1, 3, 2, 1, 2, 5, 4, 2, 2, 2, 1])


 32%|███▏      | 16/50 [00:09<00:19,  1.72it/s]

tensor([0, 3, 4, 5, 3, 1, 3, 1, 4, 3, 3, 0, 1, 5, 3, 1])


 34%|███▍      | 17/50 [00:10<00:19,  1.72it/s]

tensor([0, 5, 3, 4, 3, 1, 5, 4, 1, 0, 1, 2, 5, 0, 4, 2])


 36%|███▌      | 18/50 [00:10<00:18,  1.69it/s]

tensor([2, 1, 5, 2, 0, 0, 5, 2, 3, 3, 3, 1, 3, 5, 5, 5])


 38%|███▊      | 19/50 [00:11<00:18,  1.67it/s]

tensor([5, 2, 3, 5, 3, 0, 0, 5, 3, 5, 4, 4, 5, 0, 3, 5])


 40%|████      | 20/50 [00:11<00:18,  1.62it/s]

tensor([3, 0, 1, 3, 5, 0, 0, 3, 1, 0, 5, 3, 5, 3, 3, 5])


 42%|████▏     | 21/50 [00:12<00:18,  1.60it/s]

tensor([3, 0, 3, 2, 3, 5, 0, 5, 4, 1, 3, 0, 1, 3, 1, 3])


 44%|████▍     | 22/50 [00:13<00:17,  1.61it/s]

tensor([2, 4, 0, 5, 0, 0, 3, 0, 1, 3, 5, 5, 1, 3, 0, 5])


 46%|████▌     | 23/50 [00:13<00:16,  1.60it/s]

tensor([3, 1, 1, 0, 5, 1, 5, 1, 4, 0, 1, 0, 5, 3, 0, 1])


 48%|████▊     | 24/50 [00:14<00:16,  1.60it/s]

tensor([3, 3, 1, 1, 5, 4, 3, 3, 3, 0, 3, 0, 2, 4, 3, 1])


 50%|█████     | 25/50 [00:15<00:15,  1.60it/s]

tensor([2, 0, 5, 3, 0, 3, 5, 4, 4, 2, 1, 4, 2, 0, 4, 2])


 52%|█████▏    | 26/50 [00:15<00:15,  1.59it/s]

tensor([2, 0, 5, 5, 0, 2, 3, 5, 5, 1, 1, 3, 3, 3, 3, 3])


 54%|█████▍    | 27/50 [00:16<00:14,  1.57it/s]

tensor([2, 5, 3, 3, 5, 5, 5, 2, 3, 5, 0, 1, 2, 2, 0, 0])


 56%|█████▌    | 28/50 [00:17<00:14,  1.56it/s]

tensor([3, 5, 0, 2, 4, 1, 5, 3, 5, 5, 2, 2, 4, 0, 0, 1])


 58%|█████▊    | 29/50 [00:17<00:13,  1.55it/s]

tensor([5, 1, 5, 1, 3, 2, 4, 3, 1, 5, 2, 3, 5, 4, 3, 3])


 60%|██████    | 30/50 [00:18<00:12,  1.57it/s]

tensor([1, 5, 5, 1, 4, 5, 3, 0, 2, 2, 3, 1, 5, 3, 5, 1])


 62%|██████▏   | 31/50 [00:18<00:11,  1.59it/s]

tensor([5, 5, 1, 3, 3, 3, 4, 3, 3, 3, 3, 0, 5, 0, 3, 1])


 64%|██████▍   | 32/50 [00:19<00:11,  1.60it/s]

tensor([4, 3, 5, 3, 0, 5, 5, 5, 3, 1, 2, 1, 3, 1, 2, 5])


 66%|██████▌   | 33/50 [00:20<00:10,  1.58it/s]

tensor([5, 0, 3, 3, 0, 3, 3, 0, 3, 4, 0, 4, 3, 1, 1, 4])


 68%|██████▊   | 34/50 [00:20<00:10,  1.56it/s]

tensor([5, 3, 3, 0, 5, 5, 3, 0, 4, 5, 4, 2, 1, 5, 4, 2])


 70%|███████   | 35/50 [00:21<00:09,  1.55it/s]

tensor([5, 3, 5, 3, 2, 0, 3, 3, 5, 4, 0, 3, 0, 5, 1, 4])


 72%|███████▏  | 36/50 [00:22<00:08,  1.57it/s]

tensor([0, 5, 5, 2, 0, 0, 0, 1, 3, 0, 0, 3, 3, 1, 5, 3])


 74%|███████▍  | 37/50 [00:22<00:08,  1.58it/s]

tensor([5, 4, 1, 5, 2, 5, 2, 3, 3, 4, 5, 5, 3, 3, 5, 3])


 76%|███████▌  | 38/50 [00:23<00:07,  1.63it/s]

tensor([4, 5, 5, 5, 5, 1, 5, 0, 4, 3, 2, 5, 5, 5, 5, 4])


 78%|███████▊  | 39/50 [00:23<00:06,  1.66it/s]

tensor([3, 4, 2, 4, 3, 3, 4, 3, 1, 5, 4, 3, 3, 0, 0, 0])


 80%|████████  | 40/50 [00:24<00:05,  1.69it/s]

tensor([1, 5, 5, 1, 3, 4, 3, 3, 3, 0, 5, 2, 4, 4, 2, 5])


 82%|████████▏ | 41/50 [00:25<00:05,  1.71it/s]

tensor([2, 5, 5, 3, 3, 1, 1, 5, 3, 2, 3, 5, 0, 5, 5, 4])


 84%|████████▍ | 42/50 [00:25<00:04,  1.67it/s]

tensor([0, 5, 5, 3, 3, 3, 2, 0, 1, 2, 1, 0, 0, 1, 5, 4])


 86%|████████▌ | 43/50 [00:26<00:04,  1.64it/s]

tensor([4, 4, 5, 0, 3, 1, 3, 5, 1, 3, 1, 3, 4, 0, 1, 4])


 88%|████████▊ | 44/50 [00:26<00:03,  1.63it/s]

tensor([1, 4, 1, 5, 5, 1, 3, 1, 1, 3, 1, 5, 5, 2, 0, 5])


 90%|█████████ | 45/50 [00:27<00:03,  1.64it/s]

tensor([5, 1, 3, 3, 5, 5, 0, 4, 2, 5, 1, 4, 0, 3, 5, 4])


 92%|█████████▏| 46/50 [00:28<00:02,  1.59it/s]

tensor([2, 3, 1, 3, 5, 4, 3, 3, 3, 1, 5, 5, 0, 3, 2, 3])


 94%|█████████▍| 47/50 [00:28<00:01,  1.57it/s]

tensor([1, 0, 0, 1, 5, 3, 3, 2, 3, 4, 2, 5, 5, 3, 3, 5])


 96%|█████████▌| 48/50 [00:29<00:01,  1.57it/s]

tensor([4, 5, 1, 3, 4, 3, 1, 1, 5, 1, 0, 3, 3, 3, 3, 4])


 98%|█████████▊| 49/50 [00:30<00:00,  1.57it/s]

tensor([5, 3, 3, 0, 4, 5, 3, 1, 5, 1, 0, 5, 4, 3, 3, 2])


100%|██████████| 50/50 [00:30<00:00,  1.62it/s]

tensor([0, 5, 0, 0, 0, 0, 3, 3, 3, 0, 5, 3, 5, 3, 3, 3])
              precision    recall  f1-score   support

           0       1.00      0.86      0.92         7
           3       0.86      1.00      0.92         6
           5       1.00      1.00      1.00         3

    accuracy                           0.94        16
   macro avg       0.95      0.95      0.95        16
weighted avg       0.95      0.94      0.94        16

Epoch 2/3 - Train Loss: 0.5900 - Val Loss: 0.5656 - Val Accuracy: 81.12%



100%|██████████| 200/200 [08:25<00:00,  2.53s/it]
  2%|▏         | 1/50 [00:00<00:31,  1.54it/s]

tensor([0, 1, 0, 5, 4, 1, 1, 4, 5, 3, 2, 2, 0, 5, 0, 5])


  4%|▍         | 2/50 [00:01<00:30,  1.58it/s]

tensor([0, 5, 5, 5, 1, 3, 1, 1, 4, 4, 0, 1, 1, 5, 0, 5])


  6%|▌         | 3/50 [00:01<00:28,  1.64it/s]

tensor([3, 3, 3, 0, 1, 1, 0, 3, 1, 3, 4, 3, 3, 3, 5, 4])


  8%|▊         | 4/50 [00:02<00:27,  1.68it/s]

tensor([5, 5, 0, 2, 3, 0, 1, 3, 5, 3, 3, 2, 3, 4, 1, 5])


 10%|█         | 5/50 [00:03<00:26,  1.67it/s]

tensor([5, 3, 5, 1, 5, 4, 1, 5, 5, 2, 3, 4, 3, 0, 4, 2])


 12%|█▏        | 6/50 [00:03<00:27,  1.60it/s]

tensor([1, 5, 5, 5, 1, 5, 0, 5, 1, 4, 0, 5, 3, 2, 3, 3])


 14%|█▍        | 7/50 [00:04<00:26,  1.60it/s]

tensor([2, 3, 0, 2, 3, 5, 1, 0, 2, 3, 2, 1, 0, 5, 3, 3])


 16%|█▌        | 8/50 [00:04<00:26,  1.60it/s]

tensor([1, 3, 4, 1, 1, 1, 5, 5, 5, 4, 5, 3, 1, 5, 0, 3])


 18%|█▊        | 9/50 [00:05<00:25,  1.58it/s]

tensor([4, 2, 4, 5, 1, 3, 4, 4, 0, 1, 0, 0, 2, 2, 5, 5])


 20%|██        | 10/50 [00:06<00:25,  1.59it/s]

tensor([5, 0, 2, 2, 2, 5, 4, 1, 3, 1, 5, 0, 4, 3, 3, 1])


 22%|██▏       | 11/50 [00:06<00:23,  1.64it/s]

tensor([3, 3, 0, 3, 0, 3, 5, 3, 4, 5, 3, 5, 5, 5, 5, 2])


 24%|██▍       | 12/50 [00:07<00:23,  1.61it/s]

tensor([0, 5, 5, 4, 5, 3, 1, 2, 5, 3, 2, 5, 3, 5, 0, 3])


 26%|██▌       | 13/50 [00:08<00:23,  1.60it/s]

tensor([2, 5, 1, 0, 1, 4, 3, 0, 5, 4, 5, 1, 1, 0, 5, 2])


 28%|██▊       | 14/50 [00:08<00:22,  1.57it/s]

tensor([3, 4, 4, 3, 2, 5, 3, 3, 4, 3, 5, 2, 0, 2, 4, 0])


 30%|███       | 15/50 [00:09<00:22,  1.57it/s]

tensor([5, 1, 3, 5, 5, 1, 3, 2, 1, 2, 5, 4, 2, 2, 2, 1])


 32%|███▏      | 16/50 [00:10<00:21,  1.57it/s]

tensor([0, 3, 4, 5, 3, 1, 3, 1, 4, 3, 3, 0, 1, 5, 3, 1])


 34%|███▍      | 17/50 [00:10<00:20,  1.60it/s]

tensor([0, 5, 3, 4, 3, 1, 5, 4, 1, 0, 1, 2, 5, 0, 4, 2])


 36%|███▌      | 18/50 [00:11<00:20,  1.56it/s]

tensor([2, 1, 5, 2, 0, 0, 5, 2, 3, 3, 3, 1, 3, 5, 5, 5])


 38%|███▊      | 19/50 [00:11<00:19,  1.61it/s]

tensor([5, 2, 3, 5, 3, 0, 0, 5, 3, 5, 4, 4, 5, 0, 3, 5])


 40%|████      | 20/50 [00:12<00:18,  1.60it/s]

tensor([3, 0, 1, 3, 5, 0, 0, 3, 1, 0, 5, 3, 5, 3, 3, 5])


 42%|████▏     | 21/50 [00:13<00:18,  1.58it/s]

tensor([3, 0, 3, 2, 3, 5, 0, 5, 4, 1, 3, 0, 1, 3, 1, 3])


 44%|████▍     | 22/50 [00:13<00:17,  1.57it/s]

tensor([2, 4, 0, 5, 0, 0, 3, 0, 1, 3, 5, 5, 1, 3, 0, 5])


 46%|████▌     | 23/50 [00:14<00:17,  1.58it/s]

tensor([3, 1, 1, 0, 5, 1, 5, 1, 4, 0, 1, 0, 5, 3, 0, 1])


 48%|████▊     | 24/50 [00:15<00:16,  1.56it/s]

tensor([3, 3, 1, 1, 5, 4, 3, 3, 3, 0, 3, 0, 2, 4, 3, 1])


 50%|█████     | 25/50 [00:15<00:16,  1.52it/s]

tensor([2, 0, 5, 3, 0, 3, 5, 4, 4, 2, 1, 4, 2, 0, 4, 2])


 52%|█████▏    | 26/50 [00:16<00:15,  1.53it/s]

tensor([2, 0, 5, 5, 0, 2, 3, 5, 5, 1, 1, 3, 3, 3, 3, 3])


 54%|█████▍    | 27/50 [00:17<00:15,  1.51it/s]

tensor([2, 5, 3, 3, 5, 5, 5, 2, 3, 5, 0, 1, 2, 2, 0, 0])


 56%|█████▌    | 28/50 [00:17<00:14,  1.54it/s]

tensor([3, 5, 0, 2, 4, 1, 5, 3, 5, 5, 2, 2, 4, 0, 0, 1])


 58%|█████▊    | 29/50 [00:18<00:13,  1.51it/s]

tensor([5, 1, 5, 1, 3, 2, 4, 3, 1, 5, 2, 3, 5, 4, 3, 3])


 60%|██████    | 30/50 [00:19<00:13,  1.50it/s]

tensor([1, 5, 5, 1, 4, 5, 3, 0, 2, 2, 3, 1, 5, 3, 5, 1])


 62%|██████▏   | 31/50 [00:19<00:12,  1.51it/s]

tensor([5, 5, 1, 3, 3, 3, 4, 3, 3, 3, 3, 0, 5, 0, 3, 1])


 64%|██████▍   | 32/50 [00:20<00:11,  1.51it/s]

tensor([4, 3, 5, 3, 0, 5, 5, 5, 3, 1, 2, 1, 3, 1, 2, 5])


 66%|██████▌   | 33/50 [00:21<00:11,  1.49it/s]

tensor([5, 0, 3, 3, 0, 3, 3, 0, 3, 4, 0, 4, 3, 1, 1, 4])


 68%|██████▊   | 34/50 [00:21<00:10,  1.54it/s]

tensor([5, 3, 3, 0, 5, 5, 3, 0, 4, 5, 4, 2, 1, 5, 4, 2])


 70%|███████   | 35/50 [00:22<00:09,  1.60it/s]

tensor([5, 3, 5, 3, 2, 0, 3, 3, 5, 4, 0, 3, 0, 5, 1, 4])


 72%|███████▏  | 36/50 [00:22<00:08,  1.65it/s]

tensor([0, 5, 5, 2, 0, 0, 0, 1, 3, 0, 0, 3, 3, 1, 5, 3])


 74%|███████▍  | 37/50 [00:23<00:07,  1.63it/s]

tensor([5, 4, 1, 5, 2, 5, 2, 3, 3, 4, 5, 5, 3, 3, 5, 3])


 76%|███████▌  | 38/50 [00:24<00:07,  1.65it/s]

tensor([4, 5, 5, 5, 5, 1, 5, 0, 4, 3, 2, 5, 5, 5, 5, 4])


 78%|███████▊  | 39/50 [00:24<00:06,  1.67it/s]

tensor([3, 4, 2, 4, 3, 3, 4, 3, 1, 5, 4, 3, 3, 0, 0, 0])


 80%|████████  | 40/50 [00:25<00:05,  1.68it/s]

tensor([1, 5, 5, 1, 3, 4, 3, 3, 3, 0, 5, 2, 4, 4, 2, 5])


 82%|████████▏ | 41/50 [00:25<00:05,  1.71it/s]

tensor([2, 5, 5, 3, 3, 1, 1, 5, 3, 2, 3, 5, 0, 5, 5, 4])


 84%|████████▍ | 42/50 [00:26<00:04,  1.71it/s]

tensor([0, 5, 5, 3, 3, 3, 2, 0, 1, 2, 1, 0, 0, 1, 5, 4])


 86%|████████▌ | 43/50 [00:26<00:04,  1.72it/s]

tensor([4, 4, 5, 0, 3, 1, 3, 5, 1, 3, 1, 3, 4, 0, 1, 4])


 88%|████████▊ | 44/50 [00:27<00:03,  1.72it/s]

tensor([1, 4, 1, 5, 5, 1, 3, 1, 1, 3, 1, 5, 5, 2, 0, 5])


 90%|█████████ | 45/50 [00:28<00:02,  1.73it/s]

tensor([5, 1, 3, 3, 5, 5, 0, 4, 2, 5, 1, 4, 0, 3, 5, 4])


 92%|█████████▏| 46/50 [00:29<00:02,  1.38it/s]

tensor([2, 3, 1, 3, 5, 4, 3, 3, 3, 1, 5, 5, 0, 3, 2, 3])


 94%|█████████▍| 47/50 [00:29<00:02,  1.45it/s]

tensor([1, 0, 0, 1, 5, 3, 3, 2, 3, 4, 2, 5, 5, 3, 3, 5])


 96%|█████████▌| 48/50 [00:30<00:01,  1.50it/s]

tensor([4, 5, 1, 3, 4, 3, 1, 1, 5, 1, 0, 3, 3, 3, 3, 4])


 98%|█████████▊| 49/50 [00:31<00:00,  1.51it/s]

tensor([5, 3, 3, 0, 4, 5, 3, 1, 5, 1, 0, 5, 4, 3, 3, 2])


100%|██████████| 50/50 [00:31<00:00,  1.58it/s]

tensor([0, 5, 0, 0, 0, 0, 3, 3, 3, 0, 5, 3, 5, 3, 3, 3])
              precision    recall  f1-score   support

           0       0.83      0.83      0.83         6
           3       0.71      1.00      0.83         5
           4       0.00      0.00      0.00         2
           5       1.00      1.00      1.00         3

    accuracy                           0.81        16
   macro avg       0.64      0.71      0.67        16
weighted avg       0.72      0.81      0.76        16

Epoch 3/3 - Train Loss: 0.3704 - Val Loss: 0.5763 - Val Accuracy: 81.50%



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# To use the fine-tuned model for inference:
loaded_model = BertForSequenceClassification.from_pretrained(f"fine_tuned_bert")
loaded_model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [12]:
loaded_model.eval()

all_preds = []

with torch.no_grad():
  for text in tqdm(test_text):
      tokenized_sentence = tokenizer(text, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt')
      input_ids = tokenized_sentence["input_ids"].to(device)
      attention_mask = tokenized_sentence["attention_mask"].to(device)

      outputs = loaded_model(input_ids, attention_mask=attention_mask)
      logits = outputs.logits
      predictions = torch.argmax(logits, dim=1)
      all_preds.extend(predictions.cpu().numpy())

100%|██████████| 1000/1000 [00:25<00:00, 39.56it/s]


In [13]:
bert_accuracy = classification_report(test_labels, all_preds, output_dict=True, zero_division=0)['accuracy']
print("The accuracy of the fine-tuned BERT model is: ", bert_accuracy)

The accuracy of the fine-tuned BERT model is:  0.834
