In [17]:
 import argparse
 
 import torch
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
 from torch import nn
 
 
 import evaluate
 from accelerate import Accelerator, DistributedType
 from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, BertForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 import numpy as np
 import pandas as pd
 

In [30]:
 class TextClassificationDataset2(torch.utils.data.Dataset):
 
     def __init__(self, filename):
         self.text = []
         self.labels = []
         with open(filename, 'r') as f:
           for line in f:
             text, label = self.split_by_last_comma(line)
             self.text.append(text)
             self.labels.append(torch.tensor(int(label[0])))
             # print(int(label[0]))
             # print(torch.tensor(int(label[0])))
         self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
         self.max_length = 100
 
     def __len__(self):
         return len(self.text)
 
     def __getitem__(self, idx):
         text = self.text[idx]
         label = self.labels[idx]
         encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', 
                                   truncation=True)
         return {'input_ids': encoding['input_ids'].flatten(), 'attention_mask': encoding['attention_mask'].flatten(),
                 'label': label}
 
     def split_by_last_comma(self, text):
       return text.rsplit(",", 1) if "," in text else None

In [32]:
 train_dataset = TextClassificationDataset2("./balanced_labeled_commentary.txt")
 eval_dataset = TextClassificationDataset2("./eval.txt")
 # for item in train_dataset:
 #     print(item)
 # for item in eval_dataset:
 #     print(item)

<__main__.TextClassificationDataset2 object at 0x29a45ad90>


In [33]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16)
eval_dataloader = torch.utils.data.DataLoader(eval_dataset, batch_size=32)

In [19]:
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# for batch in eval_dataloader:
#     for e in batch["input_ids"]:
#         print(tokenizer.decode(e))
#         print("\n")

# for batch in eval_dataloader:
#     for i in range(len(batch["label"])):
#         print(tokenizer.decode(batch["input_ids"][i]))
#         print(batch["label"][i])
#         print("\n")

In [20]:
# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# soft = torch.nn.Softmax(dim=1)
# def training_function(config, device):
#      # Sample hyper-parameters for learning rate, batch size, seed and a few other HPs
#      lr = config["lr"]
#      num_epochs = int(config["num_epochs"])
#      seed = int(config["seed"])
#      batch_size = int(config["batch_size"])
 
#      #metric = evaluate.load("glue", "mrpc")
#      #accuracy_metric = evaluate.load("accuracy");
#      f1_metric = evaluate.load("f1", average="weighted");
#      f1_metric.average="weighted"
#      f1_micro = evaluate.load("f1", average="micro");
#      f1_micro.average="micro"
#      # If the batch size is too big we use gradient accumulation
#      gradient_accumulation_steps = 1
 
#      set_seed(seed)
#      # train_dataloader, eval_dataloader = get_dataloaders(batch_size)
#      model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", return_dict=True)
#      model = model.to(device)
 
#      # Instantiate optimizer
#      optimizer = AdamW(params=model.parameters(), lr=lr)
 
#      # Instantiate scheduler
#      lr_scheduler = get_linear_schedule_with_warmup(
#          optimizer=optimizer,
#          num_warmup_steps=100,
#          num_training_steps=(len(train_dataloader) * num_epochs) // gradient_accumulation_steps,
#      )
 
#      for epoch in range(num_epochs):
#          model.train()
#          for step, batch in enumerate(train_dataloader):
#              input_ids = batch['input_ids'].to(device)
#              attention_mask = batch['attention_mask'].to(device)
#              labels = batch['label'].to(device)
#              outputs = model(input_ids=input_ids, attention_mask=attention_mask)
#              if step % 10 == 0:
#                steps = "10 steps: " + str(step)
#                # print(step)
#                # print(input_ids)
#                # print(type(input_ids))
#                # print(attention_mask)
#                # print(labels)
#                # print(outputs)

#                for i in range(len(batch["label"])):
#                  print(tokenizer.decode(batch["input_ids"][i]))
#                  print(batch["label"][i])
#                  print(soft(outputs.logits))
                 
#                  print("\n")
                
#                 #print(tokenizer.decode(token_ids=input_ids))
#              #batch.to(device)

#              #loss = outputs.loss
#              #loss = loss / gradient_accumulation_steps
#              loss = nn.CrossEntropyLoss()(outputs.logits, labels)
#              loss.backward()
#              if step % gradient_accumulation_steps == 0:
#                  optimizer.step()
#                  lr_scheduler.step()
#                  optimizer.zero_grad()
#          print(loss)
#          model.eval()
#          samples_seen = 0
#          for step, batch in enumerate(eval_dataloader):
#              #batch.to(device)
#              input_ids_test = batch['input_ids'].to(device)
#              attention_mask_test = batch['attention_mask'].to(device)
#              labels_test = batch['label'].to(device)
#              with torch.no_grad():
#                  outputs_test = model(input_ids=input_ids_test, attention_mask=attention_mask_test)
#              predictions = outputs_test.logits.argmax(dim=-1)
#              f1_metric.add_batch(
#                  predictions=predictions,
#                  references=labels_test,
#              )
#              f1_micro.add_batch(
#                  predictions=predictions,
#                  references=labels_test,
#              )
 
#          eval_metric = f1_metric.compute(average="weighted")
#          eval_metric2 = f1_micro.compute(average="micro")
#          # Use accelerator.print to print only on the main process.
#          print(f"epoch {epoch}:", eval_metric)
#          print(f"epoch {epoch}:", eval_metric2)

In [None]:
 # def main(device):
 #     config = {"lr": 2e-5, "num_epochs": 3, "seed": 42, "batch_size": 16}
 #     training_function(config, device)
 
 # from time import time
 
 # # for device in ["cpu", "mps"]:
 # for device in ["mps"]:
 #     device = torch.device(device)
 #     print(device)
 #     start_time = time()
 #     main(device)
 #     end_time = time()
 #     print(f"time taken in seconds for training using device {device}: {end_time-start_time}")

In [34]:
device = "mps"
model_name = "bert-base-uncased"  # Replace with your desired BERT model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=4)  # Adjust num_labels for your classification task
model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)  # Adjust learning rate (lr)
model.device

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device(type='mps', index=0)

In [35]:
from tqdm.auto import tqdm

from sklearn.metrics import f1_score

def evaluate(model, eval_dataloader):
  model.eval()  # Set model to evaluation mode
  num_correct = 0
  num_samples = 0
  f1_scores = torch.zeros(4)
  with torch.no_grad():  # Disable gradient calculation for evaluation
    for batch in eval_dataloader:
      #input_ids, attention_mask, labels = batch
      input_ids = batch["input_ids"].to(device)
      attention_mask = batch["attention_mask"].to(device)
      labels = batch["label"].to(device) 
      outputs = model(input_ids, attention_mask=attention_mask)
      predictions = torch.argmax(outputs.logits, dim=1)  # Get predicted class labels
      print(labels)
      print(predictions)

      num_correct += torch.sum(predictions == labels).item()
      num_samples += labels.size(0)  # Count total samples
      for i in range(4):  # Assuming 4 classes (0 to 3)
        f1_scores[i] += f1_score(labels.cpu() == i, predictions.cpu() == i, average='binary')
  eval_acc = num_correct / num_samples
  f1_scores /= len(eval_dataloader)
  print(f"Evaluation Accuracy: {eval_acc:.4f}")
  print(f"F1 Scores: {f1_scores.tolist()}")
  return eval_acc  # Optional: Return accuracy for potential early stopping

# eval_acc = evaluate(model, train_dataloader)  # Evaluate on validation set
# # Optional: Early stopping based on evaluation metric (e.g., eval_acc)

pbar.set_description(f"Epoch {epoch+1} - Loss: {loss.item():.4f} - Eval Acc: {eval_acc:.4f}")
for epoch in range(5):
  with tqdm(train_dataloader, unit="batch") as pbar:
    model.train()
    for batch in pbar:
      #print(batch["input_ids"])
      input_ids = batch["input_ids"].to(device)
      attention_mask = batch["attention_mask"].to(device)
      labels = batch["label"].to(device)
      outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
      predictions = torch.argmax(outputs.logits, dim=1)
      #print(len(labels))
      print("labels " + str(labels))
      #print(len(predictions))
      print("predictions: " + str(predictions))
      loss = outputs.loss
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

      pbar.set_description(f"Epoch {epoch+1} - Loss: {loss.item():.4f}")

  # Evaluation step
  eval_acc = evaluate(model, train_dataloader)  # Evaluate on validation set
  # Optional: Early stopping based on evaluation metric (e.g., eval_acc)

  pbar.set_description(f"Epoch {epoch+1} - Loss: {loss.item():.4f} - Eval Acc: {eval_acc:.4f}")

  0%|          | 0/37 [00:00<?, ?batch/s]

labels tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
predictions: tensor([0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3], device='mps:0')
labels tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
predictions: tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3], device='mps:0')
labels tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
predictions: tensor([3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 1, 3, 3, 0], device='mps:0')
labels tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
predictions: tensor([3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], device='mps:0')
labels tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
predictions: tensor([3, 1, 3, 3, 1, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1], device='mps:0')
labels tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
predictions: tensor([1, 3, 1, 1, 1, 1, 1, 3, 3, 1, 3, 3, 1, 3, 1, 1], device='mps:0')
labe

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([0, 1, 0, 2, 1, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 2, 0, 1, 3], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1

  0%|          | 0/37 [00:00<?, ?batch/s]

labels tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
predictions: tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
labels tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
predictions: tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
labels tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
predictions: tensor([0, 1, 0, 2, 1, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
labels tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
predictions: tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 2, 0, 1, 3], device='mps:0')
labels tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
predictions: tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
labels tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
predictions: tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
labe

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([3, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 1, 0, 2, 1, 1], device='mps:0')
tensor([3, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 1, 0, 2, 1

  0%|          | 0/37 [00:00<?, ?batch/s]

labels tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
predictions: tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
labels tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
predictions: tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
labels tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
predictions: tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
labels tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
predictions: tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 2, 0, 1, 3], device='mps:0')
labels tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
predictions: tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
labels tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
predictions: tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
labe

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1

  0%|          | 0/37 [00:00<?, ?batch/s]

labels tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
predictions: tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
labels tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
predictions: tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
labels tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
predictions: tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
labels tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
predictions: tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
labels tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
predictions: tensor([3, 1, 2, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
labels tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
predictions: tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
labe

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1

  0%|          | 0/37 [00:00<?, ?batch/s]

labels tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
predictions: tensor([3, 3, 1, 3, 0, 0, 3, 0, 2, 2, 3, 1, 3, 1, 2, 3], device='mps:0')
labels tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
predictions: tensor([2, 3, 3, 1, 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2], device='mps:0')
labels tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
predictions: tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
labels tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
predictions: tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
labels tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
predictions: tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
labels tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
predictions: tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
labe

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([0, 1, 0, 2, 3, 1, 2, 1, 0, 3, 1, 2, 3, 1, 0, 0], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([2, 1, 2, 1, 1, 3, 1, 2, 2, 0, 0, 2, 3, 0, 1, 3], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([3, 1, 3, 1, 1, 0, 0, 2, 3, 3, 2, 2, 2, 3, 3, 1], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([2, 3, 0, 0, 1, 1, 1, 2, 1, 1, 3, 3, 1, 2, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([0, 3, 3, 3, 2, 3, 1, 3, 2, 2, 0, 2, 0, 3, 1, 2], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 3, 1, 3, 1, 0, 2, 1, 1, 0, 2, 3, 3, 0, 3, 3], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1, 0], device='mps:0')
tensor([1, 2, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 0, 3, 1

In [36]:
from sklearn.metrics import f1_score

def evaluate(model, eval_dataloader):
  model.eval()  # Set model to evaluation mode
  num_correct = 0
  num_samples = 0
  f1_scores = torch.zeros(4)
  with torch.no_grad():  # Disable gradient calculation for evaluation
    for batch in eval_dataloader:
      #input_ids, attention_mask, labels = batch
      input_ids = batch["input_ids"].to(device)
      attention_mask = batch["attention_mask"].to(device)
      labels = batch["label"].to(device) 
      outputs = model(input_ids, attention_mask=attention_mask)
      predictions = torch.argmax(outputs.logits, dim=1)  # Get predicted class labels
      print(labels)
      print(predictions)

      num_correct += torch.sum(predictions == labels).item()
      num_samples += labels.size(0)  # Count total samples
      for i in range(4):  # Assuming 4 classes (0 to 3)
        f1_scores[i] += f1_score(labels.cpu() == i, predictions.cpu() == i, average='binary')
  eval_acc = num_correct / num_samples
  f1_scores /= len(eval_dataloader)
  print(f"Evaluation Accuracy: {eval_acc:.4f}")
  print(f"F1 Scores: {f1_scores.tolist()}")
  return eval_acc  # Optional: Return accuracy for potential early stopping

eval_acc = evaluate(model, eval_dataloader)  # Evaluate on validation set
# Optional: Early stopping based on evaluation metric (e.g., eval_acc)

pbar.set_description(f"Epoch {epoch+1} - Loss: {loss.item():.4f} - Eval Acc: {eval_acc:.4f}")

tensor([1, 0, 0, 1, 2, 0, 2, 0, 0, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2,
        1, 1, 2, 2, 2, 2, 1, 1], device='mps:0')
tensor([1, 0, 0, 1, 2, 0, 2, 0, 0, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2,
        1, 1, 2, 3, 2, 2, 1, 1], device='mps:0')
tensor([2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 2, 2, 1, 1, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 1, 2], device='mps:0')
tensor([2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 3, 2, 2, 1, 1, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 1, 2], device='mps:0')
tensor([0, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0,
        0, 2, 2, 2, 1, 0, 0, 2], device='mps:0')
tensor([0, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0,
        0, 2, 2, 3, 1, 0, 0, 2], device='mps:0')
tensor([2, 0, 2, 1, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0,
        2, 1, 2, 1, 1, 0, 1, 2], device='mps:0')
tensor([2, 0, 2, 1, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0,
        2, 1, 2, 

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 0, 2, 0, 0, 0, 2, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 0,
        0, 0, 0, 0, 0, 0, 1, 0], device='mps:0')
tensor([0, 0, 2, 0, 0, 0, 2, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 0,
        0, 0, 0, 0, 0, 0, 1, 0], device='mps:0')


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([2, 0, 0, 2, 2, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2, 1,
        2, 2, 2, 0, 0, 2, 0, 0], device='mps:0')
tensor([2, 0, 0, 2, 2, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2, 1,
        2, 2, 2, 0, 0, 2, 0, 0], device='mps:0')


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 2, 0, 2, 2, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 0, 0,
        0, 0, 2, 0, 0, 0, 1, 0], device='mps:0')
tensor([0, 2, 0, 2, 2, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 0, 0,
        0, 0, 2, 0, 0, 0, 1, 0], device='mps:0')


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 1, 2, 1, 0, 2, 2, 1, 2, 0, 0, 0, 0,
        0, 1, 0, 0, 2, 0, 2, 0], device='mps:0')
tensor([3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 1, 2, 1, 0, 2, 2, 1, 2, 0, 0, 0, 0,
        0, 1, 0, 0, 2, 0, 2, 0], device='mps:0')
tensor([0, 0, 2, 0, 1, 0, 0, 0, 2, 2, 1, 2, 0, 0, 0, 2, 0, 1, 0, 0, 0, 2, 2, 1,
        2, 0, 0, 2, 0, 0, 2, 1], device='mps:0')
tensor([0, 0, 2, 0, 1, 0, 0, 0, 2, 2, 1, 2, 0, 0, 0, 2, 0, 1, 0, 0, 0, 2, 2, 1,
        2, 0, 0, 2, 0, 0, 2, 1], device='mps:0')


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


tensor([0, 1, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 3], device='mps:0')
tensor([0, 1, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 3], device='mps:0')
Evaluation Accuracy: 0.9901
F1 Scores: [1.0, 1.0, 0.9857698678970337, 0.20000000298023224]


In [None]:
torch.save(model.state_dict(), "model_unbalanced_data.pt")

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

In [None]:
import argparse

import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader

import evaluate
from accelerate import Accelerator, DistributedType
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed



MAX_GPU_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 32

def get_dataloaders(batch_size: int = 16):
    """
    Creates a set of `DataLoader`s for the `glue` dataset,
    using "bert-base-cased" as the tokenizer.

    Args:
        batch_size (`int`, *optional*):
            The batch size for the train and validation DataLoaders.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    datasets = load_dataset("glue", "mrpc")

    def tokenize_function(examples):
        # max_length=None => use the model max length (it's actually the default)
        outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
        return outputs

    tokenized_datasets = datasets.map(
        tokenize_function,
        batched=True,
        remove_columns=["idx", "sentence1", "sentence2"],
    )

    # We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
    # transformers library
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

    def collate_fn(examples):
        
        return tokenizer.pad(examples, padding="longest", return_tensors="pt")

    # Instantiate dataloaders.
    train_dataloader = DataLoader(
        tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size
    )
    eval_dataloader = DataLoader(
        tokenized_datasets["validation"], shuffle=False, collate_fn=collate_fn, batch_size=EVAL_BATCH_SIZE
    )

    return train_dataloader, eval_dataloader


In [None]:
def training_function(config, device):
    # Sample hyper-parameters for learning rate, batch size, seed and a few other HPs
    lr = config["lr"]
    num_epochs = int(config["num_epochs"])
    seed = int(config["seed"])
    batch_size = int(config["batch_size"])

    metric = evaluate.load("glue", "mrpc")

    # If the batch size is too big we use gradient accumulation
    gradient_accumulation_steps = 1

    set_seed(seed)
    train_dataloader, eval_dataloader = get_dataloaders(batch_size)
    model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", return_dict=True)
    model = model.to(device)

    # Instantiate optimizer
    optimizer = AdamW(params=model.parameters(), lr=lr)

    # Instantiate scheduler
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=100,
        num_training_steps=(len(train_dataloader) * num_epochs) // gradient_accumulation_steps,
    )

    for epoch in range(num_epochs):
        model.train()
        for step, batch in enumerate(train_dataloader):
            batch.to(device)
            outputs = model(**batch)
            loss = outputs.loss
            loss = loss / gradient_accumulation_steps
            loss.backward()
            if step % gradient_accumulation_steps == 0:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
        print(loss)
        model.eval()
        samples_seen = 0
        for step, batch in enumerate(eval_dataloader):
            batch.to(device)
            with torch.no_grad():
                outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            metric.add_batch(
                predictions=predictions,
                references=batch["labels"],
            )

        eval_metric = metric.compute()
        # Use accelerator.print to print only on the main process.
        print(f"epoch {epoch}:", eval_metric)

In [None]:
datasets = load_dataset("glue", "mrpc")

In [None]:
datasets["train"][0]

In [None]:
def main(device):
    config = {"lr": 2e-5, "num_epochs": 3, "seed": 42, "batch_size": 16}
    training_function(config, device)

from time import time

# for device in ["cpu", "mps"]:
for device in ["mps", "cpu"]:
    device = torch.device(device)
    print(device)
    start_time = time()
    main(device)
    end_time = time()
    print(f"time taken in seconds for training using device {device}: {end_time-start_time}")