In [1]:
import json
import random
import torch

In [2]:
import pandas as pd

dataset = pd.read_csv("data_full_v1.csv")

In [3]:
dataset['ground_truth'] = dataset.apply(lambda row: 1 if row['label']=='yes' else 0, axis=1)

dataset['prompt'] = dataset.apply(lambda row: (row['prompt']).removeprefix('Imagine a self-contained, hypothetical world with only the following conditions, and without any unmentioned factors or causal relationships: '), axis=1)


In [4]:
dataset['prompt'] = dataset['prompt'].str.lower()

In [5]:
# dataset = dataset[0:1000]

In [6]:
dataset

  # ['prompt'].str.lower()

Unnamed: 0,id,prompt,label,ground_truth
0,4,husband has a direct effect on wife and alarm ...,yes,1
1,7,husband has a direct effect on wife and alarm ...,no,0
2,8,husband has a direct effect on wife and alarm ...,yes,1
3,15,husband has a direct effect on wife and alarm ...,no,0
4,21,husband has a direct effect on wife and alarm ...,no,0
...,...,...,...,...
10107,31012,zuph has a direct effect on glimx. jyka has a ...,yes,1
10108,31014,zuph has a direct effect on glimx. jyka has a ...,yes,1
10109,31015,zuph has a direct effect on glimx. jyka has a ...,no,0
10110,31016,zuph has a direct effect on glimx. jyka has a ...,no,0


In [7]:
dataset = json.loads(dataset.to_json(orient ='records'))

# Shuffle the dataset
random.shuffle(dataset)

# Define split sizes for a 75-25 split
train_size = int(0.75 * len(dataset))
valid_size = len(dataset) - train_size

# Split the dataset
train_data = dataset[:train_size]
validation_data = dataset[train_size:]

In [8]:
from torch.utils.data import Dataset

class CladderDataset(Dataset):
    def __init__(self, cladder_data, tokenizer):
        self.cladder_data = cladder_data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.cladder_data)

    def __getitem__(self, idx):
        data = self.cladder_data[idx]
        # print(data)
        prompt = data['prompt']
        label = data['ground_truth']

        # print(prompt)
        # print(label)

        # Tokenize
        encoded_input = self.tokenizer(prompt, padding='max_length', truncation=True, return_tensors='pt').to("cuda")
        # print(encoded_input)
        encoded_input['labels'] = torch.tensor(label).to("cuda")

        return encoded_input


In [9]:
# from transformers import BertTokenizer, BertForSequenceClassification
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from transformers import DebertaTokenizer as tkz
from transformers import DebertaForSequenceClassification as mpc
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load tokenizer and model
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 2)

# run = 0
overall_accuracies, accuracies, f1s, precisions, recalls = [], [], [], [], []

# for run in range(2):
for run in range(3):
  model_base = 'microsoft/deberta-base'
  tokenizer = tkz.from_pretrained(model_base)
  model = mpc.from_pretrained(model_base, num_labels = 2)

  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  model.to(device)

  # Prepare the DataLoader
  cladder_dataset = CladderDataset(train_data, tokenizer)
  dataloader = DataLoader(cladder_dataset, batch_size=8, shuffle=True)

  # Optimizer and Loss Function
  optimizer = optim.Adam(model.parameters(), lr=1e-5)
  loss_fn = torch.nn.CrossEntropyLoss()



  # Training Loop
  model.train()

  #change to 3

  for epoch in range(3):  # Number of epochs
    for batch in dataloader:
        optimizer.zero_grad()

        input_ids = batch['input_ids'].squeeze(1)
        attention_mask = batch['attention_mask'].squeeze(1)
        labels = batch['labels']

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        loss.backward()
        optimizer.step()

    print(f"Epoch: {epoch}, Loss: {loss.item()}")

  validation_dataset = CladderDataset(validation_data, tokenizer)
  validation_dataloader = DataLoader(validation_dataset, batch_size=8)

  model.eval()
  correct_predictions = 0
  total_predictions = 0

  accuracy, f1, precision, recall = [], [], [], []
  batches = 0

  def compute_metrics(y_true, y_pred):
      accuracy = accuracy_score(y_true, y_pred)
      # print(y_true)
      # print(y_pred)
      f1 = f1_score(y_true, y_pred)

      precision = precision_score(y_true, y_pred)
      # print(precision)
      recall = recall_score(y_true, y_pred)
      return [accuracy, f1, precision, recall]

  with torch.no_grad():
      for batch in validation_dataloader:
          input_ids = batch['input_ids'].squeeze(1)
          attention_mask = batch['attention_mask'].squeeze(1)
          labels = batch['labels'].cpu()  # ground truth
          # print(labels)

          outputs = model(input_ids=input_ids, attention_mask=attention_mask)
          predictions = torch.argmax(outputs.logits, dim=1).cpu()  # predictions
          # print(predictions)

          calculated_metrics = compute_metrics(labels, predictions)
          accuracy.append(calculated_metrics[0])
          f1.append(calculated_metrics[1])
          precision.append(calculated_metrics[2])
          recall.append(calculated_metrics[3])
          batches += 1

          correct_predictions += (predictions == labels).sum().item()
          total_predictions += labels.size(0)

  overall_accuracy = correct_predictions / total_predictions
  print(f"Overall Accuracy: {overall_accuracy:.4f}")
  overall_accuracies.append(overall_accuracy)

  accuracy_var = sum(accuracy)/batches
  print(f"Accuracy: {sum(accuracy)/batches:.4f}")
  print(accuracy_var)
  accuracies.append(accuracy_var)

  f1_var = sum(f1)/batches
  print(f"F1: {sum(f1)/batches:.4f}")
  print(f1_var)
  f1s.append(f1_var)

  precision_var = sum(precision)/batches
  print(f"Precision: {sum(precision)/batches:.4f}")
  print(precision_var)
  precisions.append(precision_var)

  recall_var = sum(recall)/batches
  print(f"Recall: {sum(recall)/batches:.4f}")
  print(recall_var)
  recalls.append(precision_var)

  print(f'Number of batches: {batches}')


  # model_path = 'cladder_' + model_base + '_' + str(run) + '.pt'
  # torch.save(model.state_dict(), model_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: 0, Loss: 0.7088209986686707
Epoch: 1, Loss: 0.6882362961769104
Epoch: 2, Loss: 0.7008388042449951


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Overall Accuracy: 0.4964
Accuracy: 0.4964
0.4964398734177215
F1: 0.0064
0.006389391199517783
Precision: 0.0190
0.0189873417721519
Recall: 0.0039
0.0038502109704641346
Number of batches: 316


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: 0, Loss: 0.7116249799728394
Epoch: 1, Loss: 0.686917245388031
Epoch: 2, Loss: 0.6866650581359863


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Overall Accuracy: 0.4964
Accuracy: 0.4964
0.4964398734177215
F1: 0.0000
0.0
Precision: 0.0000
0.0
Recall: 0.0000
0.0
Number of batches: 316


Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'pooler.dense.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: 0, Loss: 0.6800844073295593
Epoch: 1, Loss: 0.7021425366401672
Epoch: 2, Loss: 0.685391366481781


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Overall Accuracy: 0.5036
Accuracy: 0.5036
0.5035601265822784
F1: 0.6517
0.6516874124469056
Precision: 0.5036
0.5035601265822784
Recall: 0.9905
0.990506329113924
Number of batches: 316


In [None]:
yfrom sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

overall_accuracies, accuracies, f1s, precisions, recalls = [], [], [], [], []

for run in range(3):
  model_path = 'cladder_' + model_base + '_' + str(run) + '.pt'
  print(model_path)

  model = mpc.from_pretrained(model_base, num_labels = 2)
  model.load_state_dict(torch.load(model_path))

  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  model.to(device)

  tokenizer = tkz.from_pretrained(model_base)
  validation_dataset = CladderDataset(validation_data, tokenizer)
  validation_dataloader = DataLoader(validation_dataset, batch_size=8)




  model.eval()
  correct_predictions = 0
  total_predictions = 0

  accuracy, f1, precision, recall = [], [], [], []
  batches = 0

  def compute_metrics(y_true, y_pred):
      accuracy = accuracy_score(y_true, y_pred)
      # print(y_true)
      # print(y_pred)
      f1 = f1_score(y_true, y_pred)

      precision = precision_score(y_true, y_pred)
      # print(precision)
      recall = recall_score(y_true, y_pred)
      return [accuracy, f1, precision, recall]

  with torch.no_grad():
      for batch in validation_dataloader:
          input_ids = batch['input_ids'].squeeze(1)
          attention_mask = batch['attention_mask'].squeeze(1)
          labels = batch['labels'].cpu()  # ground truth
          # print(labels)

          outputs = model(input_ids=input_ids, attention_mask=attention_mask)
          predictions = torch.argmax(outputs.logits, dim=1).cpu()  # predictions
          # print(predictions)

          calculated_metrics = compute_metrics(labels, predictions)
          accuracy.append(calculated_metrics[0])
          f1.append(calculated_metrics[1])
          precision.append(calculated_metrics[2])
          recall.append(calculated_metrics[3])
          batches += 1

          correct_predictions += (predictions == labels).sum().item()
          total_predictions += labels.size(0)

  overall_accuracy = correct_predictions / total_predictions
  print(f"Overall Accuracy: {overall_accuracy:.4f}")
  overall_accuracies.append(overall_accuracy)

  accuracy_var = sum(accuracy)/batches
  print(f"Accuracy: {sum(accuracy)/batches:.4f}")
  print(accuracy_var)
  accuracies.append(accuracy_var)

  f1_var = sum(f1)/batches
  print(f"F1: {sum(f1)/batches:.4f}")
  print(f1_var)
  f1s.append(f1_var)

  precision_var = sum(precision)/batches
  print(f"Precision: {sum(precision)/batches:.4f}")
  print(precision_var)
  precisions.append(precision_var)

  recall_var = sum(recall)/batches
  print(f"Recall: {sum(recall)/batches:.4f}")
  print(recall_var)
  recalls.append(precision_var)

  print(f'Number of batches: {batches}')

In [None]:
# import torch
# from torch.utils.data import DataLoader
# import torch.optim as optim
# from transformers import DebertaTokenizer as tkz
# from transformers import DebertaForSequenceClassification as mpc

# # Load tokenizer and model
# # tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")
# # model = DebertaForSequenceClassification.from_pretrained("microsoft/deberta-base", num_labels = 2)

# # Load tokenizer and model
# model_base = 'microsoft/deberta-base'
# tokenizer = tkz.from_pretrained(model_base)
# model = mpc.from_pretrained(model_base, num_labels = 2)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['pooler.dense.weight', 'classifier.weight', 'classifier.bias', 'pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

DebertaForSequenceClassification(
  (deberta): DebertaModel(
    (embeddings): DebertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=0)
      (LayerNorm): DebertaLayerNorm()
      (dropout): StableDropout()
    )
    (encoder): DebertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaLayer(
          (attention): DebertaAttention(
            (self): DisentangledSelfAttention(
              (in_proj): Linear(in_features=768, out_features=2304, bias=False)
              (pos_dropout): StableDropout()
              (pos_proj): Linear(in_features=768, out_features=768, bias=False)
              (pos_q_proj): Linear(in_features=768, out_features=768, bias=True)
              (dropout): StableDropout()
            )
            (output): DebertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): DebertaLayerNorm()
              (dropout): StableDropout()
            )
          )
          (

In [None]:
# Prepare the DataLoader
cladder_dataset = CladderDataset(train_data, tokenizer)
dataloader = DataLoader(cladder_dataset, batch_size=8, shuffle=True)

# Optimizer and Loss Function
optimizer = optim.Adam(model.parameters(), lr=1e-5)
loss_fn = torch.nn.CrossEntropyLoss()

In [None]:
# Training Loop
model.train()

for epoch in range(3):  # Number of epochs
  for batch in dataloader:
      optimizer.zero_grad()

      input_ids = batch['input_ids'].squeeze(1)
      attention_mask = batch['attention_mask'].squeeze(1)
      labels = batch['labels']

      outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
      loss = outputs.loss

      loss.backward()
      optimizer.step()

      print(f"Epoch: {epoch}, Loss: {loss.item()}")

print('Training done')

Epoch: 0, Loss: 0.7212357521057129
Epoch: 0, Loss: 0.7140586376190186
Epoch: 0, Loss: 0.6905425190925598
Epoch: 0, Loss: 0.6869018077850342
Epoch: 0, Loss: 0.6743146777153015
Epoch: 0, Loss: 0.6784912943840027
Epoch: 0, Loss: 0.6847488880157471
Epoch: 0, Loss: 0.6818044781684875
Epoch: 0, Loss: 0.679998517036438
Epoch: 0, Loss: 0.7078869342803955
Epoch: 0, Loss: 0.6616466641426086
Epoch: 0, Loss: 0.711787760257721
Epoch: 0, Loss: 0.7045871615409851
Epoch: 0, Loss: 0.6893954277038574
Epoch: 0, Loss: 0.6902510523796082
Epoch: 0, Loss: 0.7007126212120056
Epoch: 0, Loss: 0.6949743628501892
Epoch: 0, Loss: 0.6700750589370728
Epoch: 0, Loss: 0.6610363721847534
Epoch: 0, Loss: 0.6868401765823364
Epoch: 0, Loss: 0.6885695457458496
Epoch: 0, Loss: 0.7225001454353333
Epoch: 0, Loss: 0.6886356472969055
Epoch: 0, Loss: 0.6761928200721741
Epoch: 0, Loss: 0.692642331123352
Epoch: 0, Loss: 0.6758873462677002
Epoch: 0, Loss: 0.6926476955413818
Epoch: 0, Loss: 0.7266694903373718
Epoch: 0, Loss: 0.66208

In [None]:
from google.colab import drive
drive.mount('/gdrive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
run = 1
model_path = '/content/drive/MyDrive/Models/copa_' + model_base + '_' + str(run) + '.pt'
torch.save(model.state_dict(), model_path)

RuntimeError: Parent directory /content/drive/MyDrive/Models/copa_microsoft does not exist.

In [None]:
validation_dataset = CladderDataset(validation_data, tokenizer)
validation_dataloader = DataLoader(validation_dataset, batch_size=8)

In [None]:
model.eval()
correct_predictions = 0
total_predictions = 0

with torch.no_grad():
    for batch in validation_dataloader:
        input_ids = batch['input_ids'].squeeze(1)
        attention_mask = batch['attention_mask'].squeeze(1)
        labels = batch['labels']

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        predictions = torch.argmax(outputs.logits, dim=1)
        correct_predictions += (predictions == labels).sum().item()
        print(f"predictions: {predictions}, labels: {labels}")
        total_predictions += labels.size(0)

print(f"total_predictions: {total_predictions}")

overall_accuracy = correct_predictions / total_predictions
print(f"Overall Accuracy: {overall_accuracy:.4f}")
