In [None]:
# finetuning adapted from tutorial at hugging face: https://huggingface.co/docs/transformers/training#finetune-a-pretrained-model

import pandas as pd
import numpy as np
import torch
import torchvision
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Select where project files live: Google Drive (Colab) or the current directory.
# PATH is the prefix used by later cells to build data and model paths.
use_drive = True
if use_drive:
  PATH = "/content/drive/MyDrive/CIL 2022/"
  from google.colab import drive
  drive.mount('/content/drive')
  # NOTE(review): PATH spells the folder "MyDrive" while the %cd below uses "My Drive";
  # presumably both resolve to the same directory on Colab - confirm.
  %cd /content/drive/My Drive/CIL 2022/
  # List the project folder as a quick check that the mount worked.
  !ls
else:
  PATH = "./"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/CIL 2022
'basic urban dictionary+21. July RandomForestClassifier(n_estimators=128, random_state=0, n_jobs=-1)with-stemming_with-lemmatize_no-stopwords_with-spellcorrect.txt'
'basic urban dictionary+21. July RandomForestClassifier(n_estimators=128, random_state=0, n_jobs=-1)with-stemming_with-lemmatize_with-stopwords_no-spellcorrect.txt'
'Bert inspiration'
 BoW_v2.ipynb
 code
 data
 finetuned_roberta_model
 finetuned_roberta_model_in_steps
 finetuned_roberta_model_in_steps_submission.csv
 finetuned_roberta_model_in_steps_val.csv
 finetuned_roberta_model_in_steps_with_dropout
 finetuned_roberta_model_in_steps_with_dropout_half
'For Euler Cluster'
'GRU4_stanford glove_raw_test.csv'
'GRU4_stanford glove_raw_train.csv'
'GRU4_stanford glove_raw_val.csv'
'Grubert stats and model'
'Grubert v.A.1.;epochnr=2 time_duraction=12342.342999219894s'
'

In [None]:
# HYPERPARAMETERS for data

# Maximum number of training rows to keep; truncation is applied only when this is > 0.
TRUNCATE_TRAINING_DATA_TO = -1 #200000 # number or -1 for no truncating

# Names of the available preprocessing variants; each one is used as a sub-folder
# name under "data/" when the dataset files are read.
PREPROCESSING_OPTIONS = [
    "raw",
    "no-stemming_no-lemmatize_no-stopwords_no-spellcorrect",
    "no-stemming_no-lemmatize_with-stopwords_no-spellcorrect",
    "no-stemming_no-lemmatize_with-stopwords_with-spellcorrect",
    "no-stemming_with-lemmatize_with-stopwords_no-spellcorrect",
    "no-stemming_with-lemmatize_with-stopwords_with-spellcorrect",
    "with-stemming_no-lemmatize_with-stopwords_no-spellcorrect",
    "with-stemming_with-lemmatize_no-stopwords_with-spellcorrect",
    "with-stemming_with-lemmatize_with-stopwords_no-spellcorrect",
]
# The variant actually used by this run (must be one of PREPROCESSING_OPTIONS).
PREPROCESSING_CHOICE = PREPROCESSING_OPTIONS[0]

# Hugging Face checkpoint used for both the tokenizer and the model.
pretrained_model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

In [None]:
def read_file_and_strip(filename, encoding="utf-8"):
  """Read a text file and return its lines as a NumPy array of stripped strings.

  Args:
    filename: path of the text file to read.
    encoding: text encoding used to decode the file. Defaults to UTF-8 so the
      result does not depend on the platform's locale default.

  Returns:
    np.ndarray with one entry per line of the file, each with leading and
    trailing whitespace (including the newline) removed. Blank lines are
    kept as empty strings.
  """
  with open(filename, encoding=encoding) as file:
    lines = [line.strip() for line in file]
  return np.asarray(lines)

def read_data():
  """Load the train/validation sentences and labels for the chosen preprocessing.

  Files are read from PATH + "data/" + PREPROCESSING_CHOICE + "/".

  Returns:
    Tuple (train_sentences, train_labels, val_sentences, val_labels); the two
    label arrays are converted to int.
  """
  base_dir = PATH + "data/" + PREPROCESSING_CHOICE + "/"

  # (file name, whether its contents are labels that must be cast to int)
  file_specs = (
      ("train_sentences.txt", False),
      ("train_labels.txt", True),
      ("val_sentences.txt", False),
      ("val_labels.txt", True),
  )

  loaded = []
  for file_name, is_label_file in file_specs:
    column = read_file_and_strip(base_dir + file_name)
    loaded.append(column.astype(int) if is_label_file else column)

  return tuple(loaded)

# Load the data. Note that X_test / y_test hold the *validation* split returned
# by read_data().
X_train, y_train, X_test, y_test = read_data()
print(len(X_train))

# Optionally shrink the training set for quicker experiments.
if TRUNCATE_TRAINING_DATA_TO > 0:
  X_train, y_train = X_train[:TRUNCATE_TRAINING_DATA_TO], y_train[:TRUNCATE_TRAINING_DATA_TO]

# Report the final size of each array, one line per array.
print("\n".join(
    template % len(values)
    for template, values in (
        ("len(X_train): %d", X_train),
        ("len(y_train): %d", y_train),
        ("len(X_test): %d", X_test),
        ("len(y_test): %d", y_test),
    )
))

2043433
len(X_train): 2043433
len(y_train): 2043433
len(X_test): 227049
len(y_test): 227049


In [None]:
try:
  from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForSequenceClassification
  print("transformers package already installed")
except ModuleNotFoundError:
  ! pip install transformers datasets
  from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForSequenceClassification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.0-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 6.9 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.4.0-py3-none-any.whl (365 kB)
[K     |████████████████████████████████| 365 kB 71.9 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 33.2 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 71.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 7.7 MB/s 
Collecting xxhash
  Downloading xx

In [None]:
# NOTE: this import rebinds the name `Dataset`, which the first cell imported
# from torch.utils.data; from here on `Dataset` is the Hugging Face class.
from datasets import Dataset

# Wrap the label/text arrays in Hugging Face datasets with columns "label" and "text".
train_dictionary = dict(label=y_train, text=X_train)
test_dictionary = dict(label=y_test, text=X_test)

train_dataset = Dataset.from_dict(train_dictionary)
test_dataset = Dataset.from_dict(test_dictionary)

In [None]:
# Load the tokenizer belonging to the checkpoint named by pretrained_model_name.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)

def tokenize_function(record):
    """Tokenize the "text" field of a record (or batch of records).

    Every sequence is truncated and padded to exactly 75 tokens.
    Returns whatever the module-level `tokenizer` returns for that input.
    """
    texts = record["text"]
    encoded = tokenizer(texts, truncation=True, max_length=75, padding="max_length")
    return encoded

def tokenize_dataset(dataset):
  """Tokenize a dataset and prepare its columns for the model.

  Applies tokenize_function in batched mode, drops the raw "text" column,
  renames "label" to "labels", prints the dataset summary and its first row,
  and finally switches the output format to torch tensors.

  Returns:
    The tokenized dataset.
  """
  prepared = (
      dataset
      .map(tokenize_function, batched=True)
      .remove_columns(["text"])
      .rename_column("label", "labels")
  )

  # Printed before set_format, so the first row is shown in the default format.
  print(prepared)
  print(prepared[0])

  prepared.set_format("torch")
  return prepared

# Tokenize both splits; each call also prints the dataset summary and its first row.
train_tokenized_dataset = tokenize_dataset(train_dataset)
test_tokenized_dataset = tokenize_dataset(test_dataset)

Downloading config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

  0%|          | 0/2044 [00:00<?, ?ba/s]

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 2043433
})
{'labels': 0, 'input_ids': [0, 41552, 12105, 15698, 596, 24, 16, 98, 543, 7, 120, 5324, 17487, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}


  0%|          | 0/228 [00:00<?, ?ba/s]

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 227049
})
{'labels': 1, 'input_ids': [0, 354, 70, 59, 106, 12644, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}


In [None]:
from torch.utils.data import DataLoader

# Training batches are reshuffled on every pass over the loader.
train_dataloader = DataLoader(
    dataset=train_tokenized_dataset,
    batch_size=8,
    shuffle=True,
)

# Evaluation keeps the dataset order (no shuffling).
eval_dataloader = DataLoader(
    dataset=test_tokenized_dataset,
    batch_size=8,
)

In [None]:
# HYPERPARAMETERS for model

# Switches controlling how the classification head is treated.
SHOULD_FREEZE_CLASSIFIER = False
REPLACE_LAST_LAYER = True
REMAIN_THREE_LABELS = False
ADD_ONE_EXTRA_LAYER = False

# Output width of the replaced last layer: 3 when the checkpoint's three labels
# are kept, otherwise 2.
NUMBER_OF_NODES_FOR_LAST_LAYER = 3 if REMAIN_THREE_LABELS else 2

# How many trailing named parameters are NOT copied from the pretrained
# checkpoint (2 or 4); forced to 2 when the classifier is frozen.
LOAD_PRETRAINED_WEIGHTS_UNTIL_LAST_K_PARAMETERS = 2 if SHOULD_FREEZE_CLASSIFIER else 4

# Optimisation settings.
num_epochs = 5
lr = 1e-3

In [None]:
# Load the full pretrained checkpoint; its weights are the source for the copy below.
model_with_weights = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name)
print("↑ you can ignore above warning about weights not loaded.")

# if not REMAIN_THREE_LABELS:
#   model.num_labels = 2
print(model_with_weights.roberta.embeddings.word_embeddings.weight.data)

if not REMAIN_THREE_LABELS:
  # Build a freshly initialised model with a 2-label head from the same config ...
  config = AutoConfig.from_pretrained(pretrained_model_name)
  config.num_labels = 2
  # config.classifier_dropout = 0.2
  model = AutoModelForSequenceClassification.from_config(config)
  # we have to restore the weights, see https://stackoverflow.com/a/72940359/2529745
  print(model.roberta.embeddings.word_embeddings.weight.data)

  # ... then copy the pretrained weights into it, skipping the last K named
  # parameters (presumably the classification-head tensors - confirm against
  # the printed parameter names).
  pretrained_state = model_with_weights.state_dict()  # built once, not per parameter
  named_parameters = list(model.named_parameters())
  # Explicit end index: a plain [:-K] slice would copy nothing when K == 0.
  copy_count = len(named_parameters) - LOAD_PRETRAINED_WEIGHTS_UNTIL_LAST_K_PARAMETERS
  for parameter_name, parameter in named_parameters[:copy_count]:
    # Direct indexing raises a KeyError naming the parameter if the checkpoint lacks it.
    parameter.data = pretrained_state[parameter_name].data.clone()
    print(parameter_name)

  print(model.roberta.embeddings.word_embeddings.weight.data)
else:
  # Keep the checkpoint's original three-label model unchanged.
  model = model_with_weights


Downloading pytorch_model.bin:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tensor([[ 0.1062, -0.0219,  0.1073,  ...,  0.0102, -0.0185, -0.0205],
        [-0.0098,  0.0382,  0.1892,  ...,  0.0786,  0.0935,  0.0373],
        [-0.0415, -0.0245, -0.0071,  ...,  0.0421, -0.0180, -0.0493],
        ...,
        [-0.0335,  0.0925,  0.0733,  ...,  0.0880,  0.0541,  0.0345],
        [-0.0108,  0.0454,  0.1191,  ...,  0.0080,  0.0899, -0.0048],
        [ 0.0478,  0.0302,  0.0396,  ..., -0.0031,  0.0100,  0.0257]])
tensor([[-0.0143, -0.0344,  0.0075,  ...,  0.0081, -0.0007,  0.0015],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0097,  0.0192, -0.0121,  ...,  0.0309, -0.0143,  0.0068],
        ...,
        [-0.0089, -0.0237, -0.0137,  ...,  0.0091,  0.0028,  0.0075],
        [ 0.0243,  0.0100, -0.0289,  ...,  0.0112,  0.0106, -0.0113],
        [ 0.0153,  0.0173,  0.0422,  ...,  0.0256, -0.0503,  0.0025]])
roberta.embeddings.word_embeddings.weight
roberta.embeddings.position_embeddings.weight
roberta.embeddings.token_type_embeddings.wei

In [None]:
# from tqdm import tqdm

# correct_guess = 0
# model.eval()
# X_test = np.asarray(["I hate this shit. It's the worst. Really bad, annoying, offensive. It just makes me sad", "This is great. I love how delicious this is. You are a wonderful cook. Congratulations."])
# y_test = np.asarray([0, 1])
# bar = tqdm(range(len(X_test)))
# for i in range(len(X_test)):
#   tweet, label = X_test[i], y_test[i]
#   print(tweet)
#   encoded_tweet = tokenizer(tweet, return_tensors='pt')
#   output = model(**encoded_tweet)
#   scores = output[0][0].detach().numpy()
#   scores = [scores[0], scores[2]]
#   print(scores)
#   label_to_choose = np.argmax(scores)
#   print(label_to_choose)
#   if label == label_to_choose:
#     correct_guess = correct_guess + 1
#   bar.update(1)

# print(correct_guess / len(X_test))
# model.train()

In [None]:
# Freeze the embeddings and the encoder so that only the classifier head can train.
for param in model.roberta.embeddings.parameters():
    param.requires_grad = False
for param in model.roberta.encoder.parameters():
    param.requires_grad = False
if SHOULD_FREEZE_CLASSIFIER:
  # Freeze every classifier parameter except the last one yielded by parameters().
  for param in list(model.classifier.parameters())[:-1]:
      param.requires_grad = False
      print(param)

if REPLACE_LAST_LAYER:
  # Swap the output projection for a freshly initialised layer. Its input width
  # is read from the layer being replaced instead of hard-coding 768, so this
  # also works for checkpoints with a different hidden size.
  model.classifier.out_proj = nn.Linear(
      in_features=model.classifier.out_proj.in_features,
      out_features=NUMBER_OF_NODES_FOR_LAST_LAYER,
      bias=True,
  )

if ADD_ONE_EXTRA_LAYER:
  # Stack one more linear layer on top of the existing head to map its outputs to 2 classes.
  model.classifier = nn.Sequential(
      model.classifier,
      nn.Linear(in_features=NUMBER_OF_NODES_FOR_LAST_LAYER, out_features=2, bias=True)
  )

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
# Print the model's module structure for inspection.
print(model)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerN

In [None]:
from torch.optim import SGD, AdamW

# AdamW over all model parameters, using the learning rate from the
# hyperparameter cell and a fixed weight decay.
optimizer = AdamW(
    params=model.parameters(),
    lr=lr,
    weight_decay=5e-2,
)
# optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)

# from transformers.optimization import Adafactor
# # replace AdamW with Adafactor
# optimizer = Adafactor(
#     model.parameters(),
#     lr=lr,
#     eps=(1e-30, 1e-3),
#     clip_threshold=1.0,
#     decay_rate=-0.8,
#     beta1=None,
#     weight_decay=0.0,
#     relative_step=False,
#     scale_parameter=False,
#     warmup_init=False,
# )

In [None]:
from transformers import get_scheduler

# One scheduler step is taken per training batch, so the schedule spans
# (batches per epoch) x (number of epochs) steps.
num_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_scheduler(
    "cosine",
    optimizer,
    num_warmup_steps=50000,
    num_training_steps=num_training_steps,
)

In [None]:
from datasets import load_metric
def evaluate_test(use_mini_dataset = False):
  """Evaluate the global `model` on `eval_dataloader` and return its accuracy.

  Accuracy and binary F1 are accumulated directly from the predictions rather
  than through metric objects loaded on every call. This function runs
  repeatedly inside the training loops, and reloading the metric scripts
  there added I/O per call and was the failure point of the traceback
  recorded in this notebook.

  Args:
    use_mini_dataset: when True, only the first batches of `eval_dataloader`
      are scored (the loop stops once the batch index exceeds 50), nothing is
      printed and no progress bar is shown. When False, the whole dataloader
      is scored with a progress bar and accuracy and F1 are printed.

  Returns:
    The accuracy as a float (0.0 if no example was evaluated).

  The model is put into eval mode for the duration of the call and is always
  switched back to train mode afterwards, even if evaluation raises.
  """
  test_progress_bar = None
  if not use_mini_dataset:
    # Imported here so the function does not depend on a later cell having
    # already brought `tqdm` into scope.
    from tqdm.auto import tqdm
    batch_count = len(eval_dataloader)
    print("evaluation batch count %d" %batch_count)
    test_progress_bar = tqdm(range(batch_count))

  correct = 0
  total = 0
  # F1 counters, with label 1 as the positive class.
  # NOTE(review): assumes labels are 0/1, mirroring the default binary setting
  # of the previously used `f1` metric - confirm.
  true_positives = 0
  false_positives = 0
  false_negatives = 0

  model.eval()
  try:
    for i, batch in enumerate(eval_dataloader):
      batch = {k: v.to(device) for k, v in batch.items()}
      with torch.no_grad():
        outputs = model(**batch)

      logits = outputs.logits
      if REMAIN_THREE_LABELS and not ADD_ONE_EXTRA_LAYER:
        # Drop logit column 1 so the argmax runs over the remaining columns only.
        logits = torch.cat((logits[:, :1], logits[:, 2:]), dim=1)
      predictions = torch.argmax(logits, dim=-1)
      references = batch["labels"]

      correct += int((predictions == references).sum().item())
      total += int(references.numel())

      predicted_positive = predictions == 1
      actual_positive = references == 1
      true_positives += int((predicted_positive & actual_positive).sum().item())
      false_positives += int((predicted_positive & ~actual_positive).sum().item())
      false_negatives += int((~predicted_positive & actual_positive).sum().item())

      if use_mini_dataset and i > 50:
        break
      if test_progress_bar is not None:
        test_progress_bar.update(1)
  finally:
    model.train()
    if test_progress_bar is not None:
      test_progress_bar.close()

  accuracy = correct / total if total else 0.0
  if use_mini_dataset:
    return accuracy

  f1_denominator = 2 * true_positives + false_positives + false_negatives
  f1 = 2 * true_positives / f1_denominator if f1_denominator else 0.0
  print({"accuracy": accuracy})
  print({"f1": f1})
  return accuracy

In [None]:
from tqdm.auto import tqdm

# Stage 1: train the model with the optimizer/scheduler created above, keeping
# the checkpoint with the best end-of-epoch evaluation accuracy.
best_accuracy = 0.0
# Training is skipped only when the three original labels are kept and no
# extra layer is added; in that case the model is just evaluated once.
should_do_training = not (REMAIN_THREE_LABELS and not ADD_ONE_EXTRA_LAYER)
if should_do_training:
  model.train()
  for epoch in range(num_epochs):
    # Reset per epoch: holds the quick accuracies of the current epoch only.
    mini_test_accuracies = []
    print("epoch %d" %epoch)
    batch_count = len(train_dataloader)
    progress_bar = tqdm(range(batch_count))
    for i, batch in enumerate(train_dataloader):
      if i % 2000 == 0:
        # Quick accuracy estimate on the first evaluation batches.
        mini_test_accuracy = evaluate_test(True)
        mini_test_accuracies.append(mini_test_accuracy)
        # `i` is the batch index within the epoch, so it is labelled "step"
        # (it was previously shown under the label "epoch").
        progress_bar.set_postfix_str("step %d mini test acc: %f, list: %s" %(i, mini_test_accuracy, str([ '%.3f' % item for item in mini_test_accuracies ])))
      batch = {k: v.to(device) for k, v in batch.items()}
      outputs = model(**batch)
      loss = outputs.loss
      loss.backward()

      optimizer.step()
      lr_scheduler.step()
      optimizer.zero_grad()
      progress_bar.update(1)
    progress_bar.close()

    print("evaluate test at the end of epoch %d" %epoch)
    epoch_accuracy = evaluate_test(False)
    if epoch_accuracy > best_accuracy:
      best_accuracy = epoch_accuracy
      print("save model")
      model.save_pretrained(PATH + "finetuned_roberta_model")

else:
  print("evaluate once")
  # print(evaluate_test(True))
  evaluate_test(False)

# currently the best:
# mini batch accuracies while training: ['0.522', '0.858', '0.839', '0.858', '0.844', '0.844', '0.849', '0.858', '0.863', '0.865', '0.841', '0.861', '0.870']
# validation accuracy: 0.8450818986210025

# adamw:
# ['0.469', '0.846', '0.837', '0.851', '0.851', '0.849', '0.865', '0.844', '0.877', '0.853', '0.863', '0.868', '0.865']
# accuracy 0.8495831296328106

epoch 0


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 0
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8365859351946056}
{'f1': 0.8404453408216186}
save model
epoch 1


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 1
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8420781417227119}
{'f1': 0.8396364807327632}
save model
epoch 2


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 2
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8498473897704901}
{'f1': 0.8505117119329293}
save model
epoch 3


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 3
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8533136019097199}
{'f1': 0.8553974670134291}
save model
epoch 4


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 4
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8557976472039075}
{'f1': 0.8565884512113394}
save model


In [None]:
# Summary of the configuration used for the run above.
print("preprocessing choice: %s" %PREPROCESSING_CHOICE)
print("pretrained model name: %s" %pretrained_model_name)
print("should freeze classifier: %d" %SHOULD_FREEZE_CLASSIFIER)
print("remain three labels: %d" %REMAIN_THREE_LABELS)
# Each flag is now printed under its own label; previously "add extra layer"
# showed REMAIN_THREE_LABELS and "replace last layer" showed ADD_ONE_EXTRA_LAYER.
print("add extra layer: %d" %ADD_ONE_EXTRA_LAYER)
print("replace last layer: %d" %REPLACE_LAST_LAYER)
print("number of nodes for last layer: %d" %NUMBER_OF_NODES_FOR_LAST_LAYER)
print("load pretrained weights until last %d parameters" %LOAD_PRETRAINED_WEIGHTS_UNTIL_LAST_K_PARAMETERS)
print("LR: %f" %lr)
print("max epoch count: %d" %num_epochs)
print("optimizer: " + str(optimizer))
# mini_test_accuracies is reset at the start of every epoch by the training
# loop, so this lists the values of the last epoch only.
print("mini batch accuracies while training: %s" %str([ '%.3f' % item for item in mini_test_accuracies ]))

preprocessing choice: raw
pretrained model name: cardiffnlp/twitter-roberta-base-sentiment-latest
should freeze classifier: 0
remain three labels: 0
add extra layer: 0
replace last layer: 0
number of nodes for last layer: 2
load pretrained weights until last 4 parameters
LR: 0.001000
max epoch count: 5
optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 0.001
    lr: 0.0
    maximize: False
    weight_decay: 0.05
)
mini batch accuracies while training: ['0.865', '0.875', '0.880', '0.875', '0.870', '0.873', '0.877', '0.870', '0.880', '0.873', '0.873', '0.870', '0.877', '0.873', '0.875', '0.877', '0.873', '0.873', '0.873', '0.865', '0.868', '0.870', '0.877', '0.875', '0.880', '0.873', '0.870', '0.880', '0.873', '0.875', '0.868', '0.877', '0.875', '0.873', '0.875', '0.873', '0.875', '0.875', '0.885', '0.877', '0.880', '0.877', '0.868', '0.875', '0.877', '0.877', '0.875', '0.875', '0.868', '0

In [None]:
# Stage 2 (fine-tune): unfreeze the encoder and continue training with a much
# smaller learning rate and a fresh optimizer/scheduler.
optimizer = AdamW(model.parameters(), lr=1e-6, weight_decay=5e-2)

num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="cosine", optimizer=optimizer, num_warmup_steps=50000, num_training_steps=num_training_steps
)

# Only the encoder is unfrozen here; the embeddings keep their earlier setting.
for param in model.roberta.encoder.parameters():
  param.requires_grad = True

model = model.to(device)

model.train()
for epoch in range(num_epochs):
  # Reset per epoch: holds the quick accuracies of the current epoch only.
  mini_test_accuracies = []
  print("epoch %d" %epoch)
  batch_count = len(train_dataloader)
  progress_bar = tqdm(range(batch_count))
  for i, batch in enumerate(train_dataloader):
    if i % 2000 == 0:
      mini_test_accuracy = evaluate_test(True)
      mini_test_accuracies.append(mini_test_accuracy)
      # `i` is the batch index within the epoch, so it is labelled "step"
      # (it was previously shown under the label "epoch").
      progress_bar.set_postfix_str("step %d mini test acc: %f, list: %s" %(i, mini_test_accuracy, str([ '%.3f' % item for item in mini_test_accuracies ])))
    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(**batch)
    loss = outputs.loss
    loss.backward()

    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()
    progress_bar.update(1)
  progress_bar.close()

  print("evaluate test at the end of epoch %d" %epoch)
  epoch_accuracy = evaluate_test(False)
  # best_accuracy carries over from the previous training cell, so a checkpoint
  # is written only when this stage beats the earlier best.
  if epoch_accuracy > best_accuracy:
    best_accuracy = epoch_accuracy
    print("save model")
    model.save_pretrained(PATH + "finetuned_roberta_model")

epoch 0


  0%|          | 0/255430 [00:00<?, ?it/s]

evaluate test at the end of epoch 0
evaluation batch count 28382


  0%|          | 0/28382 [00:00<?, ?it/s]

{'accuracy': 0.8970486546956824}
{'f1': 0.8956030459346598}
save model
epoch 1


  0%|          | 0/255430 [00:00<?, ?it/s]

In [None]:
from tqdm.auto import tqdm

# Stage 3: reload the best saved checkpoint and fine-tune every layer.

# Accuracy of the checkpoint being resumed; a new checkpoint is saved only if beaten.
best_accuracy = 0.8970486546956824
num_epochs = 2

# config.classifier_dropout = 0.5
# config.hidden_dropout_prob = 0.5
# config.attention_probs_dropout_prob = 0.5
# The model is reloaded BEFORE the optimizer is created. The optimizer keeps
# references to the parameter tensors it is given, so building it first (as was
# done previously) made it step the parameters of the old model object while
# the reloaded model was never updated.
model = model.from_pretrained(PATH + "finetuned_roberta_model")

for param in model.roberta.embeddings.parameters():
    param.requires_grad = True
for param in model.roberta.encoder.parameters():
  param.requires_grad = True
for param in model.classifier.parameters():
  param.requires_grad = True

model = model.to(device)

optimizer = AdamW(model.parameters(), lr=1e-6, weight_decay=5e-2)

num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="cosine", optimizer=optimizer, num_warmup_steps=5000, num_training_steps=num_training_steps
)

model.train()
for epoch in range(num_epochs):
  # Pairs of (mean training loss since the previous check, quick accuracy).
  mini_test_accuracies = []
  print("optimizer: " + str(optimizer))
  print("epoch %d/%d" %(epoch, num_epochs))
  batch_count = len(train_dataloader)
  progress_bar = tqdm(range(batch_count))
  last_loss = 0.0
  for i, batch in enumerate(train_dataloader):
    if i % 5000 == 0:
      mini_test_accuracy = evaluate_test(True)
      # Mean loss over the 5000 steps since the previous check
      # (0.0 at step 0, where nothing has been accumulated yet).
      last_loss = last_loss / 5000
      mini_test_accuracies.append((last_loss, mini_test_accuracy))
      # `i` is the batch index within the epoch, so it is labelled "step"
      # (it was previously shown under the label "epoch").
      progress_bar.set_postfix_str("last loss %f, step %d mini test acc: %f, list: %s" %(last_loss, i, mini_test_accuracy, str([ 'L%.3f A%.3f' %(loss_value, acc_val) for (loss_value, acc_val) in mini_test_accuracies ])))
      last_loss = 0.0
    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(**batch)
    loss = outputs.loss
    last_loss += loss.item()
    loss.backward()

    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()
    progress_bar.update(1)
  progress_bar.close()

  print("evaluate test at the end of epoch %d" %epoch)
  epoch_accuracy = evaluate_test(False)
  if epoch_accuracy > best_accuracy:
    best_accuracy = epoch_accuracy
    print("save model")
    model.save_pretrained(PATH + "finetuned_robert_model2")

optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    initial_lr: 1e-06
    lr: 0.0
    maximize: False
    weight_decay: 0.05
)
epoch 0/2


  0%|          | 0/255430 [00:00<?, ?it/s]

Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-22-7dc954799080>", line 36, in <module>
    mini_test_accuracy = evaluate_test(True)
  File "<ipython-input-20-614a7553594e>", line 3, in evaluate_test
    acc_metric = load_metric("accuracy")
  File "/usr/local/lib/python3.7/dist-packages/datasets/load.py", line 1404, in load_metric
    metric.download_and_prepare(download_config=download_config)
  File "/usr/local/lib/python3.7/dist-packages/datasets/metric.py", line 607, in download_and_prepare
    dataset_name=self.name, download_config=download_config, data_dir=self.data_dir
  File "/usr/local/lib/python3.7/dist-packages/datasets/download/download_manager.py", line 173, in __init__
    self._base_path = base_path or os.path.abspath(".")
  File "/usr/lib/python3.7/posixpath.py", line 383, in abspath
    cwd = os.getcwd

OSError: ignored

In [None]:
# Save the current in-memory model under PATH, regardless of its accuracy.
# NOTE(review): the directory name reads "robert", not "roberta"; it matches the
# save path used by the preceding training cell - confirm the spelling is intended.
model.save_pretrained(PATH + "finetuned_robert_model2")

In [None]:
# if EVALUATE_ONLY:
#   model.load_state_dict(torch.load(name_of_file_model))
#   model.eval()

#   train_dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
#   train_loss, train_acc = evaluate_and_save_to_csv(model, train_dataloader, PATH + "/" + model_name + "_train.csv", criterion)
#   print(f'\t Training. Loss: {train_loss:.3f} |  Training. Acc: {train_acc*100:.2f}%')
#   val_loss, val_acc = evaluate_and_save_to_csv(model, val_dataloader, PATH + "/" + model_name + "_val.csv", criterion)
#   print(f'\t Validation. Loss: {val_loss:.3f} |  Validation. Acc: {val_acc*100:.2f}%')

#   testset_data_path = "/content/drive/MyDrive/CIL 2022/data/test data/test_data.txt"

#   #read test data in
#   test_sentences = read_file_and_strip(testset_data_path)
#   test_data, test_lengths = transform_to_embeddings(test_sentences, embedding_model, embedding_model_type, word_to_ix)
#   test_labels_list = [0] * len(test_sentences)
#   test_labels = torch.tensor(test_labels_list, dtype=torch.long, device=DEVICE) 
#   test_dataset = Dataset(test_data, test_labels, test_lengths)
#   test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

#   evaluate_and_save_to_csv(model, test_dataloader, PATH + "/" + model_name + "_test.csv", criterion)