## Setup

In [1]:
!pip install transformers torch datasets "ray[tune]"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-2.10.1-py3-none-any.whl (469 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 KB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ray[tune]
  Downloading ray-2.3.0-cp38-cp38-manylinux2014_x86_64.whl (58.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.6/58.6 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading t

In [2]:
from pathlib import Path

WORKING_ENV = 'COLAB' # Can be LABS, COLAB or PAPERSPACE

assert WORKING_ENV in ['COLAB', 'PAPERSPACE']

if WORKING_ENV == 'COLAB':
    from google.colab import drive
    %load_ext google.colab.data_table
    content_path = '/content/drive/MyDrive/'
    drive.mount('/content/drive/', force_remount=True) # Outputs will be saved in your google drive

else: # Using Paperspace
    # Paperspace does not properly render animated progress bars
    # Strongly recommend using the JupyterLab UI instead of theirs
    !pip install ipywidgets 
    content_path = '/notebooks'

content_path = Path(content_path)

Mounted at /content/drive/


In [3]:
data_folder = f"{content_path}/NLP/data"
results_folder = f"{content_path}/NLP/results"
logging_folder = f"{content_path}/NLP/logs"
hp_search_folder = f"{content_path}/NLP/hp_search"

In [4]:
import pandas as pd
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding, AutoModelForSequenceClassification, DebertaTokenizer
import torch.nn as nn
import torch
import datasets
# from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import os
import itertools

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

## Load data

In [7]:
pcl_df_train_train = pd.read_csv(f"{data_folder}/pcl_df_train_train_preprocessed.csv")
pcl_df_train_train_aug = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug.csv")
pcl_df_train_train_gpt = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug_chatgpt.csv")

pcl_df_train_dev = pd.read_csv(f"{data_folder}/pcl_df_train_dev_preprocessed.csv")
pcl_df_dev = pd.read_csv(f"{data_folder}/pcl_df_dev_preprocessed.csv")

In [8]:
pcl_df_train_train.shape

(6700, 8)

In [9]:
pcl_df_train_train["class"].value_counts()

0    6075
1     625
Name: class, dtype: int64

In [10]:
pcl_df_train_train_aug.shape

(12150, 7)

In [11]:
pcl_df_train_train_aug["class"].value_counts()

0    6075
1    6075
Name: class, dtype: int64

In [12]:
pcl_df_train_train_gpt.shape

(13983, 7)

In [13]:
pcl_df_train_train_gpt["class"].value_counts()

1    7908
0    6075
Name: class, dtype: int64

In [14]:
pcl_df_train_train.columns

Index(['par_id', 'art_id', 'keyword', 'country_code', 'text', 'label', 'class',
       'preprocessed_text'],
      dtype='object')

In [15]:
pcl_df_train_train = pcl_df_train_train[['text', 'class']]
pcl_df_train_train_aug = pcl_df_train_train_aug[['text', 'class']]
pcl_df_train_train_gpt = pcl_df_train_train_gpt[['text', 'class']]

pcl_df_train_dev = pcl_df_train_dev[['text', 'class']]
pcl_df_dev = pcl_df_dev[['text', 'class']]


In [16]:
pcl_df_train_train = datasets.Dataset.from_pandas(pcl_df_train_train)
pcl_df_train_train_aug = datasets.Dataset.from_pandas(pcl_df_train_train_aug)
pcl_df_train_train_gpt = datasets.Dataset.from_pandas(pcl_df_train_train_gpt)

pcl_df_train_dev = datasets.Dataset.from_pandas(pcl_df_train_dev)
pcl_df_dev = datasets.Dataset.from_pandas(pcl_df_dev)

In [17]:
type(pcl_df_train_train)

datasets.arrow_dataset.Dataset

### Helper functions

In [18]:
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

In [19]:
def model_init_clf():

    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-base", 
        num_labels=2, 
        id2label=id2label, 
        label2id=label2id
    )

    for param in model.deberta.embeddings.parameters():
        param.requires_grad = False
    for i in range(6):
        for param in model.deberta.encoder.layer[i].parameters():
            param.requires_grad = False

    model.classifier = torch.nn.Sequential(
        torch.nn.Linear(768, 1024),
        torch.nn.BatchNorm1d(1024),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),
        torch.nn.Linear(1024, 256),
        torch.nn.BatchNorm1d(256),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),  
        torch.nn.Linear(64, 2),
        torch.nn.Softmax(dim=-1)
    )

    return model


def model_init():

    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-base", 
        num_labels=2, 
        id2label=id2label, 
        label2id=label2id
    )

    for param in model.deberta.embeddings.parameters():
        param.requires_grad = False
    for i in range(6):
        for param in model.deberta.encoder.layer[i].parameters():
            param.requires_grad = False

    return model


tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

In [20]:
def tokenization(batched_text):
    return tokenizer(
        batched_text['text'], 
        padding = 'max_length', 
        truncation=True, 
        max_length = 512
    )

In [21]:
# define accuracy metrics
def compute_metrics(pred):

    preds, labels = pred
    preds = np.argmax(preds, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary'
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


### Tokenization

In [22]:
pcl_df_train_train = pcl_df_train_train.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train)
)

pcl_df_train_train_aug = pcl_df_train_train_aug.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train_aug)
)

pcl_df_train_train_gpt = pcl_df_train_train_gpt.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train_gpt)
)


pcl_df_train_dev = pcl_df_train_dev.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_dev)
)

pcl_df_dev = pcl_df_dev.map(
    tokenization, batched = True, batch_size = len(pcl_df_dev)
)


Map:   0%|          | 0/6700 [00:00<?, ? examples/s]

Map:   0%|          | 0/12150 [00:00<?, ? examples/s]

Map:   0%|          | 0/13983 [00:00<?, ? examples/s]

Map:   0%|          | 0/1675 [00:00<?, ? examples/s]

Map:   0%|          | 0/2094 [00:00<?, ? examples/s]

In [23]:
pcl_df_train_train.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

pcl_df_train_train_aug.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

pcl_df_train_train_gpt.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)



pcl_df_train_dev.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)
pcl_df_dev.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)


In [24]:
pcl_df_train_train = pcl_df_train_train.rename_column("class", "label")
pcl_df_train_train_aug= pcl_df_train_train_aug.rename_column("class", "label")
pcl_df_train_train_gpt = pcl_df_train_train_gpt.rename_column("class", "label")


pcl_df_train_dev = pcl_df_train_dev.rename_column("class", "label")
pcl_df_dev = pcl_df_dev.rename_column("class", "label")

### Train with best hyperparameters on the original train data (without modified classifier)

In [31]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [32]:
training_args_og = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-og'
)


In [33]:
trainer_og = Trainer(
        args=training_args_og,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/559M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/pytorch_model.bin
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model

In [34]:
trainer_og.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.241042,0.905672,0.150538,0.823529,0.08284
1,0.302000,0.226136,0.909851,0.584022,0.546392,0.627219
2,0.302000,0.191363,0.925373,0.485597,0.797297,0.349112
3,0.188000,0.184284,0.928955,0.560886,0.745098,0.449704
4,0.188000,0.18633,0.928955,0.554307,0.755102,0.43787


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-52
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-52/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-52/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassific

TrainOutput(global_step=260, training_loss=0.2251452115865854, metrics={'train_runtime': 610.0298, 'train_samples_per_second': 54.915, 'train_steps_per_second': 0.426, 'total_flos': 1.0257529279905792e+16, 'train_loss': 0.2251452115865854, 'epoch': 4.99})

In [35]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_og.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.2261359840631485,
 'eval_accuracy': 0.9098507462686567,
 'eval_f1': 0.5840220385674931,
 'eval_precision': 0.5463917525773195,
 'eval_recall': 0.6272189349112426,
 'eval_runtime': 8.3126,
 'eval_samples_per_second': 201.501,
 'eval_steps_per_second': 12.631,
 'epoch': 4.99}

### Make predictions on official dev set

In [36]:
dev_set_preds_og, dev_set_labels_og, dev_set_metrics_og = trainer_og.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [37]:
dev_set_metrics_og

{'dev_loss': 0.2377750724554062,
 'dev_accuracy': 0.9011461318051576,
 'dev_f1': 0.5450549450549451,
 'dev_precision': 0.484375,
 'dev_recall': 0.6231155778894473,
 'dev_runtime': 10.4557,
 'dev_samples_per_second': 200.273,
 'dev_steps_per_second': 12.529}

In [38]:
dev_set_pred_labels_og = np.argmax(dev_set_preds_og)

### Saving trained model

In [39]:
# save the best model
trainer_og.save_model(f'{results_folder}/deberta_frozen_og')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_og
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og/special_tokens_map.json


### Train with best hyperparameters on the original train data (without modified classifier) - grad accum step = 2

In [40]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [41]:
training_args_og_2 = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [42]:
trainer_og_2 = Trainer(
        args=training_args_og_2,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [43]:
trainer_og_2.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.2194,0.189348,0.922985,0.547368,0.672414,0.461538
1,0.191,0.181461,0.924776,0.582781,0.661654,0.52071
2,0.1413,0.188476,0.921791,0.618076,0.609195,0.627219
3,0.1033,0.214919,0.924776,0.590909,0.654676,0.538462
4,0.0848,0.226444,0.929552,0.611842,0.688889,0.550296


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-209
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-209/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-209/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-209/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-209/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=1045, training_loss=0.15721534884147095, metrics={'train_runtime': 533.6003, 'train_samples_per_second': 62.781, 'train_steps_per_second': 1.958, 'total_flos': 1.0267340403081216e+16, 'train_loss': 0.15721534884147095, 'epoch': 5.0})

In [44]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_og_2.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.18847616016864777,
 'eval_accuracy': 0.9217910447761194,
 'eval_f1': 0.6180758017492712,
 'eval_precision': 0.6091954022988506,
 'eval_recall': 0.6272189349112426,
 'eval_runtime': 8.2327,
 'eval_samples_per_second': 203.457,
 'eval_steps_per_second': 12.754,
 'epoch': 5.0}

### Make predictions on official dev set

In [45]:
dev_set_preds_og_2, dev_set_labels_og_2, dev_set_metrics_og_2 = trainer_og_2.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [46]:
dev_set_metrics_og_2

{'dev_loss': 0.1874750256538391,
 'dev_accuracy': 0.9250238777459407,
 'dev_f1': 0.6252983293556086,
 'dev_precision': 0.5954545454545455,
 'dev_recall': 0.6582914572864321,
 'dev_runtime': 10.4208,
 'dev_samples_per_second': 200.945,
 'dev_steps_per_second': 12.571}

In [47]:
dev_set_pred_labels_og_2 = np.argmax(dev_set_preds_og_2)

### Saving trained model

In [48]:
# save the best model
trainer_og_2.save_model(f'{results_folder}/deberta_frozen_og_2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_og_2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og_2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og_2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og_2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_og_2/special_tokens_map.json


### Train with best hyperparameters on the original train data (with modified classifier)

In [49]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [50]:
training_args_clf = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-clf'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [51]:
trainer_clf = Trainer(
        args=training_args_clf,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [52]:
trainer_clf.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.625534,0.882985,0.010101,0.034483,0.005917
1,0.655400,0.636259,0.880597,0.099099,0.207547,0.065089
2,0.655400,0.616467,0.724776,0.285271,0.193277,0.544379
3,0.640000,0.610887,0.871045,0.142857,0.216867,0.106509
4,0.640000,0.604085,0.874627,0.146341,0.233766,0.106509


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-52
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-52/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-52/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassific

TrainOutput(global_step=260, training_loss=0.6440114534818209, metrics={'train_runtime': 527.8301, 'train_samples_per_second': 63.467, 'train_steps_per_second': 0.493, 'total_flos': 1.036725218131968e+16, 'train_loss': 0.6440114534818209, 'epoch': 4.99})

In [53]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_clf.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.6164670586585999,
 'eval_accuracy': 0.724776119402985,
 'eval_f1': 0.2852713178294574,
 'eval_precision': 0.19327731092436976,
 'eval_recall': 0.5443786982248521,
 'eval_runtime': 8.2747,
 'eval_samples_per_second': 202.425,
 'eval_steps_per_second': 12.689,
 'epoch': 4.99}

### Make predictions on official dev set

In [54]:
dev_set_preds_clf, dev_set_labels_clf, dev_set_metrics_clf = trainer_clf.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [55]:
dev_set_metrics_clf

{'dev_loss': 0.6174400448799133,
 'dev_accuracy': 0.723018147086915,
 'dev_f1': 0.2676767676767677,
 'dev_precision': 0.178752107925801,
 'dev_recall': 0.5326633165829145,
 'dev_runtime': 10.4247,
 'dev_samples_per_second': 200.869,
 'dev_steps_per_second': 12.566}

In [56]:
dev_set_pred_labels_clf = np.argmax(dev_set_preds_clf)

### Saving trained model

In [57]:
# save the best model
trainer_clf.save_model(f'{results_folder}/deberta_frozen_clf')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_clf
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf/special_tokens_map.json


### Train with best hyperparameters on the original train data (with modified classifier) - grad accum steps = 2

In [58]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [59]:
training_args_clf_2 = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-clf-2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [60]:
trainer_clf_2 = Trainer(
        args=training_args_clf_2,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [61]:
trainer_clf_2.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.6319,0.612852,0.804179,0.428571,0.303704,0.727811
1,0.5669,0.560483,0.878806,0.545861,0.438849,0.721893
2,0.5164,0.541161,0.886567,0.572072,0.461818,0.751479
3,0.5043,0.512947,0.899701,0.594203,0.502041,0.727811
4,0.4946,0.508892,0.917015,0.612813,0.578947,0.650888


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-209
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-209/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-209/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-209/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-209/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=1045, training_loss=0.5505819749604002, metrics={'train_runtime': 542.9896, 'train_samples_per_second': 61.695, 'train_steps_per_second': 1.925, 'total_flos': 1.037716825227264e+16, 'train_loss': 0.5505819749604002, 'epoch': 5.0})

In [62]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_clf_2.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.5088923573493958,
 'eval_accuracy': 0.9170149253731343,
 'eval_f1': 0.6128133704735376,
 'eval_precision': 0.5789473684210527,
 'eval_recall': 0.650887573964497,
 'eval_runtime': 8.2565,
 'eval_samples_per_second': 202.87,
 'eval_steps_per_second': 12.717,
 'epoch': 5.0}

### Make predictions on official dev set

In [63]:
dev_set_preds_clf_2, dev_set_labels_clf_2, dev_set_metrics_clf_2 = trainer_clf_2.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [64]:
dev_set_metrics_clf_2

{'dev_loss': 0.5138863325119019,
 'dev_accuracy': 0.9097421203438395,
 'dev_f1': 0.5827814569536424,
 'dev_precision': 0.5196850393700787,
 'dev_recall': 0.6633165829145728,
 'dev_runtime': 10.4959,
 'dev_samples_per_second': 199.506,
 'dev_steps_per_second': 12.481}

In [65]:
dev_set_pred_labels_clf_2 = np.argmax(dev_set_preds_clf_2)

### Saving trained model

In [66]:
# save the best model
trainer_clf_2.save_model(f'{results_folder}/deberta_frozen_clf_2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_clf_2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf_2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf_2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf_2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_clf_2/special_tokens_map.json


### Train with best hyperparameters on the augmented train data (without chatgpt data)

In [124]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [125]:
training_args_aug = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-aug-run2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [126]:
trainer_aug = Trainer(
        args=training_args_aug,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_aug,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [127]:
trainer_aug.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.233965,0.905075,0.541787,0.52809,0.556213
2,0.320100,0.178831,0.925373,0.603175,0.650685,0.56213
3,0.160700,0.174194,0.933731,0.640777,0.707143,0.585799
4,0.118300,0.191051,0.927164,0.641176,0.637427,0.64497
5,0.086200,0.19986,0.934328,0.642857,0.71223,0.585799


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-95
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-95/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-95/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-95/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-95/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassific

TrainOutput(global_step=475, training_loss=0.15530719054372688, metrics={'train_runtime': 902.3503, 'train_samples_per_second': 67.324, 'train_steps_per_second': 0.526, 'total_flos': 1.8625804153344e+16, 'train_loss': 0.15530719054372688, 'epoch': 5.0})

In [128]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_aug.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.1998598724603653,
 'eval_accuracy': 0.9343283582089552,
 'eval_f1': 0.6428571428571428,
 'eval_precision': 0.7122302158273381,
 'eval_recall': 0.5857988165680473,
 'eval_runtime': 8.31,
 'eval_samples_per_second': 201.564,
 'eval_steps_per_second': 12.635,
 'epoch': 5.0}

### Make predictions on official dev set

In [129]:
dev_set_preds_aug, dev_set_labels_aug, dev_set_metrics_aug = trainer_aug.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [130]:
dev_set_metrics_aug

{'dev_loss': 0.2281123697757721,
 'dev_accuracy': 0.9235912129894938,
 'dev_f1': 0.5721925133689839,
 'dev_precision': 0.6114285714285714,
 'dev_recall': 0.5376884422110553,
 'dev_runtime': 10.5199,
 'dev_samples_per_second': 199.05,
 'dev_steps_per_second': 12.453}

In [131]:
dev_set_pred_labels_aug = np.argmax(dev_set_preds_aug)

### Saving trained model

In [132]:
# save the best model
trainer_aug.save_model(f'{results_folder}/deberta_frozen_aug_run2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_aug_run2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_run2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_run2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_run2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_run2/special_tokens_map.json


### Train with best hyperparameters on the augmented train data (without chatgpt data) - gradient accumulation step = 2

In [76]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [77]:
training_args_aug_2 = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-aug-2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [78]:
trainer_aug_2 = Trainer(
        args=training_args_aug_2,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_aug,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [79]:
trainer_aug_2.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1745,0.192528,0.919403,0.615385,0.593407,0.639053
2,0.1129,0.192449,0.922388,0.603659,0.622642,0.585799


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-380
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-380/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-380/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-380/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-380/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1745,0.192528,0.919403,0.615385,0.593407,0.639053
2,0.1129,0.192449,0.922388,0.603659,0.622642,0.585799
3,0.0457,0.323385,0.926567,0.562278,0.705357,0.467456
4,0.0233,0.386253,0.92597,0.592105,0.666667,0.532544
5,0.0138,0.428724,0.92597,0.575342,0.682927,0.497041


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-1140
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-1140/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-1140/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-1140/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-1140/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenc

TrainOutput(global_step=1900, training_loss=0.09108196082868074, metrics={'train_runtime': 923.5536, 'train_samples_per_second': 65.779, 'train_steps_per_second': 2.057, 'total_flos': 1.8625804153344e+16, 'train_loss': 0.09108196082868074, 'epoch': 5.0})

In [80]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_aug_2.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.19252756237983704,
 'eval_accuracy': 0.9194029850746268,
 'eval_f1': 0.6153846153846154,
 'eval_precision': 0.5934065934065934,
 'eval_recall': 0.6390532544378699,
 'eval_runtime': 8.2672,
 'eval_samples_per_second': 202.607,
 'eval_steps_per_second': 12.701,
 'epoch': 5.0}

### Make predictions on official dev set

In [81]:
dev_set_preds_aug_2, dev_set_labels_aug_2, dev_set_metrics_aug_2 = trainer_aug_2.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [87]:
dev_set_metrics_aug_2

{'dev_loss': 0.20923367142677307,
 'dev_accuracy': 0.9145176695319962,
 'dev_f1': 0.597752808988764,
 'dev_precision': 0.540650406504065,
 'dev_recall': 0.6683417085427136,
 'dev_runtime': 10.5095,
 'dev_samples_per_second': 199.248,
 'dev_steps_per_second': 12.465}

In [83]:
dev_set_pred_labels_aug_2 = np.argmax(dev_set_preds_aug_2)

### Saving trained model

In [84]:
# save the best model
trainer_aug_2.save_model(f'{results_folder}/deberta_frozen_aug_2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_aug_2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_aug_2/special_tokens_map.json


### Train with best hyperparameters on the augmented train data + chatgpt data

In [86]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [88]:
training_args_gpt = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-gpt'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [89]:
trainer_gpt = Trainer(
        args=training_args_gpt,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_gpt,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [90]:
trainer_gpt.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.3521,0.312298,0.897313,0.022727,0.285714,0.011834
1,0.2035,0.214098,0.902687,0.089385,0.8,0.047337
2,0.1554,0.187606,0.924776,0.507812,0.747126,0.384615
3,0.1303,0.183998,0.922985,0.603077,0.628205,0.579882
4,0.1082,0.182163,0.927761,0.605863,0.673913,0.550296


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-109
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-109/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-109/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=545, training_loss=0.1817144253932008, metrics={'train_runtime': 1103.1269, 'train_samples_per_second': 63.379, 'train_steps_per_second': 0.494, 'total_flos': 2.142626662472909e+16, 'train_loss': 0.1817144253932008, 'epoch': 5.0})

In [91]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_gpt.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.18216276168823242,
 'eval_accuracy': 0.9277611940298508,
 'eval_f1': 0.6058631921824105,
 'eval_precision': 0.6739130434782609,
 'eval_recall': 0.5502958579881657,
 'eval_runtime': 8.3305,
 'eval_samples_per_second': 201.069,
 'eval_steps_per_second': 12.604,
 'epoch': 5.0}

### Make predictions on official dev set

In [92]:
dev_set_preds_gpt, dev_set_labels_gpt, dev_set_metrics_gpt = trainer_gpt.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [93]:
dev_set_metrics_gpt

{'dev_loss': 0.1997695416212082,
 'dev_accuracy': 0.9231136580706781,
 'dev_f1': 0.5751978891820579,
 'dev_precision': 0.6055555555555555,
 'dev_recall': 0.5477386934673367,
 'dev_runtime': 10.4385,
 'dev_samples_per_second': 200.603,
 'dev_steps_per_second': 12.55}

In [94]:
dev_set_pred_labels_gpt = np.argmax(dev_set_preds_gpt)

### Saving trained model

In [95]:
# save the best model
trainer_gpt.save_model(f'{results_folder}/deberta_frozen_gpt')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_gpt
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt/special_tokens_map.json


### Train with best hyperparameters on the augmented train data + chatgpt data (grad accumulation step = 2)

In [133]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [134]:
training_args_gpt_2 = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-gpt-2-run2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [135]:
trainer_gpt_2 = Trainer(
        args=training_args_gpt_2,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_gpt,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [136]:
trainer_gpt_2.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1484,0.187943,0.922388,0.560811,0.653543,0.491124
2,0.089,0.209641,0.927761,0.625387,0.655844,0.597633
3,0.0321,0.359288,0.916418,0.593023,0.582857,0.60355
4,0.0235,0.409433,0.926567,0.619195,0.649351,0.591716
5,0.0101,0.431822,0.92597,0.6,0.659574,0.550296


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-437
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-437/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-437/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-437/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-437/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=2185, training_loss=0.08197649164930906, metrics={'train_runtime': 1060.6336, 'train_samples_per_second': 65.918, 'train_steps_per_second': 2.06, 'total_flos': 2.143577115030528e+16, 'train_loss': 0.08197649164930906, 'epoch': 5.0})

In [137]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_gpt_2.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.20964111387729645,
 'eval_accuracy': 0.9277611940298508,
 'eval_f1': 0.6253869969040248,
 'eval_precision': 0.6558441558441559,
 'eval_recall': 0.5976331360946746,
 'eval_runtime': 8.2874,
 'eval_samples_per_second': 202.115,
 'eval_steps_per_second': 12.67,
 'epoch': 5.0}

### Make predictions on official dev set

In [138]:
dev_set_preds_gpt_2, dev_set_labels_gpt_2, dev_set_metrics_gpt_2 = trainer_gpt_2.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [139]:
dev_set_metrics_gpt_2

{'dev_loss': 0.23594646155834198,
 'dev_accuracy': 0.9149952244508118,
 'dev_f1': 0.5594059405940593,
 'dev_precision': 0.551219512195122,
 'dev_recall': 0.5678391959798995,
 'dev_runtime': 10.5423,
 'dev_samples_per_second': 198.628,
 'dev_steps_per_second': 12.426}

In [140]:
dev_set_pred_labels_gpt_2 = np.argmax(dev_set_preds_gpt_2)

### Saving trained model

In [141]:
# save the best model
trainer_gpt_2.save_model(f'{results_folder}/deberta_frozen_gpt_2_run2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_2_run2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_2_run2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_2_run2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_2_run2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_2_run2/special_tokens_map.json


### Train with best hyperparameters on the augmented train data + chatgpt data (with modified classifier)

In [105]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [106]:
training_args_gpt_clf = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-gpt-clf'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [107]:
trainer_gpt_clf = Trainer(
        args=training_args_gpt_clf,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_gpt,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [108]:
trainer_gpt_clf.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.5363,0.459893,0.887761,0.06,0.193548,0.035503
1,0.4512,0.42547,0.90209,0.078652,0.777778,0.04142
2,0.4306,0.413329,0.912836,0.391667,0.661972,0.278107
3,0.4182,0.408445,0.923582,0.549296,0.678261,0.461538
4,0.411,0.406648,0.924179,0.588997,0.65,0.538462


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-109
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-109/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-109/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=545, training_loss=0.4460966092730881, metrics={'train_runtime': 1034.8507, 'train_samples_per_second': 67.56, 'train_steps_per_second': 0.527, 'total_flos': 2.165545945239552e+16, 'train_loss': 0.4460966092730881, 'epoch': 5.0})

In [109]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_gpt_clf.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.40664806962013245,
 'eval_accuracy': 0.924179104477612,
 'eval_f1': 0.5889967637540453,
 'eval_precision': 0.65,
 'eval_recall': 0.5384615384615384,
 'eval_runtime': 8.2917,
 'eval_samples_per_second': 202.009,
 'eval_steps_per_second': 12.663,
 'epoch': 5.0}

### Make predictions on official dev set

In [110]:
dev_set_preds_gpt_clf, dev_set_labels_gpt_clf, dev_set_metrics_gpt_clf = trainer_gpt_clf.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [111]:
dev_set_metrics_gpt_clf

{'dev_loss': 0.40973177552223206,
 'dev_accuracy': 0.9212034383954155,
 'dev_f1': 0.5691906005221932,
 'dev_precision': 0.592391304347826,
 'dev_recall': 0.5477386934673367,
 'dev_runtime': 10.4887,
 'dev_samples_per_second': 199.644,
 'dev_steps_per_second': 12.49}

In [112]:
dev_set_pred_labels_gpt_clf = np.argmax(dev_set_preds_gpt_clf)

### Saving trained model

In [113]:
# save the best model
trainer_gpt_clf.save_model(f'{results_folder}/deberta_frozen_gpt_clf')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf/special_tokens_map.json


### Train with best hyperparameters on the augmented train data + chatgpt data (with modified classifier) (grad accumulation step = 2)

In [142]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [143]:
training_args_gpt_clf_2 = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-frozen-gpt-clf-2-run2'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [144]:
trainer_gpt_clf_2 = Trainer(
        args=training_args_gpt_clf_2,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_gpt,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [145]:
trainer_gpt_clf_2.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4231,0.40764,0.922985,0.520446,0.7,0.414201
2,0.3938,0.396195,0.925373,0.485597,0.797297,0.349112
3,0.3777,0.391949,0.930149,0.611296,0.69697,0.544379
4,0.3612,0.392733,0.928358,0.565217,0.728972,0.461538
5,0.3584,0.388912,0.932537,0.608997,0.733333,0.52071


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-437
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-437/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-437/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-437/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-437/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=2185, training_loss=0.39435667391499885, metrics={'train_runtime': 1069.5302, 'train_samples_per_second': 65.37, 'train_steps_per_second': 2.043, 'total_flos': 2.16650656461312e+16, 'train_loss': 0.39435667391499885, 'epoch': 5.0})

In [146]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_gpt_clf_2.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.39194872975349426,
 'eval_accuracy': 0.9301492537313433,
 'eval_f1': 0.611295681063123,
 'eval_precision': 0.696969696969697,
 'eval_recall': 0.5443786982248521,
 'eval_runtime': 8.3108,
 'eval_samples_per_second': 201.545,
 'eval_steps_per_second': 12.634,
 'epoch': 5.0}

### Make predictions on official dev set

In [147]:
dev_set_preds_gpt_clf_2, dev_set_labels_gpt_clf_2, dev_set_metrics_gpt_clf_2 = trainer_gpt_clf_2.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [148]:
dev_set_metrics_gpt_clf_2

{'dev_loss': 0.3965911269187927,
 'dev_accuracy': 0.9235912129894938,
 'dev_f1': 0.550561797752809,
 'dev_precision': 0.6242038216560509,
 'dev_recall': 0.49246231155778897,
 'dev_runtime': 10.5422,
 'dev_samples_per_second': 198.631,
 'dev_steps_per_second': 12.426}

In [149]:
dev_set_pred_labels_gpt_clf_2 = np.argmax(dev_set_preds_gpt_clf_2)

### Saving trained model

In [150]:
# save the best model
trainer_gpt_clf_2.save_model(f'{results_folder}/deberta_frozen_gpt_clf_2_run2')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf_2_run2
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf_2_run2/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf_2_run2/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf_2_run2/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_frozen_gpt_clf_2_run2/special_tokens_map.json


In [None]:
# ### Load trained model
# trained_model = AutoModelForSequenceClassification.from_pretrained(
#     f'{results_folder}/deberta_gpt', 
#     num_labels=2, 
#     id2label=id2label, 
#     label2id=label2id
# )


loading configuration file /content/drive/MyDrive/NLP/results/deberta_gpt/config.json
Model config DebertaConfig {
  "_name_or_path": "/content/drive/MyDrive/NLP/results/deberta_gpt",
  "architectures": [
    "DebertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",
  "t

### FINAL MODEL: Train with best hyperparameters on the entire train dataset (train_train_aug + train_dev) - gradient accumulation step = 2

In [157]:
pcl_df_train_train = pd.read_csv(f"{data_folder}/pcl_df_train_train_preprocessed.csv")
pcl_df_train_train_aug = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug.csv")
pcl_df_train_train_gpt = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug_chatgpt.csv")

pcl_df_train_dev = pd.read_csv(f"{data_folder}/pcl_df_train_dev_preprocessed.csv")
pcl_df_dev_pd = pd.read_csv(f"{data_folder}/pcl_df_dev_preprocessed.csv")


pcl_df_train_pd = pd.concat(
    [pcl_df_train_train_aug, pcl_df_train_dev], axis=0, ignore_index=True
)

In [158]:
pcl_df_train = pcl_df_train_pd[['text', 'class']]
pcl_df_dev = pcl_df_dev_pd[['text', 'class']]

In [159]:
pcl_df_train = datasets.Dataset.from_pandas(pcl_df_train)
pcl_df_dev = datasets.Dataset.from_pandas(pcl_df_dev)

In [160]:
pcl_df_train = pcl_df_train.map(
    tokenization, batched = True, batch_size = len(pcl_df_train)
)

pcl_df_dev = pcl_df_dev.map(
    tokenization, batched = True, batch_size = len(pcl_df_dev)
)

Map:   0%|          | 0/8375 [00:00<?, ? examples/s]

Map:   0%|          | 0/2094 [00:00<?, ? examples/s]

In [161]:
pcl_df_train.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

pcl_df_dev.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

In [162]:
pcl_df_train = pcl_df_train.rename_column("class", "label")
pcl_df_dev = pcl_df_dev.rename_column("class", "label")

In [163]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 2
logging_steps = 100

In [164]:
training_args_best = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-final'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [165]:
trainer_best = Trainer(
        args=training_args_best,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train,
        eval_dataset=pcl_df_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [166]:
trainer_best.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2284,0.190793,0.925979,0.605598,0.613402,0.59799
2,0.168,0.180054,0.927412,0.621891,0.615764,0.628141
3,0.1355,0.200687,0.917861,0.59434,0.56,0.633166
4,0.1064,0.224061,0.917383,0.57284,0.563107,0.582915
5,0.0709,0.252049,0.916428,0.563591,0.559406,0.567839


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2094
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-262
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-262/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-262/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-262/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-262/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=1310, training_loss=0.14853475803637323, metrics={'train_runtime': 664.7676, 'train_samples_per_second': 62.992, 'train_steps_per_second': 1.971, 'total_flos': 1.283877446784e+16, 'train_loss': 0.14853475803637323, 'epoch': 5.0})

In [167]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_best.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2094
  Batch size = 16


{'eval_loss': 0.1800541877746582,
 'eval_accuracy': 0.9274116523400191,
 'eval_f1': 0.6218905472636815,
 'eval_precision': 0.6157635467980296,
 'eval_recall': 0.628140703517588,
 'eval_runtime': 10.4412,
 'eval_samples_per_second': 200.552,
 'eval_steps_per_second': 12.546,
 'epoch': 5.0}

### Make predictions on official dev set

In [168]:
dev_set_preds_final, dev_set_labels_final, dev_set_metrics_final = trainer_best.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [169]:
dev_set_metrics_final

{'dev_loss': 0.1800541877746582,
 'dev_accuracy': 0.9274116523400191,
 'dev_f1': 0.6218905472636815,
 'dev_precision': 0.6157635467980296,
 'dev_recall': 0.628140703517588,
 'dev_runtime': 10.5266,
 'dev_samples_per_second': 198.925,
 'dev_steps_per_second': 12.445}

In [172]:
dev_set_preds_final

array([[ 0.739 , -0.727 ],
       [ 1.629 , -1.75  ],
       [ 0.485 , -0.5283],
       ...,
       [ 1.041 , -1.13  ],
       [ 2.074 , -2.096 ],
       [-0.9194,  1.135 ]], dtype=float16)

In [174]:
dev_set_pred_labels_final = np.argmax(dev_set_preds_final, axis=1)

In [177]:
dev_set_pred_labels_final.shape

(2094,)

In [178]:
pcl_df_dev_pd.shape

(2094, 9)

In [180]:
pcl_df_dev_pd["pred_class"] = dev_set_pred_labels_final

In [203]:
pcl_df_dev_pd.isna().sum()

par_id               0
art_id               0
keyword              0
country_code         0
text                 0
label                0
class                0
preprocessed_text    0
len_text             0
pred_class           0
dtype: int64

In [204]:
pcl_df_dev_pd.to_csv(f"{results_folder}/pcl_df_dev_w_preds.csv", index=False)

### Make predictions on the official test set

In [187]:
pcl_df_test_pd = pd.read_csv(f"{data_folder}/pcl_df_test_preprocessed.csv")

In [188]:
pcl_df_test_pd.columns

Index(['par_id', 'art_id', 'keyword', 'country_code', 'text'], dtype='object')

In [189]:
pcl_df_test = pcl_df_test_pd[['text']]

In [190]:
pcl_df_test = datasets.Dataset.from_pandas(pcl_df_test)

In [191]:
pcl_df_test = pcl_df_test.map(
    tokenization, batched = True, batch_size = len(pcl_df_test)
)

Map:   0%|          | 0/3832 [00:00<?, ? examples/s]

In [192]:
pcl_df_test.set_format(
    'torch', columns=['input_ids', 'attention_mask']
)

In [193]:
test_set_preds_final, test_set_labels_final, test_set_metrics_final = trainer_best.predict(
    pcl_df_test, metric_key_prefix="test"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3832
  Batch size = 16


In [194]:
test_set_metrics_final

{'test_runtime': 18.6207,
 'test_samples_per_second': 205.792,
 'test_steps_per_second': 12.889}

In [195]:
test_set_preds_final

array([[ 2.854, -3.26 ],
       [ 0.478, -0.598],
       [ 2.295, -2.469],
       ...,
       [ 2.564, -2.74 ],
       [ 2.098, -2.209],
       [ 1.189, -1.275]], dtype=float16)

In [196]:
test_set_pred_labels_final = np.argmax(test_set_preds_final, axis=1)

In [197]:
test_set_pred_labels_final.shape

(3832,)

### Save predicted labels on dev set and test set

In [198]:
import numpy as np
np.savetxt(f"{results_folder}/dev.txt", dev_set_pred_labels_final)
np.savetxt(f"{results_folder}/test.txt", test_set_pred_labels_final)