## Setup

In [41]:
!pip install transformers torch datasets "ray[tune]"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [42]:
from pathlib import Path

WORKING_ENV = 'COLAB' # Can be LABS, COLAB or PAPERSPACE

assert WORKING_ENV in ['COLAB', 'PAPERSPACE']

if WORKING_ENV == 'COLAB':
    from google.colab import drive
    %load_ext google.colab.data_table
    content_path = '/content/drive/MyDrive/'
    drive.mount('/content/drive/', force_remount=True) # Outputs will be saved in your google drive

else: # Using Paperspace
    # Paperspace does not properly render animated progress bars
    # Strongly recommend using the JupyterLab UI instead of theirs
    !pip install ipywidgets 
    content_path = '/notebooks'

content_path = Path(content_path)

The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_table
Mounted at /content/drive/


In [43]:
data_folder = f"{content_path}/NLP/data"
results_folder = f"{content_path}/NLP/results"
logging_folder = f"{content_path}/NLP/logs"
hp_search_folder = f"{content_path}/NLP/hp_search"

In [44]:
import pandas as pd
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding, AutoModelForSequenceClassification, DebertaTokenizer
import torch.nn as nn
import torch
import datasets
# from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import os
import itertools

In [45]:
# from ray.tune.suggest.hyperopt import HyperOptSearch
# from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
# from ray.tune import CLIReporter
# from ray import tune

In [46]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

## Load data

In [47]:
pcl_df_train_train = pd.read_csv(f"{data_folder}/pcl_df_train_train_preprocessed.csv")
pcl_df_train_train_aug = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug.csv")
pcl_df_train_train_gpt = pd.read_csv(f"{data_folder}/pcl_df_train_train_aug_chatgpt.csv")


pcl_df_train_dev = pd.read_csv(f"{data_folder}/pcl_df_train_dev_preprocessed.csv")
pcl_df_dev = pd.read_csv(f"{data_folder}/pcl_df_dev_preprocessed.csv")

In [48]:
pcl_df_train_train.shape

(6700, 8)

In [49]:
pcl_df_train_train["class"].value_counts()

0    6075
1     625
Name: class, dtype: int64

In [50]:
pcl_df_train_train_aug.shape

(12150, 7)

In [51]:
pcl_df_train_train_aug["class"].value_counts()

0    6075
1    6075
Name: class, dtype: int64

In [52]:
pcl_df_train_train_gpt.shape

(13983, 7)

In [53]:
pcl_df_train_train_gpt["class"].value_counts()

1    7908
0    6075
Name: class, dtype: int64

In [54]:
pcl_df_train_train.columns

Index(['par_id', 'art_id', 'keyword', 'country_code', 'text', 'label', 'class',
       'preprocessed_text'],
      dtype='object')

In [55]:
pcl_df_train_train = pcl_df_train_train[['text', 'class']]
pcl_df_train_train_aug = pcl_df_train_train_aug[['text', 'class']]
pcl_df_train_train_gpt = pcl_df_train_train_gpt[['text', 'class']]


pcl_df_train_dev = pcl_df_train_dev[['text', 'class']]
pcl_df_dev = pcl_df_dev[['text', 'class']]


In [56]:
pcl_df_train_train = datasets.Dataset.from_pandas(pcl_df_train_train)
pcl_df_train_train_aug = datasets.Dataset.from_pandas(pcl_df_train_train_aug)
pcl_df_train_train_gpt = datasets.Dataset.from_pandas(pcl_df_train_train_gpt)

pcl_df_train_dev = datasets.Dataset.from_pandas(pcl_df_train_dev)
pcl_df_dev = datasets.Dataset.from_pandas(pcl_df_dev)

In [57]:
type(pcl_df_train_train)

datasets.arrow_dataset.Dataset

### Helper functions

In [58]:
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

In [59]:
def model_init_clf():

    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-base", 
        num_labels=2, 
        id2label=id2label, 
        label2id=label2id
    )

    model.classifier = torch.nn.Sequential(
        torch.nn.Linear(768, 1024),
        torch.nn.BatchNorm1d(1024),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),
        torch.nn.Linear(1024, 256),
        torch.nn.BatchNorm1d(256),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 64),
        torch.nn.BatchNorm1d(64),
        torch.nn.Dropout(0.2),
        torch.nn.ReLU(),  
        torch.nn.Linear(64, 2),
        torch.nn.Softmax(dim=-1)
    )

    return model


def model_init():

    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-base", 
        num_labels=2, 
        id2label=id2label, 
        label2id=label2id
    )

    return model


tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/merges.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/tokenizer_config.json
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 

In [60]:
def tokenization(batched_text):
    return tokenizer(
        batched_text['text'], 
        padding = 'max_length', 
        truncation=True, 
        max_length = 512
    )

In [61]:
# define accuracy metrics
def compute_metrics(pred):
    preds, labels = pred
    preds = np.argmax(preds, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary'
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

### Tokenization

In [62]:
pcl_df_train_train = pcl_df_train_train.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train)
)

pcl_df_train_train_aug = pcl_df_train_train_aug.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train_aug)
)

pcl_df_train_train_gpt = pcl_df_train_train_gpt.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_train_gpt)
)


pcl_df_train_dev = pcl_df_train_dev.map(
    tokenization, batched = True, batch_size = len(pcl_df_train_dev)
)

pcl_df_dev = pcl_df_dev.map(
    tokenization, batched = True, batch_size = len(pcl_df_dev)
)


Exception in thread Thread-16:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.8/dist-packages/tensorboard/summary/writer/event_file_writer.py", line 247, in run
    self._record_writer.flush()
  File "/usr/local/lib/python3.8/dist-packages/tensorboard/summary/writer/record_writer.py", line 43, in flush
    self._writer.flush()
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/lib/io/file_io.py", line 221, in flush
    self._writable_file.flush()
tensorflow.python.framework.errors_impl.FailedPreconditionError: /content/drive/MyDrive/NLP/logs/events.out.tfevents.1678118948.bab67a06e9e0.46919.2; Transport endpoint is not connected


Map:   0%|          | 0/6700 [00:00<?, ? examples/s]

Map:   0%|          | 0/12150 [00:00<?, ? examples/s]

Map:   0%|          | 0/13983 [00:00<?, ? examples/s]

Map:   0%|          | 0/1675 [00:00<?, ? examples/s]

Map:   0%|          | 0/2094 [00:00<?, ? examples/s]

In [63]:
pcl_df_train_train.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

pcl_df_train_train_aug.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)

pcl_df_train_train_gpt.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)



pcl_df_train_dev.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)
pcl_df_dev.set_format(
    'torch', columns=['input_ids', 'attention_mask', 'class']
)


In [64]:
pcl_df_train_train = pcl_df_train_train.rename_column("class", "label")
pcl_df_train_train_aug= pcl_df_train_train_aug.rename_column("class", "label")
pcl_df_train_train_gpt = pcl_df_train_train_gpt.rename_column("class", "label")


pcl_df_train_dev = pcl_df_train_dev.rename_column("class", "label")
pcl_df_dev = pcl_df_dev.rename_column("class", "label")

### Grid search

In [25]:
learning_rate_vals = [1e-5, 2e-5]
weight_decay_vals = [0.1, 0.01]
per_device_train_batch_size_vals = [16, 32]
warmup_steps_vals = [0, 200]

In [26]:
for learning_rate, weight_decay, per_device_train_batch_size, warmup_steps in list(
    itertools.product(
    learning_rate_vals, weight_decay_vals, 
    per_device_train_batch_size_vals, warmup_steps_vals)
    ):
  
  print(learning_rate, weight_decay, per_device_train_batch_size, warmup_steps)

2e-05 0.1 16 0
2e-05 0.1 16 200
2e-05 0.1 32 0
2e-05 0.1 32 200
2e-05 0.01 16 0
2e-05 0.01 16 200
2e-05 0.01 32 0
2e-05 0.01 32 200


In [27]:
experiment_lr = []
experiment_wd = []
experiment_train_batch_size = []
experiment_warmup = []

experiment_acc = []
experiment_precision = []
experiment_recall = []
experiment_f1 = []

for learning_rate, weight_decay, per_device_train_batch_size, warmup_steps in tqdm(
    list(
    itertools.product(
    learning_rate_vals, weight_decay_vals, 
    per_device_train_batch_size_vals, warmup_steps_vals)
    )):
    
    training_args = TrainingArguments(
        output_dir=hp_search_folder, 
        learning_rate=learning_rate,  # config
        warmup_steps=warmup_steps, #config
        weight_decay=weight_decay,  # config
        per_device_train_batch_size=per_device_train_batch_size,  # config
        num_train_epochs=10,
        per_device_eval_batch_size=16, 
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        gradient_accumulation_steps=8,
        logging_steps=100,
        logging_dir=logging_folder,
    )

    trainer = Trainer(
        args=training_args,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    metrics = trainer.evaluate()

    experiment_lr.append(learning_rate)
    experiment_wd.append(weight_decay)
    experiment_train_batch_size.append(per_device_train_batch_size)
    experiment_warmup.append(warmup_steps)
    experiment_acc.append(metrics['eval_accuracy'])
    experiment_precision.append(metrics['eval_precision'])
    experiment_recall.append(metrics['eval_recall'])
    experiment_f1.append(metrics['eval_f1'])


  0%|          | 0/8 [00:00<?, ?it/s]loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4

Epoch,Training Loss,Validation Loss


  0%|          | 0/8 [01:23<?, ?it/s]


KeyboardInterrupt: ignored

In [None]:
grid_search_results = pd.DataFrame({
    'learning_rate': experiment_lr,
    'weight_decay': experiment_wd,
    'per_device_train_batch_size': experiment_train_batch_size,
    'warmup_steps': experiment_warmup,
    'accuracy': experiment_acc,
    'precision': experiment_precision,
    'recall': experiment_recall,
    'f1': experiment_f1
})

In [None]:
# get the best hyperparameters with highest f1 score
grid_search_results = grid_search_results.sort_values(by='f1', ascending=False)
grid_search_results.to_csv(f"{results_folder}_grid_search_results.csv", index=False)

# get the first row of the dataframe
best_hyperparameters = grid_search_results.iloc[0]

# get the best hyperparameters
best_learning_rate = best_hyperparameters['learning_rate']
best_weight_decay = best_hyperparameters['weight_decay']
best_per_device_train_batch_size = int(best_hyperparameters['per_device_train_batch_size'])
best_warmup_steps = int(best_hyperparameters['warmup_steps'])

In [None]:
best_hyperparameters

### Train with best hyperparameters on the original train data (without modified classifier)

In [65]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [66]:
training_args_og = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-og'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [67]:
trainer_og = Trainer(
        args=training_args_og,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [68]:
trainer_og.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.226401,0.912239,0.316279,0.73913,0.201183
1,0.282100,0.197442,0.92,0.507353,0.669903,0.408284
2,0.282100,0.212624,0.921194,0.52518,0.669725,0.431953
3,0.148400,0.218819,0.922388,0.518519,0.693069,0.414201
4,0.148400,0.225651,0.920597,0.536585,0.652542,0.455621


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-52
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-52/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-52/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassific

TrainOutput(global_step=260, training_loss=0.18767846180842473, metrics={'train_runtime': 571.2245, 'train_samples_per_second': 58.646, 'train_steps_per_second': 0.455, 'total_flos': 1.0257529279905792e+16, 'train_loss': 0.18767846180842473, 'epoch': 4.99})

In [69]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_og.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.2256513237953186,
 'eval_accuracy': 0.9205970149253732,
 'eval_f1': 0.5365853658536586,
 'eval_precision': 0.652542372881356,
 'eval_recall': 0.4556213017751479,
 'eval_runtime': 8.3648,
 'eval_samples_per_second': 200.245,
 'eval_steps_per_second': 12.553,
 'epoch': 4.99}

### Make predictions on official dev set

In [70]:
dev_set_preds_og, dev_set_labels_og, dev_set_metrics_og = trainer_og.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [71]:
dev_set_metrics_og

{'dev_loss': 0.2107057124376297,
 'dev_accuracy': 0.9235912129894938,
 'dev_f1': 0.550561797752809,
 'dev_precision': 0.6242038216560509,
 'dev_recall': 0.49246231155778897,
 'dev_runtime': 10.543,
 'dev_samples_per_second': 198.615,
 'dev_steps_per_second': 12.425}

In [72]:
dev_set_pred_labels_og = np.argmax(dev_set_preds_og)

### Saving trained model

In [73]:
# save the best model
trainer_og.save_model(f'{results_folder}/deberta_og')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_og
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_og/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_og/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_og/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_og/special_tokens_map.json


### Train with best hyperparameters on the original train data

In [74]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [75]:
training_args = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [76]:
trainer = Trainer(
        args=training_args,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [77]:
trainer.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,No log,0.625831,0.899104,0.0,0.0,0.0
1,0.656800,0.624853,0.899104,0.0,0.0,0.0
2,0.656800,0.598154,0.899701,0.023256,0.666667,0.011834
3,0.637400,0.600669,0.865672,0.380165,0.35567,0.408284
4,0.637400,0.61265,0.833433,0.449704,0.337278,0.674556


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-52
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-52/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-52/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-52/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. I

TrainOutput(global_step=260, training_loss=0.6420766537006085, metrics={'train_runtime': 570.8137, 'train_samples_per_second': 58.688, 'train_steps_per_second': 0.455, 'total_flos': 1.036725218131968e+16, 'train_loss': 0.6420766537006085, 'epoch': 4.99})

In [78]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.6126503348350525,
 'eval_accuracy': 0.8334328358208956,
 'eval_f1': 0.44970414201183434,
 'eval_precision': 0.33727810650887574,
 'eval_recall': 0.6745562130177515,
 'eval_runtime': 8.3784,
 'eval_samples_per_second': 199.92,
 'eval_steps_per_second': 12.532,
 'epoch': 4.99}

### Make predictions on official dev set

In [79]:
dev_set_preds, dev_set_labels, dev_set_metrics = trainer.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [80]:
dev_set_metrics

{'dev_loss': 0.6138441562652588,
 'dev_accuracy': 0.8247373447946514,
 'dev_f1': 0.41279999999999994,
 'dev_precision': 0.3028169014084507,
 'dev_recall': 0.6482412060301508,
 'dev_runtime': 10.5319,
 'dev_samples_per_second': 198.824,
 'dev_steps_per_second': 12.438}

In [81]:
dev_set_pred_labels = np.argmax(dev_set_preds)

### Saving trained model

In [82]:
# save the best model
trainer.save_model(f'{results_folder}/deberta')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta
Configuration saved in /content/drive/MyDrive/NLP/results/deberta/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta/special_tokens_map.json


### Train with best hyperparameters on the augmented train data (without chatgpt data)

In [83]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [84]:
training_args_aug = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-aug'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [85]:
trainer_aug = Trainer(
        args=training_args_aug,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_aug,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [86]:
trainer_aug.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.455971,0.887761,0.06,0.193548,0.035503
2,0.520700,0.448133,0.90209,0.203883,0.567568,0.12426
3,0.447700,0.435982,0.917612,0.546053,0.614815,0.491124
4,0.423400,0.418351,0.921791,0.501901,0.702128,0.390533
5,0.407600,0.416931,0.917612,0.543046,0.616541,0.485207


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-95
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-95/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-95/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-95/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-95/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassific

TrainOutput(global_step=475, training_loss=0.4418833883185136, metrics={'train_runtime': 965.0952, 'train_samples_per_second': 62.947, 'train_steps_per_second': 0.492, 'total_flos': 1.882504094976e+16, 'train_loss': 0.4418833883185136, 'epoch': 5.0})

In [87]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_aug.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.4359823167324066,
 'eval_accuracy': 0.9176119402985075,
 'eval_f1': 0.5460526315789473,
 'eval_precision': 0.6148148148148148,
 'eval_recall': 0.4911242603550296,
 'eval_runtime': 8.3922,
 'eval_samples_per_second': 199.591,
 'eval_steps_per_second': 12.512,
 'epoch': 5.0}

### Make predictions on official dev set

In [88]:
dev_set_preds_aug, dev_set_labels_aug, dev_set_metrics_aug = trainer_aug.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [89]:
dev_set_metrics_aug

{'dev_loss': 0.4416044056415558,
 'dev_accuracy': 0.9173829990448902,
 'dev_f1': 0.5181058495821728,
 'dev_precision': 0.58125,
 'dev_recall': 0.46733668341708545,
 'dev_runtime': 10.5645,
 'dev_samples_per_second': 198.211,
 'dev_steps_per_second': 12.4}

In [90]:
dev_set_pred_labels_aug = np.argmax(dev_set_preds_aug)

### Saving trained model

In [91]:
# save the best model
trainer_aug.save_model(f'{results_folder}/deberta_aug')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_aug
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_aug/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_aug/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_aug/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_aug/special_tokens_map.json


### Train with best hyperparameters on the augmented train data + chatgpt data

In [92]:
# hyperparameters
# lr = best_learning_rate
# weight_decay = best_weight_decay
# train_batch_size = best_per_device_train_batch_size
# warmup_steps = best_warmup_steps
# eval_batch_size = 16
# gradient_accumulation_steps = 8
# logging_steps = 100

lr = 2e-05
weight_decay = 0.1
train_batch_size = 16
warmup_steps = 0
eval_batch_size = 16
gradient_accumulation_steps = 8
logging_steps = 100

In [93]:
training_args_gpt = TrainingArguments(
    output_dir = results_folder,
    num_train_epochs = 5,
    per_device_train_batch_size = train_batch_size,
    learning_rate = lr,
    gradient_accumulation_steps = gradient_accumulation_steps,    
    per_device_eval_batch_size= eval_batch_size,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=True,
    metric_for_best_model = 'eval_f1',
    greater_is_better = True,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_steps = logging_steps,
    fp16 = True,
    logging_dir=logging_folder,
    dataloader_num_workers = 0,
    run_name = 'deberta-classification-gpt'
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [94]:
trainer_gpt = Trainer(
        args=training_args_gpt,
        tokenizer=tokenizer,
        train_dataset=pcl_df_train_train_gpt,
        eval_dataset=pcl_df_train_dev,
        model_init=model_init_clf,
        compute_metrics=compute_metrics,
    )

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

In [95]:
trainer_gpt.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--deberta-base/snapshots/0d1b43ccf21b5acd9f4e5f7b077fa698f05cf195/config.json
Model config DebertaConfig {
  "_name_or_path": "microsoft/deberta-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "c2p",
    "p2c"
  ],
  "position_biased_input": false,
  "relative_attention": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "voc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,0.5275,0.443908,0.898507,0.0,0.0,0.0
1,0.4425,0.421078,0.918806,0.358491,0.883721,0.224852
2,0.4206,0.41019,0.921194,0.426087,0.803279,0.289941
3,0.407,0.405032,0.922388,0.545455,0.666667,0.461538
4,0.3949,0.403793,0.920597,0.536585,0.652542,0.455621


The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/NLP/results/checkpoint-109
Configuration saved in /content/drive/MyDrive/NLP/results/checkpoint-109/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/checkpoint-109/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/checkpoint-109/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClas

TrainOutput(global_step=545, training_loss=0.43446609427075866, metrics={'train_runtime': 1112.7087, 'train_samples_per_second': 62.833, 'train_steps_per_second': 0.49, 'total_flos': 2.165545945239552e+16, 'train_loss': 0.43446609427075866, 'epoch': 5.0})

In [96]:
# evaluate the model on eval_dataset=pcl_df_train_dev, this should give the 
# best performance found during the training process
trainer_gpt.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1675
  Batch size = 16


{'eval_loss': 0.40503227710723877,
 'eval_accuracy': 0.9223880597014925,
 'eval_f1': 0.5454545454545455,
 'eval_precision': 0.6666666666666666,
 'eval_recall': 0.46153846153846156,
 'eval_runtime': 8.3583,
 'eval_samples_per_second': 200.4,
 'eval_steps_per_second': 12.562,
 'epoch': 5.0}

### Make predictions on official dev set

In [97]:
dev_set_preds_gpt, dev_set_labels_gpt, dev_set_metrics_gpt = trainer_gpt.predict(
    pcl_df_dev, metric_key_prefix="dev"
)

The following columns in the test set don't have a corresponding argument in `DebertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DebertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2094
  Batch size = 16


In [98]:
dev_set_metrics_gpt

{'dev_loss': 0.4062204957008362,
 'dev_accuracy': 0.9259789875835721,
 'dev_f1': 0.5373134328358209,
 'dev_precision': 0.6617647058823529,
 'dev_recall': 0.45226130653266333,
 'dev_runtime': 10.569,
 'dev_samples_per_second': 198.127,
 'dev_steps_per_second': 12.395}

In [99]:
dev_set_pred_labels_gpt = np.argmax(dev_set_preds_gpt)

### Saving trained model

In [100]:
# save the best model
trainer_gpt.save_model(f'{results_folder}/deberta_gpt')

Saving model checkpoint to /content/drive/MyDrive/NLP/results/deberta_gpt
Configuration saved in /content/drive/MyDrive/NLP/results/deberta_gpt/config.json
Model weights saved in /content/drive/MyDrive/NLP/results/deberta_gpt/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/NLP/results/deberta_gpt/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/NLP/results/deberta_gpt/special_tokens_map.json
