### In this notebook I will do the following:
* Perform standard fine-tuning with the BERT Transformer using the entire dataset or only a few samples.
* Run experiments across different datasets, numbers of training examples and training seeds in order to obtain reliable results.
* Compare the results. 

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited project modules
# are re-imported automatically without restarting the kernel.
%load_ext autoreload
%autoreload 2

* Import the libraries

In [8]:
import os
import warnings

import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer
from transformers import TrainingArguments

from helper_functions import get_data, preprocess_ade_sent
warnings.filterwarnings('ignore')

In [9]:
# Tokenizers already loaded in this kernel, keyed by checkpoint name.
_TOKENIZER_CACHE = {}

def tokenize_function(examples):
    """
        Tokenize the "text" entry of `examples` with the 'bert-base-uncased'
        tokenizer and return the tokenizer output.

        Args:
            examples: mapping with a "text" entry (a single string or a list of
                strings, e.g. one batch handed over by `Dataset.map`).

        Returns:
            The tokenizer output, produced with truncation=True and
            padding='max_length'.
    """
    checkpoint = 'bert-base-uncased'
    # With `map(..., batched=True)` this function runs once per batch, so the
    # tokenizer is loaded on first use only and re-used afterwards instead of
    # being re-loaded on every call.
    tokenizer = _TOKENIZER_CACHE.get(checkpoint)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        _TOKENIZER_CACHE[checkpoint] = tokenizer
    return tokenizer(examples["text"], truncation=True, padding='max_length')

def compute_metrics(eval_preds):
    """
        Accuracy metric for the Trainer.

        `eval_preds` unpacks into (logits, labels); the predicted class is the
        arg-max over the last axis of the logits. The result is returned as a
        dict with the single key 'Accuracy'.
    """
    raw_scores, gold_labels = eval_preds
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return {'Accuracy': accuracy_score(gold_labels, predicted_classes)}

def tokenize_data(df):
    """
        Apply `tokenize_function` to `df` in batched mode and return the result
        with the 'text' and 'label_text' columns removed.
    """
    return (
        df.map(tokenize_function, batched=True)
          .remove_columns(column_names=['text', 'label_text'])
    )

def finetune_bert(train_df, test_df, seed=None):
    """
        Fine tune 'bert-base-uncased' as a 2-label sequence classifier and
        return the trainer object.

        Args:
            train_df: dataset used for training; it is passed through `tokenize_data`.
            test_df: dataset used for evaluation; it is passed through `tokenize_data`.
            seed: optional training seed. When given, it is set globally before the
                model is created and forwarded to `TrainingArguments`. When None
                (the default) no seed is passed, so the library default applies.

        Returns:
            The `Trainer` instance after `train()` has completed.
    """
    seed_kwargs = {}
    if seed is not None:
        # Seed before the model is instantiated so that any randomly
        # initialised weights are covered by the seed as well.
        from transformers import set_seed
        set_seed(seed)
        seed_kwargs['seed'] = seed

    tokenized_train = tokenize_data(train_df)
    tokenized_test = tokenize_data(test_df)

    model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

    training_args = TrainingArguments(
        output_dir='model_results5',     # output directory
        overwrite_output_dir=True,       # overwrite the output_dir if it exists
        num_train_epochs=1,              # total number of training epochs
        learning_rate=2e-5,              # set learning rate
        per_device_train_batch_size=8,   # batch size per device during training
        per_device_eval_batch_size=8,    # batch size for evaluation
        warmup_steps=20,                 # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        logging_dir=None,                # directory for storing logs
        logging_steps=50,                # nr of training/update steps between log outputs
        evaluation_strategy='steps',     # evaluate on a step schedule during training
        **seed_kwargs
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        compute_metrics=compute_metrics
    )

    # Call train() directly: wrapping its return value in tqdm only happened
    # after training had finished and produced a progress bar that never advanced.
    trainer.train()

    return trainer


    

In [10]:
def run_experiment_finetune_bert(seeds, dataset_dic, nr_ex_class, get_full_data=False):
    """
        Run the standard BERT fine-tuning experiment for every combination of
        dataset, number of examples per class and seed.

        Args:
            seeds: iterable of seeds; each one is forwarded to `get_data`.
            dataset_dic: mapping of dataset name -> dataset object handed to `get_data`.
            nr_ex_class: iterable of "examples per class" values forwarded to `get_data`.
            get_full_data: forwarded unchanged to `get_data`.

        Returns:
            A DataFrame with one row per completed run and the columns
            'dataset_name', 'nr_ex_per_class', 'seed', 'model', 'accuracy'.
            If a run raises, the error is printed and the rows collected up to
            that point are returned.
    """
    cols = ['dataset_name', 'nr_ex_per_class', 'seed', 'model', 'accuracy']
    # Label written to the 'model' column. Storing the Trainer object itself kept
    # every fine-tuned model alive for the whole sweep and only produced an
    # object repr when the results were written to CSV.
    model_name = 'bert-base-uncased'
    rows = []

    try:
        for dataset_name in tqdm(dataset_dic):
            for nr in nr_ex_class:
                for seed in seeds:
                    # get the training and test data
                    train_df, test_df = get_data(dataset_dic[dataset_name], seed=seed,
                                                 nr_example_per_class=nr, get_full_data=get_full_data)
                    # finetune the BERT model
                    # NOTE(review): the seed is only passed to get_data here, not to finetune_bert.
                    trainer = finetune_bert(train_df, test_df)
                    acc_score = trainer.evaluate()['eval_Accuracy']

                    # save the results
                    rows.append([dataset_name, nr, seed, model_name, acc_score])
                    # drop the reference so the model can be freed before the next run
                    del trainer
    except Exception as e:
        # best effort: report the failure and keep the runs finished so far
        print(f'Error MSG: {e}')

    # build the frame once instead of concatenating row by row
    return pd.DataFrame(rows, columns=cols)


In [None]:
# Fetch the three corpora used in the experiments: ade, sent and sst2.
sst2_df = load_dataset('SetFit/sst2')
sent_df = load_dataset('SetFit/SentEval-CR')
ade_df = load_dataset('SetFit/ade_corpus_v2_classification')

# ade and sent go through the project's preprocessing helper; sst2 is used as loaded.
ade_df, sent_df = preprocess_ade_sent(ade_df, sent_df)

# Experiment grid: dataset name -> dataset, data seeds, and examples per class.
dataset_dic = dict(ade=ade_df, sent=sent_df, sst2=sst2_df)
seeds = [10, 20, 30, 40, 50]
nr_ex_class = [18, 50]


* Run experiments using few-shot training data.

In [None]:
# Few-shot sweep: get_full_data keeps its default (False), so each value in
# nr_ex_class is forwarded to get_data as nr_example_per_class.
result_df_fewshot = run_experiment_finetune_bert(seeds, dataset_dic, nr_ex_class)

In [21]:
# save the results; create the output folder first so a finished run is not
# lost to a missing directory
os.makedirs('Results_DF', exist_ok=True)
result_df_fewshot.to_csv('Results_DF/result_df_finetune_bert_fewshot.csv', index=False)

* Run experiments using training size = 800.

In [None]:
# Same sweep with get_full_data=True forwarded to get_data.
# NOTE(review): nr_ex_class is still iterated here; if get_data ignores
# nr_example_per_class in this mode, every (dataset, seed) run is repeated once
# per entry in nr_ex_class - confirm against helper_functions.get_data.
result_df_fulldata = run_experiment_finetune_bert(seeds, dataset_dic, nr_ex_class, get_full_data=True)

In [None]:
# save the results; create the output folder first so a finished run is not
# lost to a missing directory
os.makedirs('Results_DF', exist_ok=True)
result_df_fulldata.to_csv('Results_DF/result_df_finetune_bert_fulldata.csv', index=False)