In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the mounted input directory recursively and print the full path of every file,
# so the dataset paths used further down can be checked against what is actually attached.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install jupyter-lsp==2.2.0

In [None]:
!pip install comet_ml transformers tensorflow-text sacrebleu bert_score rouge_score datasets sentencepiece deep-translator

In [None]:
import pandas as pd
import numpy as np
import torch
from comet_ml import Experiment
from tqdm.auto import tqdm
from datasets import load_metric

import tensorflow_hub as hub
import tensorflow as tf
import tensorflow_text as text  # Needed for loading universal-sentence-encoder-cmlm/multilingual-preprocess
import numpy as np

from transformers import get_cosine_schedule_with_warmup, get_constant_schedule_with_warmup
from transformers.models.bart.modeling_bart import shift_tokens_right
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

import pickle

In [None]:
# SECURITY: an access token used to be pasted on this line in plain text. Treat that token as
# leaked and revoke it; never store credentials in the notebook source.
# Supply the token through the prompt opened by notebook_login() instead.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
def read_dataset(input_path):
  """Load a parquet file and keep only the two sentence-pair columns.

  Args:
    input_path: location of the parquet file, passed straight to `pd.read_parquet`.

  Returns:
    A DataFrame holding exactly the columns "#1 String" and "#2 String", in that order.

  Raises:
    KeyError: if either column is missing from the file.
  """
  sentence_pair_columns = ["#1 String", "#2 String"]
  full_frame = pd.read_parquet(input_path)
  return full_frame[sentence_pair_columns]

In [None]:
from transformers import AutoConfig, AutoModelForCausalLM

# Download configuration from huggingface.co and cache.
config = AutoConfig.from_pretrained("ThanhJamieAI/ParapharseV8_8E_4B")
# from_config builds the architecture described by `config` only; it does not load the
# checkpoint's pretrained weights.
# NOTE(review): the experiment loop further down rebinds `model` with from_pretrained, so this
# instance looks unused — confirm whether this cell is still needed.
model = AutoModelForCausalLM.from_config(config)

In [None]:
def preprocess_dataset(input_path: str, sep: str, select_columns: list, rename_columns: list):
    """Read a tabular file and optionally select and rename columns.

    The reader is chosen from the file extension (the text after the last dot of the
    file name, compared case-insensitively):

    * ``txt``     -> ``pd.read_csv`` with quoting disabled, so quote characters stay in the text
    * ``parquet`` -> ``pd.read_parquet`` (``sep`` is ignored)
    * otherwise   -> ``pd.read_csv`` with pandas' default quoting

    Args:
        input_path: path of the file to read.
        sep: field separator for the CSV/TXT readers.
        select_columns: columns to keep; an empty list returns the whole frame.
        rename_columns: new names for ``select_columns`` (same order); an empty list
            keeps the original names.

    Returns:
        The full frame, the selected columns, or the selected columns under their new
        names. When both lists are non-empty but differ in length, the full frame is
        returned unchanged.
    """
    # Local imports: the notebook's import cell does not bring `csv` into scope.
    import csv
    import os

    # splitext looks only at the final suffix, so "./data/pairs.v1.txt" is still "txt"
    # and a path without any dot simply falls through to the default CSV reader.
    extension = os.path.splitext(input_path)[1].lstrip(".").lower()

    if extension == "txt":
        df = pd.read_csv(input_path, sep=sep, quoting=csv.QUOTE_NONE)
    elif extension == "parquet":
        df = pd.read_parquet(input_path)
    else:
        df = pd.read_csv(input_path, sep=sep)

    if len(select_columns) == 0:
        return df
    if len(rename_columns) == 0:
        return df[select_columns]
    if len(select_columns) != len(rename_columns):
        return df

    # One simultaneous rename instead of a chain of in-place renames.
    renamed = df.rename(columns=dict(zip(select_columns, rename_columns)))
    return renamed[rename_columns]

In [None]:
# Load the uploaded dataset so that a smaller subset can be cut from it.
data_test = pd.read_parquet("/kaggle/input/1111111/train_quality_dataset11111.parquet")
# data_test['#1 String'] = data_test['#1 String'].astype('str')
# data_test['#2 String'] = data_test['#2 String'].astype('str')
# Positional slice: keeps rows 0..34999 (fewer when the file has fewer rows).
a = data_test.iloc[0:35000]

In [None]:
# Persist the subset held in `a` to the Kaggle working directory as a parquet file.
a.to_parquet('/kaggle/working/test.parquet')

In [None]:
def tokenize_and_prepare_dataset(raw_dataframe, tokenizer, model):
  """Tokenize the sentence pairs of a training frame into model-ready tensors.

  Args:
    raw_dataframe: frame with the source text in '#1 String' and the target text in '#2 String'.
    tokenizer: Hugging Face style tokenizer; called once for the sources and once,
      through `text_target=`, for the targets.
    model: only `model.config.pad_token_id` is read, to locate padding in the labels.

  Returns:
    The tokenizer output for the sources (PyTorch tensors) with an additional 'labels'
    entry: the target token ids in which every padding position is replaced by -100.
  """
  source_texts = raw_dataframe['#1 String'].tolist()
  target_texts = raw_dataframe['#2 String'].tolist()

  # Pad to the longest *tokenized* sequence. The previous width was the longest string
  # measured in characters, which only added padding positions.
  data_x = tokenizer(source_texts, padding='longest', return_tensors='pt')

  # `text_target=` is the replacement for the deprecated `as_target_tokenizer()` context manager.
  labels = tokenizer(text_target=target_texts, padding='longest', return_tensors='pt')['input_ids']

  pad_token_id = model.config.pad_token_id
  if pad_token_id is None:
    # Comparing against None would not mark any position; use the tokenizer's pad id instead.
    pad_token_id = tokenizer.pad_token_id

  # -100 marks positions that must not contribute to the loss.
  labels[labels == pad_token_id] = -100

  data_x['labels'] = labels

  return data_x

In [None]:
def tokenize_and_prepare_test_dataset(raw_dataframe, tokenizer):
  """Tokenize an evaluation frame and return everything needed for scoring.

  Args:
    raw_dataframe: frame with the source text in '#1 String' and the reference text in '#2 String'.
    tokenizer: Hugging Face style tokenizer.

  Returns:
    A 4-tuple ``(data_x, reference_ids, reference_texts, source_texts)``:
      * data_x: tokenizer output for the sources as PyTorch tensors,
      * reference_ids: unpadded token ids of the references, without special tokens,
      * reference_texts: the raw '#2 String' values as a list,
      * source_texts: the raw '#1 String' values as a list.
  """
  source_texts = raw_dataframe['#1 String'].tolist()
  reference_texts = raw_dataframe['#2 String'].tolist()

  # Pad to the longest tokenized source rather than to the longest character count,
  # which only added padding positions.
  data_x = tokenizer(source_texts, padding='longest', return_tensors='pt')

  # `text_target=` is the replacement for the deprecated `as_target_tokenizer()` context manager.
  reference_ids = tokenizer(text_target=reference_texts, add_special_tokens=False)['input_ids']

  return data_x, reference_ids, reference_texts, source_texts

In [None]:
class DatasetFromDictData(torch.utils.data.Dataset):
  """Dataset view over a mapping of equally long, index-aligned columns.

  Item ``i`` is a dict holding the ``i``-th element of every column. The two
  transform arguments are stored on the instance but are not applied by this class.
  """

  def __init__(self, data, transform=None, target_transform=None):
    self.data, self.transform, self.target_transform = data, transform, target_transform

  def __len__(self):
    # Prefer 'input_ids' as the length reference; otherwise use the first column.
    length_key = 'input_ids' if 'input_ids' in self.data else list(self.data.keys())[0]
    return len(self.data[length_key])

  def __getitem__(self, idx):
    sample = {}
    for key in self.data.keys():
      sample[key] = self.data[key][idx]
    return sample

In [None]:
def train(model, dataloader, experiment, hyperparameters, tokenized_labels, do_logging=True, validation_dataloader=None):
  """Fine-tune `model` with one optimizer step per batch, logging to `experiment`.

  Args:
    model: module whose forward accepts a batch dict as keyword arguments and returns an
      object exposing `.loss` and `.logits`.
    dataloader: yields dict batches of tensors with a 'labels' entry in which -100 marks
      positions excluded from the accuracy computation.
    experiment: logger exposing `log_parameters`, `log_metric` and `log_metrics`.
    hyperparameters: dict read for 'lr_rate', 'num_epochs' and 'num_warmup_steps_percent_of_all';
      logged as a whole.
    tokenized_labels: forwarded to `evaluate` for the validation runs.
    do_logging: when False, per-batch accuracy/loss metrics are neither computed nor logged.
    validation_dataloader: when given, validation runs once before training and after every epoch.

  NOTE(review): the validation branch reads the notebook-level variable `untokenized_labels`
  (it is not a parameter) and calls `get_metrics_dict` with label-side scores only; validation
  can only run when both are available/supported.
  """
  optimizer = torch.optim.AdamW(model.parameters(), lr=hyperparameters['lr_rate'])

  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  model.to(device)

  num_training_steps = hyperparameters['num_epochs'] * len(dataloader)

  lr_scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=int(hyperparameters['num_warmup_steps_percent_of_all']*num_training_steps))

  progress_bar = tqdm(range(num_training_steps))

  global_step = 0

  experiment.log_parameters(hyperparameters)

  for epoch in range(hyperparameters['num_epochs']):
    running_avg = 0.0
    running_avg_loss = 0.0
    epoch_step = 0

    if epoch == 0 and validation_dataloader is not None:
      # Baseline validation run before any weight update.
      rouge_score, sacrebleu_score, bertscore_score, labse_score, _ = evaluate(model, validation_dataloader, device, tokenized_labels, untokenized_labels)

      to_log = get_metrics_dict(rouge_score, sacrebleu_score, bertscore_score)
      to_log['labse_score'] = labse_score

      experiment.log_metrics(to_log, epoch=epoch)

    # A validation run may have switched the model to eval mode; make sure every epoch
    # trains with train-mode behaviour (e.g. dropout) enabled.
    model.train()

    for batch in dataloader:
      batch = {k: v.to(device) for k, v in batch.items()}

      outputs = model(**batch)
      loss = outputs.loss
      loss.backward()

      optimizer.step()
      lr_scheduler.step()
      optimizer.zero_grad()

      progress_bar.update(1)

      if not do_logging:
        del batch
        continue

      preds = torch.argmax(outputs.logits.detach(), dim=-1)
      labels = batch['labels'].detach()
      # Build the "counts towards accuracy" mask once and reuse it.
      valid_positions = labels != -100

      num_correct = (labels[valid_positions] == preds[valid_positions]).sum().item()
      num_total = int(valid_positions.sum().item())

      # Drop references to the batch tensors as early as possible.
      del batch
      del preds
      del labels
      del valid_positions

      # A batch without any unmasked label would otherwise divide by zero.
      batch_acc = (float(num_correct)/num_total) if num_total > 0 else 0.0

      epoch_step += 1
      global_step += 1

      the_loss = loss.item()

      # Incremental mean over the batches seen so far in this epoch.
      running_avg = ((epoch_step-1)/float(epoch_step))*running_avg + batch_acc/float(epoch_step)
      running_avg_loss = ((epoch_step-1)/float(epoch_step))*running_avg_loss + the_loss/float(epoch_step)

      progress_bar.set_postfix_str(f"batch_loss={the_loss:.5f} epoch_running_avg_loss={running_avg_loss:.5f} batch_acc={batch_acc:.5f}\tepoch_running_avg_acc={running_avg:.5f}", refresh=True)

      experiment.log_metric('batch_acc', batch_acc, step=global_step)
      experiment.log_metric(f'batch_acc_epoch_{epoch}', batch_acc, step=epoch_step)

      experiment.log_metric('batch_loss', the_loss, step=global_step)

      experiment.log_metric('per_epoch_running_avg_acc', running_avg, step=global_step)
      experiment.log_metric('per_epoch_running_avg_loss', running_avg_loss, step=global_step)

    if validation_dataloader is not None:
      rouge_score, sacrebleu_score, bertscore_score, labse_score, _ = evaluate(model, validation_dataloader, device, tokenized_labels, untokenized_labels)

      to_log = get_metrics_dict(rouge_score, sacrebleu_score, bertscore_score)
      to_log['labse_score'] = labse_score

      experiment.log_metrics(to_log, epoch=epoch+1)

In [None]:
def train_grad_acc(model, dataloader, experiment, hyperparameters, tokenized_labels, do_logging=True, validation_dataloader=None):
  """Fine-tune `model` with gradient accumulation, logging to `experiment`.

  Gradients of `num_grad_acc_steps` consecutive batches are summed (each loss is divided by
  that factor) before a single optimizer/scheduler step; the last, possibly shorter, group of
  an epoch also triggers a step.

  Args:
    model: module whose forward accepts a batch dict as keyword arguments and returns an
      object exposing `.loss` and `.logits`.
    dataloader: yields dict batches of tensors with a 'labels' entry in which -100 marks
      positions excluded from the accuracy computation.
    experiment: logger exposing `log_parameters`, `log_metric` and `log_metrics`.
    hyperparameters: dict read for 'lr_rate', 'num_epochs', 'num_grad_acc_steps' and
      'num_warmup_steps_percent_of_all'; logged as a whole.
    tokenized_labels: forwarded to `evaluate` for the validation runs.
    do_logging: when False, per-batch accuracy/loss metrics are neither computed nor logged.
    validation_dataloader: when given, validation runs once before training and after every epoch.

  NOTE(review): the validation branch reads the notebook-level variable `untokenized_labels`
  (it is not a parameter) and calls `get_metrics_dict` with label-side scores only; validation
  can only run when both are available/supported.
  """
  optimizer = torch.optim.AdamW(model.parameters(), lr=hyperparameters['lr_rate'])

  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  model.to(device)

  num_grad_acc_steps = hyperparameters['num_grad_acc_steps']

  num_training_steps = hyperparameters['num_epochs'] * len(dataloader)
  # Optimizer steps per epoch, rounded up because the trailing partial group also steps.
  optimizer_steps_per_epoch = (len(dataloader) + num_grad_acc_steps - 1) // num_grad_acc_steps
  effective_num_training_steps = hyperparameters['num_epochs'] * optimizer_steps_per_epoch

  # The scheduler advances once per optimizer step, so warm-up is sized in optimizer steps.
  lr_scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=int(hyperparameters['num_warmup_steps_percent_of_all']*effective_num_training_steps))

  progress_bar = tqdm(range(num_training_steps))

  global_step = 0

  experiment.log_parameters(hyperparameters)

  for epoch in range(hyperparameters['num_epochs']):
    running_avg = 0.0
    running_avg_loss = 0.0
    epoch_step = 0

    if epoch == 0 and validation_dataloader is not None:
      # Baseline validation run before any weight update.
      rouge_score, sacrebleu_score, bertscore_score, labse_score, _ = evaluate(model, validation_dataloader, device, tokenized_labels, untokenized_labels)

      to_log = get_metrics_dict(rouge_score, sacrebleu_score, bertscore_score)
      to_log['labse_score'] = labse_score

      experiment.log_metrics(to_log, epoch=epoch)

    # A validation run may have switched the model to eval mode; make sure every epoch
    # trains with train-mode behaviour (e.g. dropout) enabled.
    model.train()

    for batch_num, batch in enumerate(dataloader):
      batch = {k: v.to(device) for k, v in batch.items()}

      outputs = model(**batch)
      # Scale only the tensor that is back-propagated; the unscaled value is what gets reported.
      loss = outputs.loss / num_grad_acc_steps
      loss.backward()

      if ((batch_num+1) % num_grad_acc_steps) == 0 or batch_num+1 == len(dataloader):
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

      progress_bar.update(1)

      if not do_logging:
        del batch
        continue

      preds = torch.argmax(outputs.logits.detach(), dim=-1)
      labels = batch['labels'].detach()
      # Build the "counts towards accuracy" mask once and reuse it.
      valid_positions = labels != -100

      num_correct = (labels[valid_positions] == preds[valid_positions]).sum().item()
      num_total = int(valid_positions.sum().item())

      # Drop references to the batch tensors as early as possible.
      del batch
      del preds
      del labels
      del valid_positions

      # A batch without any unmasked label would otherwise divide by zero.
      batch_acc = (float(num_correct)/num_total) if num_total > 0 else 0.0

      epoch_step += 1
      global_step += 1

      # Report the true per-batch loss, not the value divided by the accumulation factor.
      the_loss = outputs.loss.item()

      # Incremental mean over the batches seen so far in this epoch.
      running_avg = ((epoch_step-1)/float(epoch_step))*running_avg + batch_acc/float(epoch_step)
      running_avg_loss = ((epoch_step-1)/float(epoch_step))*running_avg_loss + the_loss/float(epoch_step)

      progress_bar.set_postfix_str(f"batch_loss={the_loss:.5f} epoch_running_avg_loss={running_avg_loss:.5f} batch_acc={batch_acc:.5f}\tepoch_running_avg_acc={running_avg:.5f}", refresh=True)

      experiment.log_metric('batch_acc', batch_acc, step=global_step)
      experiment.log_metric(f'batch_acc_epoch_{epoch}', batch_acc, step=epoch_step)

      experiment.log_metric('batch_loss', the_loss, step=global_step)

      experiment.log_metric('per_epoch_running_avg_acc', running_avg, step=global_step)
      experiment.log_metric('per_epoch_running_avg_loss', running_avg_loss, step=global_step)

    if validation_dataloader is not None:
      rouge_score, sacrebleu_score, bertscore_score, labse_score, _ = evaluate(model, validation_dataloader, device, tokenized_labels, untokenized_labels)

      to_log = get_metrics_dict(rouge_score, sacrebleu_score, bertscore_score)
      to_log['labse_score'] = labse_score

      experiment.log_metrics(to_log, epoch=epoch+1)


In [None]:
def calculate_labSE_score(outputs, labels):
  """Average LaBSE similarity between index-aligned `outputs` and `labels`.

  Both sequences are embedded in batches of 8 with the LaBSE encoder from TF Hub, every
  embedding is divided by its L2 norm, and the dot product of each output with the label at
  the same position is collected.

  Args:
    outputs: sequence of strings (e.g. model generations).
    labels: sequence of strings paired position by position with `outputs`.

  Returns:
    The mean of the per-pair similarities.
  """
  text_preprocessor = hub.KerasLayer(
      "https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-preprocess/2")
  labse_encoder = hub.KerasLayer("https://tfhub.dev/google/LaBSE/2")

  def embed_unit_length(sentences):
    # Encode, then scale every row to unit L2 norm (recommended for semantic similarity).
    vectors = labse_encoder(text_preprocessor(tf.constant(sentences)))["default"]
    row_norms = np.linalg.norm(vectors, 2, axis=1, keepdims=True)
    return vectors/row_norms

  pair_loader = torch.utils.data.DataLoader(
      DatasetFromDictData({'outputs': outputs, 'labels': labels}), batch_size=8)

  pair_similarities = []
  for pair_batch in pair_loader:
    output_vectors = embed_unit_length(pair_batch['outputs'])
    label_vectors = embed_unit_length(pair_batch['labels'])

    # The diagonal of the full similarity matrix holds the output[i] . label[i] products.
    similarity_matrix = np.matmul(output_vectors, np.transpose(label_vectors))
    pair_similarities += similarity_matrix.diagonal().tolist()

  return np.mean(pair_similarities)

In [None]:
def get_preds_and_labels_for_eval_test(model, evaluation_dataloader, device):
  """Generate outputs for every batch and return them decoded and re-tokenized.

  Generation uses beam search (3 beams, early stopping) with a maximum length of 300 and runs
  without gradient tracking. The model's train/eval mode is restored on exit, so calling this
  in the middle of training does not leave the model in eval mode.

  NOTE(review): decoding relies on a notebook-level `tokenizer` variable (it is not a
  parameter); that variable has to match `model`.

  Args:
    model: model exposing `generate`, `eval`, `train` and the `training` flag.
    evaluation_dataloader: yields dict batches of tensors that are passed to `model.generate`.
    device: device the batch tensors are moved to.

  Returns:
    (decoded_outputs, tokenized_outputs): the generated strings and, for each of them, the
    token ids obtained by tokenizing the string again without special tokens.
  """
  decoded_outputs = []
  tokenized_outputs = []

  eval_progress_bar = tqdm(range(len(evaluation_dataloader)))

  # Remember the current mode so it can be restored even when generation raises.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for batch in evaluation_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}

        curr_outp = model.generate(**batch, max_length=300, num_beams=3, early_stopping=True)

        del batch

        decoded_output = tokenizer.batch_decode(curr_outp.tolist(), skip_special_tokens=True)

        # Re-tokenize the decoded text to obtain raw token ids free of special tokens.
        tokenized_output = tokenizer(decoded_output, add_special_tokens=False)['input_ids']

        decoded_outputs.extend(decoded_output)
        tokenized_outputs.extend(tokenized_output)

        eval_progress_bar.update(1)
  finally:
    model.train(was_training)
    eval_progress_bar.close()

  return decoded_outputs, tokenized_outputs

In [None]:
def evaluate(model, evaluation_dataloader, device, tokenized_labels, untokenized_labels, untokenized_inputs=None):
  """Generate outputs for the evaluation set and score them against labels (and inputs).

  When more than 5000 outputs are produced, a random subset of exactly 5000 aligned samples
  is scored to keep the run time reasonable; otherwise every sample is scored in its
  original order.

  Args:
    model: model handed to `get_preds_and_labels_for_eval_test`.
    evaluation_dataloader: dataloader over the tokenized evaluation inputs.
    device: device used for generation.
    tokenized_labels: token ids of the references, or None; only subsampled alongside the
      other sequences.
    untokenized_labels: reference strings, index-aligned with the dataloader's samples.
    untokenized_inputs: optional source strings; when given, outputs are also scored against them.

  Returns:
    Without `untokenized_inputs`, a 5-tuple:
      (rouge, sacrebleu, bertscore, labse, decoded_outputs) — all relative to the labels.
    With `untokenized_inputs`, an 11-tuple: the four label-relative scores, the four
    input-relative scores, then decoded_outputs, untokenized_inputs and untokenized_labels
    (the latter three restricted to the scored samples).
  """
  max_eval_samples = 5000

  decoded_outputs, _ = get_preds_and_labels_for_eval_test(model, evaluation_dataloader, device)

  num_samples = len(decoded_outputs)
  if num_samples > max_eval_samples:
    print(f'{num_samples} samples, reducing down to {max_eval_samples}')
    kept_indices = np.random.permutation(num_samples)[:max_eval_samples].tolist()
  else:
    kept_indices = range(num_samples)

  def take(sequence):
    # Apply the same selection to every aligned sequence; optional ones may be None.
    return None if sequence is None else [sequence[i] for i in kept_indices]

  decoded_outputs = take(decoded_outputs)
  tokenized_labels = take(tokenized_labels)
  untokenized_labels = take(untokenized_labels)
  untokenized_inputs = take(untokenized_inputs)

  print(f'new len:{len(decoded_outputs)}')

  # NOTE(review): `load_metric` may not exist in recent `datasets` releases — confirm the
  # installed version still provides it.
  rouge = load_metric('rouge')
  sacrebleu = load_metric('sacrebleu')
  bertscore = load_metric('bertscore')

  rouge_types = ['rouge1', 'rouge2', 'rouge3', 'rouge4', 'rougeL', 'rougeLsum']

  def score_against(references, tag):
    # Score the generated outputs against one set of reference strings.
    rouge_score = rouge.compute(predictions=decoded_outputs, references=references, rouge_types=rouge_types)
    print(f'rouge{tag} computed')
    # sacrebleu expects a list of references per prediction.
    sacrebleu_score = sacrebleu.compute(predictions=decoded_outputs, references=[[ref] for ref in references])
    print(f'sacrebleu{tag} computed')
    bertscore_score = bertscore.compute(predictions=decoded_outputs, references=references, lang='en')
    print(f'bertscore{tag} computed')
    labse_score = calculate_labSE_score(decoded_outputs, references)
    print(f'labsescore{tag} computed')
    return rouge_score, sacrebleu_score, bertscore_score, labse_score

  label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score = score_against(untokenized_labels, 1)

  if untokenized_inputs is not None:
    print('vleze')
    input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score = score_against(untokenized_inputs, 2)

    return label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score, input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score, decoded_outputs, untokenized_inputs, untokenized_labels
  else:
    print('else')
    return label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score, decoded_outputs

In [None]:
def evaluate_directly(device, untokenized_labels, untokenized_inputs, untokenized_outputs):
  """Score already generated outputs against labels and, optionally, against inputs.

  Args:
    device: accepted for signature compatibility; not used by this function.
    untokenized_labels: reference strings, index-aligned with `untokenized_outputs`.
    untokenized_inputs: source strings aligned the same way, or None to skip the
      input-relative scores.
    untokenized_outputs: generated strings to score.

  Returns:
    With `untokenized_inputs`, an 8-tuple: (rouge, sacrebleu, bertscore, labse) relative to
    the labels followed by the same four scores relative to the inputs.
    With `untokenized_inputs=None`, only the four label-relative scores.
  """
  print(f'new len:{len(untokenized_labels)}')

  # NOTE(review): `load_metric` may not exist in recent `datasets` releases — confirm the
  # installed version still provides it.
  rouge = load_metric('rouge')
  sacrebleu = load_metric('sacrebleu')
  bertscore = load_metric('bertscore')

  rouge_types = ['rouge1', 'rouge2', 'rouge3', 'rouge4', 'rougeL', 'rougeLsum']

  def score_against(references, tag):
    # Score the outputs against one set of reference strings.
    rouge_score = rouge.compute(predictions=untokenized_outputs, references=references, rouge_types=rouge_types)
    print(f'rouge{tag} computed')
    # sacrebleu expects a list of references per prediction.
    sacrebleu_score = sacrebleu.compute(predictions=untokenized_outputs, references=[[ref] for ref in references])
    print(f'sacrebleu{tag} computed')
    bertscore_score = bertscore.compute(predictions=untokenized_outputs, references=references, lang='en')
    print(f'bertscore{tag} computed')
    labse_score = calculate_labSE_score(untokenized_outputs, references)
    print(f'labsescore{tag} computed')
    return rouge_score, sacrebleu_score, bertscore_score, labse_score

  label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score = score_against(untokenized_labels, 1)

  # The input references are only touched after the None check, so the label-only path is reachable.
  if untokenized_inputs is not None:
    print('vleze')
    input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score = score_against(untokenized_inputs, 2)

    return label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score, input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score
  else:
    print('else')
    return label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score

In [None]:
def get_metrics_dict(label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score=None,
                     input_output_rouge_score=None, input_output_sacrebleu_score=None, input_output_bertscore_score=None, input_output_labse_score=None):
  """Flatten ROUGE / sacreBLEU / BERTScore results into one dict of scalar metrics.

  Keys are prefixed with 'label_output_' (outputs vs. labels) or 'input_output_' (outputs vs.
  inputs). A score passed as None is skipped, so the function can be called with the
  label-side scores only.

  Args:
    label_output_rouge_score / input_output_rouge_score: mapping 'rouge<suffix>' -> aggregate
      whose `.mid` exposes `precision`, `recall` and `fmeasure`.
    label_output_sacrebleu_score / input_output_sacrebleu_score: mapping with a 'score' entry.
    label_output_bertscore_score / input_output_bertscore_score: mapping with per-sample
      'f1', 'precision' and 'recall' sequences, which are averaged.
    label_output_labse_score / input_output_labse_score: accepted for signature
      compatibility; not added to the result (callers log them separately).

  Returns:
    dict mapping metric name to value; the input-side metrics precede the label-side ones
    within each metric family.
  """
  score_groups = [
      ('input_output', input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score),
      ('label_output', label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score),
  ]

  metrics = {}

  for prefix, rouge_score, _, _ in score_groups:
    if rouge_score is None:
      continue
    for suffix in ['1', '2', '3', '4', 'L', 'Lsum']:
      aggregate = rouge_score[f'rouge{suffix}'].mid
      metrics[f'{prefix}_rouge{suffix}_precision'] = aggregate.precision
      metrics[f'{prefix}_rouge{suffix}_recall'] = aggregate.recall
      metrics[f'{prefix}_rouge{suffix}_fmeasure'] = aggregate.fmeasure

  for prefix, _, sacrebleu_score, _ in score_groups:
    if sacrebleu_score is not None:
      metrics[f'{prefix}_sacrebleu_score'] = sacrebleu_score['score']

  for prefix, _, _, bertscore_score in score_groups:
    if bertscore_score is None:
      continue
    for part in ['f1', 'precision', 'recall']:
      metrics[f'{prefix}_bertscore_{part}'] = np.mean(bertscore_score[part])

  return metrics

In [None]:
# Registry of experiment runs consumed by the experiment loop below.
# The dict key is the run name (the loop uses it as the Comet experiment name); the value
# bundles the checkpoint, hyperparameters, dataset locations and the callables to use.
experiments = {
    'ParapharseV13_10E_4B': {
        'experiment_name': 'Paraphraser_Thesis',  # not read by the loop below
        'model_name': 'ThanhJamieAI/ParapharseV8_8E_4B',  # checkpoint passed to from_pretrained for tokenizer and model
        'hyperparameters': {
            # The whole dict is logged to Comet. The training function reads 'num_epochs',
            # 'lr_rate', 'num_warmup_steps_percent_of_all' and 'num_grad_acc_steps'; the loop
            # reads 'batch_size'. 'optimizer', 'dataset' and 'lr_schedule' are logged only.
            'num_epochs': 10,
            'batch_size': 4,
            'optimizer': 'AdamW',
            'dataset': 'combine-',
            'lr_rate': 1e-4,
            'num_warmup_steps_percent_of_all': 0.1,  # fraction of the scheduler's steps spent warming up
            'lr_schedule': 'get_constant_schedule_with_warmup',
            'num_grad_acc_steps': 8  # batches accumulated per optimizer step
        },
        'read_dataset_function' : read_dataset,  # called with each dataset path
        'dataset_path_train': '/kaggle/input/final-train-parapharse/train_set/train_quality_dataset.parquet',
        'dataset_path_valid': '/kaggle/input/final-train-parapharse/validation_set/validation_dataset.parquet',
        'dataset_preprocessing_function': None,  # optional callable applied to both frames; skipped when None
        'training_function': train_grad_acc,
        'tokenizer_class': PegasusTokenizer,
        'model_class': PegasusForConditionalGeneration
    },

}

In [None]:
# import torch

# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
# Release cached GPU memory that PyTorch's allocator holds but no tensor is using.
torch.cuda.empty_cache()

In [None]:
# Close a Comet experiment left open by an earlier (possibly interrupted) run of the
# experiment loop. On a fresh kernel `comet` does not exist yet, so end() is only called
# when the name is defined; this keeps "Restart & Run All" working.
if 'comet' in globals():
    comet.end()
torch.cuda.empty_cache()

In [None]:
# Run every registered experiment: train, save the weights, evaluate on the validation set,
# and log everything to Comet.
for experiment_name, experiment_data in experiments.items():
  # SECURITY: the Comet API key is read from the COMET_API_KEY environment variable (e.g. set
  # from a notebook secret) instead of being hard-coded. A key that was committed in the
  # notebook source must be treated as leaked and rotated.
  comet = Experiment(api_key=os.environ.get("COMET_API_KEY"), project_name='Capstone_ProjectFPTU_Fa23')
  comet.set_name(experiment_name)

  comet.log_parameters(experiment_data['hyperparameters'])

  tokenizer = experiment_data['tokenizer_class'].from_pretrained(experiment_data['model_name'])
  model = experiment_data['model_class'].from_pretrained(experiment_data['model_name'])

  dataset_train = experiment_data['read_dataset_function'](experiment_data['dataset_path_train'])
  dataset_valid = experiment_data['read_dataset_function'](experiment_data['dataset_path_valid'])

  if experiment_data['dataset_preprocessing_function'] is not None:
    # NOTE(review): the return value is discarded, so the preprocessing function has to
    # modify the frame in place to have any effect — confirm against the functions used.
    experiment_data['dataset_preprocessing_function'](dataset_train)
    experiment_data['dataset_preprocessing_function'](dataset_valid)

  data_train = tokenize_and_prepare_dataset(dataset_train, tokenizer, model)
  data_val, tokenized_labels, untokenized_labels, untokenized_input = tokenize_and_prepare_test_dataset(dataset_valid, tokenizer)

  train_dataset = DatasetFromDictData(data_train)
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=experiment_data['hyperparameters']['batch_size'], num_workers=2)

  val_dataset = DatasetFromDictData(data_val)
  val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=experiment_data['hyperparameters']['batch_size'], num_workers=2)

  # Call the training function named in the experiment dict (no validation during training).
  experiment_data['training_function'](model, train_dataloader, comet, experiment_data['hyperparameters'], tokenized_labels, do_logging=True)
  torch.save(model.state_dict(), f'/kaggle/working/{comet.get_name().replace("/", "_")}.pt')

  # free GPU RAM
  torch.cuda.empty_cache()

  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score, input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score, untokenized_output, untokenized_input, untokenized_labels = evaluate(model, val_dataloader, device, tokenized_labels, untokenized_labels, untokenized_input)

  # Save data for later computation of RougeWE score
  dict_to_save = {
      'experiment_url': comet.url,
      'experiment_name': comet.get_name(),
      'experiment_key': comet.get_key(),
      'inputs_strings': untokenized_input,
      'model_outputs_strings': untokenized_output,
      'labels_strings': untokenized_labels
  }

  with open(f'/kaggle/working/{comet.get_name().replace("/", "_")}.pickle', 'wb+') as handle:
      pickle.dump(dict_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

  to_log = get_metrics_dict(label_output_rouge_score, label_output_sacrebleu_score, label_output_bertscore_score, label_output_labse_score,
  input_output_rouge_score, input_output_sacrebleu_score, input_output_bertscore_score, input_output_labse_score)

  to_log['label_output_labse_score'] = label_output_labse_score
  to_log['input_output_labse_score'] = input_output_labse_score

  # Logged once as the point following the last training epoch ...
  comet.log_metrics(to_log, epoch=experiment_data['hyperparameters']['num_epochs'] + 1)

  # ... and once more under a 'test_' prefix, both as metrics and as "other" values.
  for key in list(to_log.keys()):
    to_log[f'test_{key}'] = to_log.pop(key)

  comet.log_metrics(to_log)

  comet.log_others(to_log)

  comet.log_other('test_full_input_output_bleu_score', str(input_output_sacrebleu_score))
  comet.log_other('test_full_label_output_bleu_score', str(label_output_sacrebleu_score))

  comet.end()

In [None]:
# Release cached GPU memory again once the experiment loop has finished.
torch.cuda.empty_cache()

In [None]:
# tokenizer.save_pretrained("/kaggle/working/")


In [None]:

# from huggingface_hub import notebook_login
# notebook_login()

In [None]:
!huggingface-cli repo create "ParapharseV13_10E_4B"

In [None]:
# /kaggle/working/ParapharseV6_8E_2B.pt

In [None]:
# i = input()

In [None]:
# Upload the model and tokenizer currently bound to `model` / `tokenizer` (the ones left
# over from the last iteration of the experiment loop) to the Hub repository.
model.push_to_hub(repo_id = 'ParapharseV13_10E_4B')
tokenizer.push_to_hub(repo_id = 'ParapharseV13_10E_4B')
