In [2]:
#@title Project Setup

from google.colab import drive
drive.mount('/content/drive')

PROJECT_PATH = "/content/drive/MyDrive/TWM/" #@param {"type": "string"}
DATASET_PATH = "DataEngineering/FinalDataset/large/" #@param {"type": "string"}

DATASET_PATH = PROJECT_PATH + DATASET_PATH
LOCAL_DIR = "FineTuning/T5/" #@param {"type": "string"}
LOCAL_DIR =  PROJECT_PATH + LOCAL_DIR

SEED = 512 #@param {"type": "integer"}

print("Installing dependencies... ")
!pip install -q sentencepiece
!pip install -q torch>=1.7.0,!=1.8.0
!pip install -q transformers==4.16.2
!pip install -q pytorch-lightning==1.5.10
!pip install swifter -q
!pip install evaluate -q
!pip install bert-score -q

# parallize the apply function of pandas
import swifter

# to show the full dialogues in the dataframes
import pandas as pd
pd.set_option('max_colwidth', 1000)

# tensorboard setup
%pylab inline
%load_ext tensorboard

print("Runtime info:- ")
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mounted at /content/drive
Installing dependencies... 
[K     |████████████████████████████████| 1.2 MB 8.4 MB/s 
[K     |████████████████████████████████| 3.5 MB 8.2 MB/s 
[K     |████████████████████████████████| 101 kB 13.4 MB/s 
[K     |████████████████████████████████| 596 kB 65.9 MB/s 
[K     |████████████████████████████████| 880 kB 67.9 MB/s 
[K     |████████████████████████████████| 6.6 MB 43.0 MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 527 kB 7.4 MB/s 
[K     |████████████████████████████████| 419 kB 60.9 MB/s 
[K     |████████████████████████████████| 829 kB 50.3 MB/s 
[K     |████████████████████████████████| 140 kB 67.7 MB/s 
[K     |████████████████████████████████| 952 kB 58.7 MB/s 
[K     |████████████████████████████████| 1.1 MB 57.4 MB/s 
[K     |████████████████████████████████| 271 kB 74.1 MB/s 
[K     |████████████████████████████████| 144 kB 59.0 MB/s 
[K     |████████████████

# Code Setup

In [3]:
#@title Imports

import os
import multiprocessing

import torch
import numpy as np

from transformers import (
    T5ForConditionalGeneration,
    MT5ForConditionalGeneration,
    ByT5Tokenizer,
    PreTrainedTokenizer,
    T5TokenizerFast as T5Tokenizer,
    MT5TokenizerFast as MT5Tokenizer,
)

from transformers import AutoTokenizer
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelWithLMHead, AutoTokenizer
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.progress import TQDMProgressBar


import matplotlib.pyplot as plt

plt.style.use("seaborn")

In [4]:
#@title PytorchDataset
class PyTorchDataset(Dataset):
    """  PyTorch Dataset class  """
    def __init__(
        self,
        data: pd.DataFrame,
        tokenizer: PreTrainedTokenizer,
        source_max_token_len: int = 512,
        target_max_token_len: int = 512,
    ):
        """
        initiates a PyTorch Dataset Module for input data
        Args:
            data (pd.DataFrame): input pandas dataframe. Dataframe must have 2 column --> "source_text" and "target_text"
            tokenizer (PreTrainedTokenizer): a PreTrainedTokenizer (T5Tokenizer, MT5Tokenizer, or ByT5Tokenizer)
            source_max_token_len (int, optional): max token length of source text. Defaults to 512.
            target_max_token_len (int, optional): max token length of target text. Defaults to 512.
        """
        self.tokenizer = tokenizer
        self.data = data
        self.source_max_token_len = source_max_token_len
        self.target_max_token_len = target_max_token_len

    def __len__(self):
        """ returns length of data """
        return len(self.data)

    def __getitem__(self, index: int):
        """ returns dictionary of input tensors to feed into T5/MT5 model"""

        data_row = self.data.iloc[index]
        source_text = data_row["source_text"]

        source_text_encoding = self.tokenizer(
            source_text,
            max_length=self.source_max_token_len,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt",
        )

        target_text_encoding = self.tokenizer(
            data_row["target_text"],
            max_length=self.target_max_token_len,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt",
        )

        labels = target_text_encoding["input_ids"]
        # to make sure we have correct labels for T5 text generation
        labels[labels == 0] = -100

        return {
            "source_text_input_ids": source_text_encoding["input_ids"].flatten(),
            "source_text_attention_mask": source_text_encoding["attention_mask"].flatten(),
            "labels": labels.flatten(),
            "labels_attention_mask": target_text_encoding["attention_mask"].flatten(),
        }

In [5]:
#@title LightningDataModule
class LightningDataModule(pl.LightningDataModule):
    """ PyTorch Lightning data class """

    def __init__(
        self,
        train_df: pd.DataFrame,
        test_df: pd.DataFrame,
        eval_df: pd.DataFrame,
        tokenizer: PreTrainedTokenizer,
        batch_size: int = 4,
        source_max_token_len: int = 512,
        target_max_token_len: int = 512,
        num_workers: int = 2,
    ):
        """
        initiates a PyTorch Lightning Data Module
        Args:
            train_df (pd.DataFrame): training dataframe. Dataframe must contain 2 columns --> "source_text" & "target_text"
            test_df (pd.DataFrame): validation dataframe. Dataframe must contain 2 columns --> "source_text" & "target_text"
            tokenizer (PreTrainedTokenizer): PreTrainedTokenizer (T5Tokenizer, MT5Tokenizer, or ByT5Tokenizer)
            batch_size (int, optional): batch size. Defaults to 4.
            source_max_token_len (int, optional): max token length of source text. Defaults to 512.
            target_max_token_len (int, optional): max token length of target text. Defaults to 512.
        """
        super().__init__()

        self.train_df = train_df
        self.test_df = test_df
        self.eval_df = test_df
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.source_max_token_len = source_max_token_len
        self.target_max_token_len = target_max_token_len
        self.num_workers = num_workers

    def setup(self, stage=None):
        self.train_dataset = PyTorchDataset(
            self.train_df,
            self.tokenizer,
            self.source_max_token_len,
            self.target_max_token_len,
        )
        self.test_dataset = PyTorchDataset(
            self.test_df,
            self.tokenizer,
            self.source_max_token_len,
            self.target_max_token_len,
        )
        self.val_dataset = PyTorchDataset(
            self.eval_df,
            self.tokenizer,
            self.source_max_token_len,
            self.target_max_token_len,
        )

    def train_dataloader(self):
        """ training dataloader """
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """ test dataloader """
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """ validation dataloader """
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )


In [6]:
# @title LightningModel

class LightningModel(pl.LightningModule):
    """ PyTorch Lightning Model class"""

    def __init__(
        self,
        tokenizer,
        model,
        checkpoint_name: str,
        output_dir: str,
        save_only_last_epoch: bool = False,
        learning_rate: float = 0.0001,
    ):
        """
        initiates a PyTorch Lightning Model
        Args:
            tokenizer : T5/MT5/ByT5 tokenizer
            model : T5/MT5/ByT5 model
            output_dir (str, optional): output directory to save model checkpoints. Defaults to "outputs".
            save_only_last_epoch (bool, optional): If True, save just the last epoch else models are saved for every epoch
        """
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.output_dir = output_dir
        self.average_training_loss = None
        self.average_validation_loss = None
        self.save_only_last_epoch = save_only_last_epoch
        self.learning_rate = learning_rate
        self.checkpoint_name = checkpoint_name

    def forward(self, input_ids, attention_mask, decoder_attention_mask, labels=None):
        """ forward step """
        output = self.model(
            input_ids,
            attention_mask=attention_mask,
            labels=labels,
            decoder_attention_mask=decoder_attention_mask,
        )

        return output.loss, output.logits

    def training_step(self, batch, batch_size):
        """ training step """
        input_ids = batch["source_text_input_ids"]
        attention_mask = batch["source_text_attention_mask"]
        labels = batch["labels"]
        labels_attention_mask = batch["labels_attention_mask"]

        loss, outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_attention_mask=labels_attention_mask,
            labels=labels,
        )

        self.log(
            "train_loss", loss, prog_bar=True, logger=True, on_epoch=True, on_step=True
        )
        return loss

    def validation_step(self, batch, batch_size):
        """ validation step """
        input_ids = batch["source_text_input_ids"]
        attention_mask = batch["source_text_attention_mask"]
        labels = batch["labels"]
        labels_attention_mask = batch["labels_attention_mask"]

        loss, outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_attention_mask=labels_attention_mask,
            labels=labels,
        )

        self.log(
            "val_loss", loss, prog_bar=True, logger=True, on_epoch=True, on_step=True
        )
        return loss

    def test_step(self, batch, batch_size):
        """ test step """
        input_ids = batch["source_text_input_ids"]
        attention_mask = batch["source_text_attention_mask"]
        labels = batch["labels"]
        labels_attention_mask = batch["labels_attention_mask"]

        loss, outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_attention_mask=labels_attention_mask,
            labels=labels,
        )

        self.log("test_loss", loss, prog_bar=True, logger=True,)
        return loss

    def configure_optimizers(self):
        """ configure optimizers """
        return AdamW(self.parameters(), lr=self.learning_rate)

    def training_epoch_end(self, training_step_outputs):
        """ save tokenizer and model on epoch end """
        self.average_training_loss = np.round(
            torch.mean(torch.stack([x["loss"] for x in training_step_outputs])).item(),
            4,
        )

        path = f"{self.output_dir}/{self.checkpoint_name}/t5-epoch-{self.current_epoch}-tloss-{str(self.average_training_loss)}-vloss-{str(self.average_validation_loss)}"
        if self.save_only_last_epoch:
            if self.current_epoch == self.trainer.max_epochs - 1:
                self.tokenizer.save_pretrained(path)
                self.model.save_pretrained(path)
        else:
            self.tokenizer.save_pretrained(path)
            self.model.save_pretrained(path)

    def validation_epoch_end(self, validation_step_outputs):
        _loss = [x.cpu() for x in validation_step_outputs]
        self.average_validation_loss = np.round(
            torch.mean(torch.stack(_loss)).item(),
            4,
        )


In [7]:
#@title SimpleT5 class
class SimpleT5:
    """ Custom SimpleT5 class """

    def __init__(self,
        checkpoint_name: str,
        output_dir: str,
        learning_rate: float,
    ) -> None:
        """ initiates SimpleT5 class """
        self.learning_rate = learning_rate
        self.output_dir = output_dir
        self.checkpoint_name = checkpoint_name
        self.trainer = None

    def from_pretrained(self, model_type="t5", model_name="t5-small") -> None:
        """
        loads T5/MT5 Model model for training/finetuning
        Args:
            model_type (str, optional): "t5" or "mt5" . Defaults to "t5".
            model_name (str, optional): exact model architecture name, "t5-base" or "t5-large". Defaults to "t5-base".
        """
        if model_type == "t5":
            self.tokenizer = T5Tokenizer.from_pretrained(f"{model_name}")
            self.model = T5ForConditionalGeneration.from_pretrained(
                f"{model_name}", return_dict=True
            )
        elif model_type == "mt5":
            self.tokenizer = MT5Tokenizer.from_pretrained(f"{model_name}")
            self.model = MT5ForConditionalGeneration.from_pretrained(
                f"{model_name}", return_dict=True
            )
        elif model_type == "byt5":
            self.tokenizer = ByT5Tokenizer.from_pretrained(f"{model_name}")
            self.model = T5ForConditionalGeneration.from_pretrained(
                f"{model_name}", return_dict=True
            )

    def train(
        self,
        train_df: pd.DataFrame,
        test_df: pd.DataFrame,
        eval_df: pd.DataFrame,
        source_max_token_len: int = 512,
        target_max_token_len: int = 512,
        batch_size: int = 8,
        max_epochs: int = 5,
        use_gpu: bool = True,
        early_stopping_patience_epochs: int = 0,  # 0 to disable early stopping feature
        precision=32,
        logger="default",
        dataloader_num_workers: int = 2,
        save_only_last_epoch: bool = False,
    ):
        """
        trains T5/MT5 model on custom dataset
        Args:
            train_df (pd.DataFrame): training datarame. Dataframe must have 2 column --> "source_text" and "target_text"
            test_df ([type], optional): test datarame. Dataframe must have 2 column --> "source_text" and "target_text"
            eval_df ([type], optional): validation datarame. Dataframe must have 2 column --> "source_text" and "target_text"
            source_max_token_len (int, optional): max token length of source text. Defaults to 512.
            target_max_token_len (int, optional): max token length of target text. Defaults to 512.
            batch_size (int, optional): batch size. Defaults to 8.
            max_epochs (int, optional): max number of epochs. Defaults to 5.
            use_gpu (bool, optional): if True, model uses gpu for training. Defaults to True.
            output_dir (str, optional): output directory to save model checkpoints. Defaults to "outputs".
            early_stopping_patience_epochs (int, optional): monitors val_loss on epoch end and stops training, if val_loss does not improve after the specied number of epochs. set 0 to disable early stopping. Defaults to 0 (disabled)
            precision (int, optional): sets precision training - Double precision (64), full precision (32) or half precision (16). Defaults to 32.
            logger (pytorch_lightning.loggers) : any logger supported by PyTorch Lightning. Defaults to "default". If "default", pytorch lightning default logger is used.
            dataloader_num_workers (int, optional): number of workers in train/test/val dataloader
            save_only_last_epoch (bool, optional): If True, saves only the last epoch else models are saved at every epoch
        """
        self.data_module = LightningDataModule(
            train_df,
            test_df,
            eval_df,
            self.tokenizer,
            batch_size=batch_size,
            source_max_token_len=source_max_token_len,
            target_max_token_len=target_max_token_len,
            num_workers=dataloader_num_workers,
        )

        self.T5Model = LightningModel(
            tokenizer=self.tokenizer,
            model=self.model,
            checkpoint_name=self.checkpoint_name,
            output_dir=self.output_dir,
            save_only_last_epoch=save_only_last_epoch,
        )

        # add callbacks
        callbacks = [TQDMProgressBar(refresh_rate=5)]

        if early_stopping_patience_epochs > 0:
            early_stop_callback = EarlyStopping(
                monitor="val_loss",
                min_delta=0.01,
                patience=early_stopping_patience_epochs,
                verbose=True,
                mode="min",
            )
            callbacks.append(early_stop_callback)

        # add gpu support
        gpus = 1 if use_gpu else 0

        # add logger
        loggers = True if logger == "default" else logger

        # prepare trainer
        self.trainer = pl.Trainer(
            logger=loggers,
            callbacks=callbacks,
            max_epochs=max_epochs,
            gpus=gpus,
            precision=precision,
            log_every_n_steps=1,
        )

        # fit trainer
        self.trainer.fit(self.T5Model, self.data_module)

    def load_model(
        self, model_type: str = "t5", model_dir: str = "outputs", use_gpu: bool = False
    ):
        """
        loads a checkpoint for inferencing/prediction
        Args:
            model_type (str, optional): "t5" or "mt5". Defaults to "t5".
            model_dir (str, optional): path to model directory. Defaults to "outputs".
            use_gpu (bool, optional): if True, model uses gpu for inferencing/prediction. Defaults to True.
        """
        if model_type == "t5":
            self.model = T5ForConditionalGeneration.from_pretrained(f"{model_dir}")
            self.tokenizer = T5Tokenizer.from_pretrained(f"{model_dir}")
        elif model_type == "mt5":
            self.model = MT5ForConditionalGeneration.from_pretrained(f"{model_dir}")
            self.tokenizer = MT5Tokenizer.from_pretrained(f"{model_dir}")
        elif model_type == "byt5":
            self.model = T5ForConditionalGeneration.from_pretrained(f"{model_dir}")
            self.tokenizer = ByT5Tokenizer.from_pretrained(f"{model_dir}")

        if use_gpu:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
            else:
                raise "exception ---> no gpu found. set use_gpu=False, to use CPU"
        else:
            self.device = torch.device("cpu")

        self.model = self.model.to(self.device)

    def predict(
        self,
        source_text: str,
        max_length: int = 512,
        num_return_sequences: int = 1,
        num_beams: int = 2,
        top_k: int = 50,
        top_p: float = 0.95,
        do_sample: bool = True,
        repetition_penalty: float = 2.5,
        length_penalty: float = 1.0,
        early_stopping: bool = True,
        skip_special_tokens: bool = True,
        clean_up_tokenization_spaces: bool = True,
    ):
        """
        generates prediction for T5/MT5 model
        Args:
            source_text (str): any text for generating predictions
            max_length (int, optional): max token length of prediction. Defaults to 512.
            num_return_sequences (int, optional): number of predictions to be returned. Defaults to 1.
            num_beams (int, optional): number of beams. Defaults to 2.
            top_k (int, optional): Defaults to 50.
            top_p (float, optional): Defaults to 0.95.
            do_sample (bool, optional): Defaults to True.
            repetition_penalty (float, optional): Defaults to 2.5.
            length_penalty (float, optional): Defaults to 1.0.
            early_stopping (bool, optional): Defaults to True.
            skip_special_tokens (bool, optional): Defaults to True.
            clean_up_tokenization_spaces (bool, optional): Defaults to True.
        Returns:
            list[str]: returns predictions
        """
        input_ids = self.tokenizer.encode(
            source_text, return_tensors="pt", add_special_tokens=True
        )
        input_ids = input_ids.to(self.device)
        generated_ids = self.model.generate(
            input_ids=input_ids,
            num_beams=num_beams,
            max_length=max_length,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            early_stopping=early_stopping,
            top_p=top_p,
            top_k=top_k,
            num_return_sequences=num_return_sequences,
        )
        preds = [
            self.tokenizer.decode(
                g,
                skip_special_tokens=skip_special_tokens,
                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            )
            for g in generated_ids
        ]
        return preds

# Executing the Code

In [8]:
# @title ## Loading the dataset
#@markdown The local index refers to the index of the row in it's corresponding dataset
train_df = pd.read_csv(DATASET_PATH + "train.csv").rename(columns={"Unnamed: 0":"local_idx"})
eval_df = pd.read_csv(DATASET_PATH + "dev.csv").rename(columns={"Unnamed: 0":"local_idx"})
test_df = pd.read_csv(DATASET_PATH + "test.csv").rename(columns={"Unnamed: 0":"local_idx"})

# drop this dataset since it's too noisy
train_df = train_df[train_df['dataset']!='Cornell Movie--Dialogs Corpus']
eval_df  = eval_df [eval_df ['dataset']!='Cornell Movie--Dialogs Corpus']
test_df  = test_df [test_df ['dataset']!='Cornell Movie--Dialogs Corpus']

train_df = train_df[train_df['topic']=='Food & Drink']
eval_df  = eval_df [eval_df ['topic']=='Food & Drink']
test_df  = test_df [test_df ['topic']=='Food & Drink']

# train_df = train_df.iloc[:10]
# eval_df  = eval_df.iloc[:10]
# test_df  = test_df.iloc[:10]

print("Train dataframe shape:", train_df.shape)
print("Eval dataframe shape:", eval_df.shape)
print("Test dataframe shape:", test_df.shape)

train_df.head(5)

Train dataframe shape: (15093, 6)
Eval dataframe shape: (3201, 6)
Test dataframe shape: (3252, 6)


Unnamed: 0,local_idx,dialogue,dataset,topic,subtopic1,subtopic2
0,16671,"#Person1#:Find a general practitioner.\n#Person2#:Where?\n#Person1#:In Napa.\n#Person2#:I found 9. Alex De Moraes, MD is in Napa and a general practitioner.\n#Person1#:Ok.\n#Person2#:Do you want to visit them?\n#Person1#:No, get a cab there now for 1, to 935 Trancas Street.\n#Person2#:Ride type?\n#Person1#:Regular.\n#Person2#:Please confirm: regular ride for 1 to 935 Trancas Street.\n#Person1#:Yes.\n#Person2#:Your ride is on its way.\n#Person1#:What's the cost and when will it arrive?\n#Person2#:It's $23.54 and will arrive in 7 minutes.\n#Person1#:Thanks, find a restaurant near there.\n#Person2#:Cuisine? Do you like Mexican, Chinese, something else?\n#Person1#:Mexican is good.\n#Person2#:Don Perico Mexican Restaurant is in Napa.\n#Person1#:Good, make a reservation for 17:15 on the 13th of March.\n#Person2#:Please confirm: table for 2 at Don Perico Mexican restaurant in Napa at 5:15 pm on March 13th.\n#Person1#:Make it 7 pm for 1.\n#Person2#:Please confirm: table for 1 at 7 pm.\n#Pe...",Schema-Guided Dialogue,Food & Drink,Restaurants,unknown
1,2531,"#Person1#:I'm looking for a restaurant in the cheap price range and in the north side of town.\n#Person2#:We have an Italian and an Indian restaurant that are both inexpensive in the north part of town. Would you like the addresses of those?\n#Person1#:I am really looking for italian food.\n#Person2#:Well here is Da Vinci Pizzeria in the north. It's cheap. 20 Milton Road Chesterton.\n#Person1#:Yes, that would be fine. Can you book a table for 4 at 16:00 on Wednesday?\n#Person2#:Absolutely. You're booked for 16:00 and the reference number is I3WAD456 . Can I help you with anything else today?\n#Person1#:Great! Yes, I'll also need to find a hotel with free parking and free wifi.\n#Person2#:I would recommend the ashley hotel.\n#Person1#:Actually, I apologize for not being more specific, I want to stay in a guesthouse\n#Person2#:Is there a price range or area you prefer?\n#Person1#:Yes I am looking for hotel that is cheap have free wifi and free parking.\n#Person2#:There are 9 cheaply ...",MultiWOZ 2.2,Food & Drink,Restaurants,unknown
4,4884,"#Person1#:I'm looking for a British restaurant on the west side.\n#Person2#:There are 3 restaurants that meet your criteria. Is there a particular price range you are wanting to stick with? There are two expensive ones and one moderate priced one.\n#Person1#:Could you recommend one of the expensive ones?\n#Person2#:Graffiti is on the west side of town, serves a wonderful British menu and falls in the expensive price range. Would you like to reserve a table?\n#Person1#:Yes. I need a booking for thursday at 17:00 for 8 people.\n#Person2#:Booking was successful. Can I get you anything else, then?\n#Person1#:Yes I need the reference number please.\n#Person2#:The reference number is ZF7XHMYX .\n#Person1#:Excellent. That is everything!\n#Person2#:Great. Have a great day!",MultiWOZ 2.2,Food & Drink,Restaurants,unknown
18,8405,"#Person1#:Hi how are you?\n#Person2#:I want to order a white iced chocolate mocha from Peet's Coffee.\n#Person1#:what size?\n#Person2#:I want a large.\n#Person1#:what kind of milk?\n#Person2#:soy milk.\n#Person1#:any whipped cream or syrups?\n#Person2#:Whip cream is good.\n#Person1#:and what's the pickup location?\n#Person2#:San Antonio at Vance Jackson.\n#Person1#:So that's a large iced white chocolate mocha with soy milk and whipped cream from the peets on vance jackson in san antonio.\n#Person2#:Yes.\n#Person1#:you're all set, your drink will be ready in 15 minutes and you'll get a receipt via text.\n#Person2#:ok.",Taskmaster-1,Food & Drink,Beverages,Coffee & Tea
22,21171,"#Person1#:I have a plan to eat outside for which I need your help to find some nice places with fancy prices in San Francisco.\n#Person2#:Do you have any category of food in your mind to eat like Chinese, American or any other type of food? If you let me know your interest then it will be easy to search as per your taste.\n#Person1#:I think I like the European Food very much and make your search based on this.\n#Person2#:What about Bar Tartine restaurant in San Francisco which is the best place for European food?. What is your opinion?.\n#Person1#:I want the place with live music and liquor supply. Can you check with the restaurant that they will allow these?\n#Person2#:The will serve Liquor but they do not have any live music there. Is that okay to you?\n#Person1#:Search for something else.\n#Person2#:What about Bon Marche Brasserie & Bar restaurant in San Francisco?\n#Person1#:That I think will work for me and I need you to make a reservation at this restaurant on the 12th.\n#Per...",Schema-Guided Dialogue,Food & Drink,Restaurants,Restaurant Reviews & Reservations


# Preprocessing The Dataset

In [9]:
#@title Checking the tokenizer outputs
#@markdown From the results below, we can see that we need to add the </s> (EOS token) after every sentence manually.
# to add the \<sep\> ([SEP] token) between turns and 
tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-small")#, additional_special_tokens=['<sep>'])

def remove_speaker_prefix(dialogue):
    # removes "#Person1#: " "#Person2#: "
    return re.sub('(\#Person[1|2]\#\:\s*)', "", dialogue)

dialogue = train_df.iloc[0]['dialogue']
dialogue = remove_speaker_prefix(dialogue)

print("Before Tokenizing:")
print(repr(dialogue))

print()

print("After Tokenizing:")
inputs = tokenizer(dialogue, add_special_tokens=True)
print(repr(tokenizer.decode(inputs['input_ids'])))

print()
print("Special Tokens in the T5Tokenizer: ")
print(tokenizer.all_special_tokens)

Downloading:   0%|          | 0.00/1.81k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/537 [00:00<?, ?B/s]

Before Tokenizing:
"Find a general practitioner.\nWhere?\nIn Napa.\nI found 9. Alex De Moraes, MD is in Napa and a general practitioner.\nOk.\nDo you want to visit them?\nNo, get a cab there now for 1, to 935 Trancas Street.\nRide type?\nRegular.\nPlease confirm: regular ride for 1 to 935 Trancas Street.\nYes.\nYour ride is on its way.\nWhat's the cost and when will it arrive?\nIt's $23.54 and will arrive in 7 minutes.\nThanks, find a restaurant near there.\nCuisine? Do you like Mexican, Chinese, something else?\nMexican is good.\nDon Perico Mexican Restaurant is in Napa.\nGood, make a reservation for 17:15 on the 13th of March.\nPlease confirm: table for 2 at Don Perico Mexican restaurant in Napa at 5:15 pm on March 13th.\nMake it 7 pm for 1.\nPlease confirm: table for 1 at 7 pm.\nYes.\nYour table is booked.\nThanks, that's all I wanted.\nHave a nice day."

After Tokenizing:
"Find a general practitioner. Where? In Napa. I found 9. Alex De Moraes, MD is in Napa and a general practition

In [10]:
#@title ## Preprocessing The Dataset

#@markdown The preprocessing step goes through every dialogue and:

#@markdown 1- Removes speaker prefixes such as "#Person1#:" and "#Person2#:"

#@markdown 2- Tries to split the dialogue into multiple <source, target> sequences

#@markdown 3- Adds the special end of sentence tokens (\</s\>) after each sentence

import re
import spacy
import random
from typing import Callable
from functools import partial

random.seed(SEED)
nlp = spacy.load('en_core_web_sm')

def remove_speaker_prefix(dialogue: str) -> str:
    # removes "#Person1#: ", "#Person2#: ",
    #         "#Person1#:",  "#Person2#:"
    return re.sub('(\#Person[1|2]\#\:\s*)', "", dialogue)

def random_split_in_half(
    dialogue: str,
    turn_sep: str='\n'
    ) -> pd.DataFrame:
    # I know the function is not that efficeint, 
    # but it's really a 1 time job :D
    turns = [t.strip() for t in dialogue.split(turn_sep) if t.strip()]

    # choose a random turn from the list of turns
    idx1 = random.randrange(0, len(turns))
    tokens = nlp(turns[idx1])

    # choose a random word in the random turn to split from
    idx2 = random.randrange(0, len(tokens))

    # create the source and target sequences
    output = pd.DataFrame()
    output['source_text'] = [turn_sep.join(turns[:idx1] + [tokens[:idx2].text])]
    output['target_text'] = [turn_sep.join([tokens[idx2:].text] + turns[idx1+1:])]
    
    return output

def split_and_truncate(
    row: pd.Series,
    max_splits: int=5,
    truncation_token: str='\n',
    turn_sep: str="\n",
    ) -> pd.DataFrame:

    dialogue = row.dialogue
    turns = [t.strip() for t in dialogue.split(turn_sep) if t.strip()]
    n_splits = min(len(turns), max_splits)

    source_text, target_text = [], []
    for idx1 in random.sample(range(len(turns)), n_splits):
        # had to remove the tokenizer for the large dataset
        # the model can handle little noise so it's okay
        # tokens = nlp(turns[idx1])
        tokens = turns[idx1].split(" ")

        idx2 = random.randrange(1, max(len(tokens)//8, 2))

        # source_text.append(turn_sep.join(turns[:idx1] + [tokens[:idx2].text]))
        # target_text.append(turn_sep.join([tokens[idx2:].text] + turns[idx1+1:]))

        source_text.append(turn_sep.join(turns[:idx1] + [' '.join(tokens[:idx2])]))
        target_text.append(turn_sep.join([' '.join(tokens[idx2:])] + turns[idx1+1:]))
        target_text[-1] = target_text[-1].split(truncation_token)[0]
    
    output = pd.DataFrame()
    output['source_text'] = source_text
    output['target_text'] = target_text
    output.index = [row.local_idx]*len(output)

    return output

def preprocessor(
    df: pd.DataFrame, 
    splitter: Callable,
    remove_speaker_prefixes: bool=True,
    eos_token: str="</s>",
    t5_prefix: str="comepelte: ",
    ) -> pd.DataFrame:

    output_df = df.copy()

    if remove_speaker_prefixes:
        output_df.dialogue = output_df.dialogue.swifter.apply(remove_speaker_prefix)
    
    if eos_token:
        output_df.dialogue = output_df.dialogue.swifter.apply(lambda d: d.replace("\n", eos_token))
    
    output_df = pd.concat(output_df.swifter.apply(splitter, axis=1).to_list(), axis=0)

    # T5 expects a prefix describing the task it has to do.
    # we want to compelete the text, so the default prefix is "comepelte: ".
    # add the prefix that T5 expects
    if t5_prefix:
        output_df['source_text'] = t5_prefix + output_df['source_text']

    return output_df

MAX_DIALOGUE_SPLITS = 4 #@param {"type": "integer"}

splitter = partial(
    split_and_truncate,
    max_splits=MAX_DIALOGUE_SPLITS,
    truncation_token=tokenizer.eos_token,
    turn_sep=tokenizer.eos_token,
)

final_preprocessor = lambda df: preprocessor(
    df, 
    splitter,
    remove_speaker_prefixes=True,
    eos_token=tokenizer.eos_token,
    t5_prefix="comepelte: ",
)


train_df = final_preprocessor(train_df)
eval_df  = final_preprocessor(eval_df)
test_df  = final_preprocessor(test_df)

print("Train dataframe shape:", train_df.shape)
print("Eval dataframe shape:", eval_df.shape)
print("Test dataframe shape:", test_df.shape)

print("Examples from the training dataframe")
train_df.sample(10)

Pandas Apply:   0%|          | 0/15093 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/15093 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/15093 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3201 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3201 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3201 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3252 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3252 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3252 [00:00<?, ?it/s]

Train dataframe shape: (60206, 2)
Eval dataframe shape: (12775, 2)
Test dataframe shape: (12972, 2)
Examples from the training dataframe


Unnamed: 0,source_text,target_text
4227,"comepelte: I am looking for an expensive French restaurant.</s>There are two restaurants that fulfill your inquiry, Cote in the Center and Restaurant Two Two in the north. Would you like more information on either?</s>Can I get the address of one place?</s>Cote",is located at Bridge Street City Centre.
12925,"comepelte: Hi, how can I help you?</s>Hi,",I would like to place an order for three small pizzas.
22745,comepelte: I bought a cake from the store and it ended up tasting terrible.</s>you,"sort of gamble on store bought cakes, did you take it back?"
2391,comepelte: I would like to order a pizza from cassano's.</s>What location?</s>Dayton ohio the one off of 741.</s>OK for pick up or delivery?</s>Pick,"up please, about 25 minutes would be good."
3446,comepelte: I need a restaurant in Chicago.</s>What date would you like your reservation?</s>Monday.</s>At what time would you like to dine?</s>9PM.</s>How many people will be attending?</s>3</s>Would you like a specific type of cuisine?</s>Chinese.</s>I've,"found a few suggestions for you! Asian Outpost, Big Bowl, and Hot Woks. Would you like to book one of those?"
4505,"comepelte: I need a restaurant that has a salad bar in LA for 4 at 3pm thursday</s>Simply Salad, Mean Greens, and Mixt Greens are good options for salad bars in LA. Would you like to book one these choices?</s>Simply salad sounds amazingf</s>Unfortunately I cannot find Simply Salad in our database. Would you like to try one of the other options?</s>Mean greens then?</s>Unfortunately","an agent gave you incorrect information. None of those locations are in our system. 3 locations serving salads that are available in our system are. L'Opera, Magnolia - Hollywood, and Malo. Which would you like to book?"
20118,"comepelte: Hello how may I help you?</s>I want to pay my late fees</s>Certainly, I can assist you in paying your late fees! Will you be paying online or in person?</s>I will be paying in person</s>Okay, great! Please ensure that you bring library ID and a credit card when paying</s>I don't have a credit card. Can I pay in cash?</s>Unfortunatly, the library does not accept cash for late fees.</s>What are my late fees total?</s>Your late fees total 56.34.</s>I have $1 billion in cash. The library can keep the change</s>You",are most gracious! Have a good day
14955,comepelte: I want to find a place to eat.</s>What city would you like me to search in?</s>Find me something in SFO.</s>What kind of food are you wanting to eat?</s>I was hoping for some Freshwater Fish.</s>I have 10 restaurants for you. Akiko's Sushi Bar is a nice place in San Francisco.</s>Yeah,that sounds great. I want to make a reservation.
1577,"comepelte: Hello, I would like to order a pizza.</s>We have three different crusts to choose from. Would you like thin, regular or stuffed crust?</s>What is inside the stuffed crust?</s>Mozarella Cheese filling.I think I will go with the stuffed crust pizza.</s>What size would you like? We have 10"", 12"" or large.</s>12"" pizza. OK. what kind of toppings would you like on your pizza?</s>I would like cheese and sasauge for my toppings please.</s>I'm sorry we are out of sausage today. Is there another topping that you would like?</s>What other toppings do you have?</s>Pepperoni, chicken, olives, onions, spinich and green peppers.</s>I would like to get pepperoni as my second topping please.</s>Is there anything else that I can add to your order?</s>No thank you, the pizza is all I need tonight.</s>Thank you for your order is there anything else that I can help you with?</s>Yes, I would like the pizza well done. Nice and golden brown please.</s>Well done it is. Your order will be ready ...",a problem. I will change pepperoni to spinich. Is there anything else that I can help you with?
5217,"comepelte: I need to find a cheap place to eat in Cambridge.</s>I have lots of options for you. Do you prefer a certain area or type of food?</s>Cheap world food, please.</s>We don't have any of those unfortunately. Would you like to try another type of cuisine?</s>Yeah, how about a Mediterranean restaurant in the centre of town?</s>I have three places, would you prefer it still be cheap or would expensive work as well?</s>I would still like cheap please.</s>That would be the gardenia, at 2 Rose Crescent City Centre. Would you like me to make a reservation?</s>Can you give me the postcode, please?</s>Certainly. The post code is cb2311</s>I also need a place to stay in the centre. 1 star, please.</s>I don't have any 1 star hotels but I have a 0 star hotel called El Shaddai in the Centre. Would you like me to book it for you?</s>How",about any with 4 stars?


In [None]:
#@title The reason for the peak in the low number of tokens is related to the splitting operation

tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-small")#, additional_special_tokens=['<sep>'])
for label in ['source_text', 'target_text']:
    # temp_data = train_df[label].apply(lambda s: len(s.split(" ")))
    temp_data = train_df[label].swifter.apply(lambda s: len(tokenizer.tokenize(s)))

    ax = temp_data.plot(
        kind='hist',
        bins=50, 
        label=label,
        figsize=(14, 4)
    )
    ax.legend()
    ax.set_xlabel("# of tokens")
    ax.set_xlim(-10, 600)
    # ax.set_ylim(0, 10_000)
    ax.set_title("Histogram of the estimated number of tokens (per sample) in the train dataset")

    print(f"Min={temp_data.min()}, Max={temp_data.max():.2f}, Mean={temp_data.mean()}, column={label}")
    print(f"Number of examples that will be truncated:", sum(temp_data>512))
    print()

  "This pandas object has duplicate indices, "


Pandas Apply:   0%|          | 0/60206 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (521 > 512). Running this sequence through the model will result in indexing errors


# Training The Model

In [None]:
# !tensorboard dev upload --logdir LOCAL_DIR \
#   --name "Team 47 GP Tensorboard" \
#   --description "" \

In [None]:
TENSORBOARD_LOGS = LOCAL_DIR + "tb_logs"
%tensorboard --logdir {TENSORBOARD_LOGS} --reload_interval 1

In [None]:
# clear the cashe
#@title Parameter Configurations
torch.cuda.empty_cache()
pl.seed_everything(SEED)

model_checkpoint = "google/t5-v1_1-base" #@param {"type":"string"}
checkpoint_name = "t5-v1_1-base" #@param {"type":"string"}
LOGS_DIR = LOCAL_DIR + f"{checkpoint_name}_check_points"

batch_size = 16 #@param {"type":"integer"}
max_epochs = 10 #@param {"type":"integer"}

early_stopping_patience_epochs= 5 #@param {"type":"integer"}


DATASET_SIZE = DATASET_PATH.split("/")[-2]

tensorboard_name = (
    f"{checkpoint_name}"
    +f"_BatchSize-{batch_size}"
    +f"_N-Splits-{MAX_DIALOGUE_SPLITS}"
    +f"_DatasetSize-{DATASET_SIZE}"
    +f"_Topic-Food&Drink"
)

model = SimpleT5(
    checkpoint_name=tensorboard_name,
    output_dir=LOGS_DIR, 
    learning_rate=1e-4
)

logger = TensorBoardLogger(
    TENSORBOARD_LOGS, 
    name=tensorboard_name
)

In [None]:
# #@title Train The Model
# torch.cuda.empty_cache()
# pl.seed_everything(SEED)

# model.from_pretrained(
#     model_type="t5", 
#     model_name=model_checkpoint
# )

# # resume_dir = "t5-v1_1-base_check_points"
# # resume_checkpoint = "simplet5-epoch-4-train-loss-2.1554-val-loss-1.9899"
# # resume_path = os.path.join(LOCAL_DIR, resume_dir, resume_checkpoint)

# # model.load_model("t5", resume_path, use_gpu=True)

# model.train(
#     train_df= train_df,
#     test_df = test_df,
#     eval_df = eval_df,
#     source_max_token_len=256, 
#     target_max_token_len=64, 
#     batch_size=batch_size, 
#     max_epochs=max_epochs, 
#     early_stopping_patience_epochs=early_stopping_patience_epochs,
#     use_gpu=True,
#     logger=logger,
#     dataloader_num_workers=multiprocessing.cpu_count(),
# )


# Making Predictions

In [None]:
input_text = "compelete: I want to order a"

sep = ";"
checkpoint_paths = "t5-v1_1-base_check_points/t5-v1_1-base_BatchSize-16_N-Splits-4_DatasetSize-large_Topic-Food&Drink/t5-epoch-3-tloss-2.3091-vloss-2.0588" #@param {"type": "string"}
checkpoint_names = "t5-v1_1-base_BatchSize-16_N-Splits-4_DatasetSize-large_Topic-Food&Drink" #@param {"type": "string"}

# load multiple models and compare their outputs
models_to_load = list(zip(checkpoint_names.split(sep), 
                          checkpoint_paths.split(sep)))


models = []

for name, path in models_to_load:
    path = os.path.join(LOCAL_DIR, path)
    model = SimpleT5(name, output_dir=LOGS_DIR, learning_rate=0.00001)
    model.load_model("t5", path, use_gpu=True)
    models.append((model, name))


def print_samples_predictions(models, df, n_samples=10, max_output_tokens=12):
    results_df = df.sample(n_samples, random_state=SEED)

    for model, name in models:
        results_df[f'prediction-{name}'] = results_df['source_text'].swifter.apply(
            lambda prompt: model.predict(
                prompt, 
                max_length=max_output_tokens
            ),
        )

    return results_df

In [None]:
print("Sample From The Train Set:- ")
print_samples_predictions(models, train_df, 10)

Sample From The Train Set:- 


Pandas Apply:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,source_text,target_text,prediction-t5-v1_1-base_BatchSize-16_N-Splits-4_DatasetSize-large_Topic-Food&Drink
2802,comepelte: How can I help you?</s>Hi. I would like to order food for takeout. I would like to get some soup. Can you help me with that?</s>Sure.</s>For,how many people?,[I want to order some soup.]
3754,"comepelte: Hello,",I am looking for a restaurant on the south side of town that serves unusual food.,[how may I help you?]
39699,"comepelte: Hello, how may I help you?</s>Yes",", hello . I 'm looking for a particular attraction called Downing College .",[I need to order a pizza from Bella Luna]
12681,"comepelte: Hi there!</s>Hi. I'd like Hi. I'd like to order four pizzas, please.</s>how can i help?</s>sure.</s>what would you like on them.</s>On one pizza, I would like only mushroom. Another pizza, I would like like a veggie-lover's pizza with, you know, green pepper, mushrooms, tomatoes. Another pizza with like just pepperoni. And the other pizza, I would just like plain.</s>with extra cheese.</s>got it.</s>what sizes?</s>All small, please.</s>sounds",good.,[good.]
11582,"comepelte: I'd like to find a restaurant in Austin, Texas.</s>Okay.",,[What type of food would you like?]
11763,"comepelte: Hi.</s>How can I help you?</s>Hi. I'm in Nashville, Tennessee, and I'm looking for a place to eat.</s>Okay, I can help with your restaurant search.</s>Yes, I'm looking for a steakhouse.</s>Okay, I will search a Steakhouse in Nashville, Tennessee. What preferences would you like the restaurant to have?</s>Takeout service, and that will allow children.</s>Okay. Let me see what I can find.</s>One moment.</s>Do you have a preferred rating for the restaurant?</s>Yeah. It should have atleast four star ratings.</s>And how about a price point?</s>No price point at the moment. Anything is good for me.</s>Okay, let me see what I can find.</s>One moment.</s>Here are a few of the restaurants, I was able to find.</s>Bob's Steak & Chop House.</s>Jeff Ruby's Steakhouse, Nashville.</s>Kayne Prime Steakhouse.</s>Stoney River Steakhouse and Grill.</s>And Jimmy Kelly's</s>Okay, 'Jimmy Kelly's' sounds perfect.</s>Good choice. Jimmy Kelly's is rated 4.2 Stars by 168 Google reviews.</s>It is ...",you very much for your help.,[you.]
262,"comepelte: Jen, can you order a drink at the Starbucks on Sawmill Road?</s>The Starbucks in Dublin, near the Chipotle?</s>That's the one.</s>What can I order for you?</s>I would like a Grande Carmel Macchiato.</s>What kind of milk would you like in that?</s>Could I get it with soy milk?</s>No problem, anything else?</s>Can you make sure the put the caramel drizzle on it?</s>Will do. Will the vanilla syrup be ok? I think that's what it typically comes with.</s>Yea, the vanilla syrup will be good.</s>What time do you want it ready?</s>I",should be able to pick it up in about ten minutes.,[would like it ready for pickup at the Starbucks on Saw]
3310,comepelte: I,would like to book a seafood restaurant in seattle on 03/19/2016 at 1:30pm for 2 people,[am looking for a restaurant in the centre of town]
13330,"comepelte: Hi.</s>How can I help you?</s>Hi.</s>Could you help me get some information about Park Winters in Winters, California? The restaurant I mean.</s>Yes, how can I help you with that?</s>Could you tell me when they are open for dinner on Sunday?</s>They",start dinner by 6:30 p.m.,[are open from 5 p.m. to 9]
22443,comepelte: Hello how may I help you?</s>Hi I would like to name my new restaurant</s>I can help with that. What kind of restaurant is it?</s>we,are an italian food place catered to families,[have a restaurant called The Pink Door.]


In [None]:
print("Sample From The Evaluation Set:- ")
print_samples_predictions(models, eval_df, 10)

In [None]:
print("Sample From The Test Set:- ")
print_samples_predictions(models, test_df, 10)

# Evaluation (On The Test Set)


In [None]:
#@title 

import evaluate 

model_to_test = models[0][0]
references = test_df['target_text']
predictions = test_df['source_text'].swifter.apply(
    lambda prompt: model_to_test.predict(prompt)
)
predictions = predictions.swifter.apply(lambda s: s[0])

  "This pandas object has duplicate indices, "


Pandas Apply:   0%|          | 0/12972 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (541 > 512). Running this sequence through the model will result in indexing errors
  "This pandas object has duplicate indices, "


Pandas Apply:   0%|          | 0/12972 [00:00<?, ?it/s]

In [None]:
#@title Calculating [BERTScore](https://github.com/Tiiiger/bert_score#readme)

bertscore = evaluate.load("bertscore")

bert_score_values = bertscore.compute(
  predictions=predictions, 
  references=references, 
  lang="en"
)

print("Mean Precision Score:", np.array(scores['precision']).mean())
print("Mean Recall Score:", np.array(scores['recall']).mean())
print("Mean F1 Score:", np.array(scores['f1']).mean())

Downloading builder script:   0%|          | 0.00/2.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.33G [00:00<?, ?B/s]



NameError: ignored

In [None]:
#@title Calculating Exact Match

exact_match = evaluate.load("exact_match")

overall_exact_match = exact_match.compute(
    references=references, 
    predictions=predictions, 
    ignore_case=True, 
    ignore_punctuation=True, 
    ignore_numbers=True,
)['exact_match']


MAX_N = 15 #@param {"type":"integer"}
exact_match_df = pd.DataFrame()

# exact match at diffent lengths
for n in range(1, MAX_N+1):
    exact_match_score = exact_match.compute(
        references=references.apply(lambda x: x[:n]), 
        predictions=predictions.apply(lambda x: x[:n]), 
        ignore_case=True, 
        ignore_punctuation=True, 
        ignore_numbers=True,
    )['exact_match']

    exact_match_df[f"N={n}"] = [exact_match_score]

print(f"Overall exact match: {overall_exact_match:.4f}",)
exact_match_df

In [None]:
model_to_test