In [25]:
#!pip install datasets transformers evaluate rouge_score

In [26]:
import pandas
import re, json
import csv

import torch
import torch.nn as nn
from datasets import load_metric,Dataset,DatasetDict, load_dataset, Sequence, Value
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, BartForConditionalGeneration
from transformers import AutoTokenizer, Trainer

import evaluate

import numpy as np
import nltk
import os
import random
from sklearn.model_selection import train_test_split
from typing import List, Optional, Tuple, Union, Dict, Any
from jointbart import myBartForConditionalGeneration
from hg_utils import GenerationMixin

In [27]:
# Reproducibility setup: seed every random number generator this notebook touches.
seed = 42

# Python's and NumPy's generators (legacy global state plus a dedicated Generator).
random.seed(seed)
np.random.seed(seed)
_numpy_rng = np.random.default_rng(seed)

# PyTorch: CPU generator first, then the CUDA generators and cuDNN flags when a GPU is present.
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(False)

# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it here presumably
# only reaches child processes, not the running kernel -- confirm this is what is wanted.
os.environ.update({
    'PYTHONHASHSEED': str(seed),
    "CUDA_LAUNCH_BLOCKING": "1",
})

In [28]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Pin the notebook-level device handle to the CPU.
# NOTE(review): `device` is not referenced by any later cell visible here -- confirm it is still needed.
device = torch.device("cpu")

In [29]:
def convert_to_iob(d):
    """Prefix every tag other than 'O' with 'B-', editing the nested sequences in place.

    Args:
        d: a sequence of mutable tag sequences, one inner sequence per example.

    Returns:
        The very same object `d`, after its inner sequences have been modified.
    """
    for tag_row in d:
        for position, tag in enumerate(tag_row):
            if tag != 'O':
                tag_row[position] = 'B-' + tag

    return d

In [30]:
# Checkpoint name used for both the model and the tokenizer.
model_checkpoint = "facebook/bart-large"
# ROUGE scorer; in the cells visible here it is only referenced from commented-out code in compute_metrics.
metric = evaluate.load("rouge")
# F1 scorer consumed by compute_metrics.
f1_metric = evaluate.load("f1")

In [31]:
def get_label_dicts(ds):
    '''
        Read the label names of the `tags` feature on the train split and build both lookup tables.

        Args:
            ds: mapping with a "train" entry whose `features["tags"].feature.names` lists the labels.

        Returns:
            (label_list, id2label, label2id): the label names as stored on the feature, a dict from
            position to label name, and a dict from label name to position.
    '''
    label_list = ds["train"].features["tags"].feature.names

    id2label = dict(enumerate(label_list))
    label2id = {label: index for index, label in enumerate(label_list)}

    return label_list, id2label, label2id

# Load the dataset identified by 'pvisnrt/mod_capstone'; the recorded output shows train/validation/test splits.
dataset = load_dataset('pvisnrt/mod_capstone')
# NOTE(review): get_label_dicts reads `.feature.names` of the `tags` column, so this presumably has to
# run before the later cell that casts `tags` to int32 -- confirm when re-running cells out of order.
label_list, id2label, label2id = get_label_dicts(dataset)    # get id2label and label2id dictionary
dataset

DatasetDict({
    train: Dataset({
        features: ['source', 'summary_target', 'tags'],
        num_rows: 80
    })
    validation: Dataset({
        features: ['source', 'summary_target', 'tags'],
        num_rows: 10
    })
    test: Dataset({
        features: ['source', 'summary_target', 'tags'],
        num_rows: 10
    })
})

In [32]:
# Registered as the tokenizer's eos token and appended to every summary target during tokenization.
EOS_TOKEN = '<EOS>'
# Passed to the training arguments as `generation_max_length`.
max_target_length = 128

In [33]:
# Load the custom joint BART model (class imported from `jointbart`) and the tokenizer for the same checkpoint.
model = myBartForConditionalGeneration.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

# Register EOS_TOKEN as the tokenizer's eos token and report how many tokens that added.
# NOTE(review): this only changes the tokenizer; whether the model/generation config's eos id
# should be updated to match is not visible here -- confirm.
num_tokens_added = tokenizer.add_special_tokens({'eos_token': EOS_TOKEN})
print(f'Added extra special tokens: {num_tokens_added}')
model.resize_token_embeddings(len(tokenizer))   # resize the embedding matrix to the enlarged vocabulary (the recorded run added 1 token)

Some weights of myBartForConditionalGeneration were not initialized from the model checkpoint at facebook/bart-large and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Added extra special tokens: 1


Embedding(50266, 1024)

In [34]:
# Freeze every parameter except the classification head; the head's own
# `requires_grad` flags are left exactly as they were.
for name, param in model.named_parameters():
    if name not in ('classifier.weight', 'classifier.bias'):
        param.requires_grad = False

In [35]:
# Display the id the tokenizer now assigns to its eos token.
tokenizer.eos_token_id

50265

In [36]:
# Re-type the `tags` column of every split as a plain sequence of int32 values.
for split_name in ('train', 'validation', 'test'):
    dataset[split_name] = dataset[split_name].cast_column("tags", Sequence(Value("int32")))

In [37]:
# Display the train split's feature schema to verify the `tags` cast.
dataset['train'].features

{'source': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'summary_target': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'tags': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None)}

In [38]:
def tokenize_and_align_labels(examples):
    """
    Tokenize a batch of pre-split examples and align the word-level tags with the target tokens.

    Args:
        examples: batch mapping with the columns
            - "source": list of word lists fed to the encoder,
            - "summary_target": list of word lists for the decoder target,
            - "tags": list of per-word tag id lists, indexed by target word position.

    Returns:
        The source encoding produced by the notebook-level `tokenizer`, extended with
            - "labels": token ids of the summary target (with EOS_TOKEN appended as a last word),
            - "decoder_tags": one tag id per target token; only the first sub-token of each word
              carries the word's tag, every other position (special tokens, padding, continuation
              sub-tokens) is -100.

    The input batch is not modified: each summary target is copied before EOS_TOKEN is appended.

    NOTE(review): "labels" keeps the pad token id at padded positions rather than -100 -- confirm
    that the model's loss is expected to see those positions.
    """
    inputs = list(examples['source'])
    model_inputs = tokenizer(inputs, is_split_into_words=True, return_tensors='pt', padding=True)

    # Append EOS to a copy of every target so the caller's lists stay untouched.
    summary_targets = [list(summary_target) + [EOS_TOKEN] for summary_target in examples["summary_target"]]

    with tokenizer.as_target_tokenizer():
        tokenized_targets = tokenizer(summary_targets, is_split_into_words=True, return_tensors='pt', padding=True)

    labels = []
    for i, label in enumerate(examples["tags"]):
        word_ids = tokenized_targets.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None or word_idx == previous_word_idx:
                # Tokens without a word, and continuation sub-tokens, are ignored.
                label_ids.append(-100)
            else:
                # Only the first token of a given word is labelled.
                label_ids.append(label[word_idx])
            previous_word_idx = word_idx
        labels.append(label_ids)

    model_inputs['labels'] = tokenized_targets['input_ids']
    model_inputs["decoder_tags"] = labels

    return model_inputs

In [39]:
# Apply tokenize_and_align_labels to every split, one batch of examples at a time.
tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]



In [40]:
# Drop the raw columns from every split so only the tokenized features remain.
for split_name in ('train', 'validation', 'test'):
    tokenized_datasets[split_name] = tokenized_datasets[split_name].remove_columns(
        ['source', 'summary_target', 'tags']
    )

tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'decoder_tags'],
        num_rows: 80
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'decoder_tags'],
        num_rows: 10
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'decoder_tags'],
        num_rows: 10
    })
})

In [41]:
class MySeq2SeqTrainer(Seq2SeqTrainer):
    """
    `Seq2SeqTrainer` subclass whose generation-time evaluation step also obtains per-token
    classification ids next to the generated summary.

    Only `prediction_step` is overridden. It relies on notebook-level names: `tokenizer`,
    `id2label` and `GenerationMixin` (imported from `hg_utils`).
    """
    def prediction_step(
        self,
        model: nn.Module,
        inputs: Dict[str, Union[torch.Tensor, Any]],
        prediction_loss_only: bool,
        ignore_keys: Optional[List[str]] = None,
    ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
        """
        Perform an evaluation step on `model` using `inputs`.

        When `predict_with_generate` is off or only the loss is requested, the parent
        implementation is used unchanged. Otherwise the step generates with greedy settings
        (`num_beams=1`, `early_stopping=False`) through `hg_utils.GenerationMixin`, prints the
        decoded summaries and the decoded classification labels, computes the loss with a plain
        forward pass, and returns the generated token ids together with the `decoder_tags`.

        Args:
            model (`nn.Module`):
                The model to evaluate.
            inputs (`Dict[str, Union[torch.Tensor, Any]]`):
                The inputs and targets of the model.
                The dictionary will be unpacked before being fed to the model. This override reads
                the tag targets from the `decoder_tags` entry.
            prediction_loss_only (`bool`):
                Whether or not to return the loss only.
            ignore_keys (`List[str]`, *optional*):
                Only forwarded to the parent implementation on the fallback path.
        Return:
            Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple
            `(loss, generated_tokens, labels)` where `labels` is the (padded) `decoder_tags`
            tensor; `(loss, None, None)` when `self.args.prediction_loss_only` is set.
        """

        # Fallback: no generation requested, let the parent class handle the step.
        if not self.args.predict_with_generate or prediction_loss_only:
            return super().prediction_step(
                model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
            )

        # "Labels" for this trainer means the tag targets, not the summary token ids.
        has_labels = "decoder_tags" in inputs
        inputs = self._prepare_inputs(inputs)

        # print("prediction_step inputs: {}".format(inputs.keys()))

        # XXX: adapt synced_gpus for fairscale as well
        gen_kwargs = self._gen_kwargs.copy()
        if gen_kwargs.get("max_length") is None and gen_kwargs.get("max_new_tokens") is None:
            gen_kwargs["max_length"] = self.model.config.max_length

        #gen_kwargs["num_beams"] = (
        #    gen_kwargs["num_beams"] if gen_kwargs.get("num_beams") is not None else self.model.config.num_beams
        #)
        # Beam search is deliberately overridden: any caller-supplied num_beams is replaced by 1.
        gen_kwargs["num_beams"] = 1
        gen_kwargs['early_stopping'] = False

        # default_synced_gpus = True if is_deepspeed_zero3_enabled() else False
        default_synced_gpus = False
        gen_kwargs["synced_gpus"] = (
            gen_kwargs["synced_gpus"] if gen_kwargs.get("synced_gpus") is not None else default_synced_gpus
        )

        if "attention_mask" in inputs:
            gen_kwargs["attention_mask"] = inputs.get("attention_mask", None)
        if "global_attention_mask" in inputs:
            gen_kwargs["global_attention_mask"] = inputs.get("global_attention_mask", None)

        # prepare generation inputs
        # some encoder-decoder models can have varying encoder's and thus
        # varying model input names
        if hasattr(self.model, "encoder") and self.model.encoder.main_input_name != self.model.main_input_name:
            generation_inputs = inputs[self.model.encoder.main_input_name]
        else:
            generation_inputs = inputs[self.model.main_input_name]


        # NOTE(review): `decoder_tags` is read unconditionally although `has_labels` may be False;
        # a batch without that key would presumably raise KeyError here -- confirm.
        tags = inputs["decoder_tags"]
        gen_kwargs.update({"decoder_tags": tags})
        # print(f"Gen kwargs: {gen_kwargs}")
        # print(f"Gen inputs:{generation_inputs}")
        # generated_tokens = self.model.generate(generation_inputs, **gen_kwargs)
        # Generation goes through the project's own GenerationMixin wrapper, which returns a pair:
        # the generated token ids and the classification ids.
        gen_mix = GenerationMixin(model)
        generated_tokens, classification_ids = gen_mix.generate(generation_inputs, **gen_kwargs)

        #print(generated_tokens)
        #print(generated_tokens.shape)

        #print(classification_ids)
        #print(classification_ids.shape)

        # Decoding uses the notebook-level `tokenizer`, not an attribute of the trainer.
        generated_summaries = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

        print('Generated Summaries:\n',*generated_summaries, sep='\n' + ('-'*40) + '\n')

        for generated_summarary in generated_summaries:
            print(f'summary length: {len(generated_summarary.split())}')

        # Translate classification ids into label names for display only.
        classification_labels = []
        classification_ids = classification_ids.cpu().detach().tolist()
        for batch_classification_ids in classification_ids:
            batch_classification_labels = []
            for classification_id in batch_classification_ids:
                # NOTE(review): the offset 3 is not explained in this file; presumably the
                # classifier's id space reserves its first three ids -- TODO confirm against
                # jointbart / hg_utils.
                classification_id = classification_id - 3
                # Ids that fall outside id2label after the shift are silently dropped.
                if classification_id >= 0 and classification_id < len(id2label):
                    batch_classification_labels.append(id2label[classification_id])

            print(f'classification tag length: {len(batch_classification_labels)}')
            classification_labels.append(' '.join(batch_classification_labels))

        print('Classification Labels:\n',*classification_labels, sep='\n' + ('-'*40) + '\n')


        # in case the batch is shorter than max length, the output should be padded
        if gen_kwargs.get("max_length") is not None and generated_tokens.shape[-1] < gen_kwargs["max_length"]:
            generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_length"])
        elif gen_kwargs.get("max_new_tokens") is not None and generated_tokens.shape[-1] < (
            gen_kwargs["max_new_tokens"] + 1
        ):
            generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_kwargs["max_new_tokens"] + 1)

        # Loss comes from a regular forward pass over the prepared inputs, without gradients.
        with torch.no_grad():
            if has_labels:
                with self.compute_loss_context_manager():
                    outputs = model(**inputs) # linear_logits as output
                if self.label_smoother is not None:
                    loss = self.label_smoother(outputs, inputs["decoder_tags"]).mean().detach()
                else:
                    loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
            else:
                loss = None

        if self.args.prediction_loss_only:
            return (loss, None, None)

        # The returned "labels" are the tag targets, padded with the same helper as the predictions.
        if has_labels:
            labels = inputs["decoder_tags"]
            if gen_kwargs.get("max_length") is not None and labels.shape[-1] < gen_kwargs["max_length"]:
                labels = self._pad_tensors_to_max_len(labels, gen_kwargs["max_length"])
            elif gen_kwargs.get("max_new_tokens") is not None and labels.shape[-1] < (
                gen_kwargs["max_new_tokens"] + 1
            ):
                labels = self._pad_tensors_to_max_len(labels, (gen_kwargs["max_new_tokens"] + 1))
        else:
            labels = None
        # print(labels)

        # NOTE(review): the predictions returned here are the generated token ids while the labels
        # are decoder tag ids; `classification_ids` is only printed above. compute_metrics compares
        # the two returned arrays element-wise -- confirm this is the intended evaluation.
        return (loss, generated_tokens, labels)

In [42]:
# Training configuration: evaluate and checkpoint once per epoch, generate during evaluation,
# and reload the best checkpoint when training ends.
training_args = Seq2SeqTrainingArguments(
    output_dir="checkpoints/",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    weight_decay=0.01,
    save_total_limit=4,
    num_train_epochs=10,
    predict_with_generate=True,
    do_train=True,
    do_eval=True,
    fp16=True,
    logging_steps=1,
    save_strategy="epoch",
    # Name the metric explicitly. With `load_best_model_at_end=True` and no
    # `metric_for_best_model`, the Trainer falls back to the evaluation loss, and combined with
    # `greater_is_better=True` it would keep the checkpoint with the HIGHEST loss.
    # "f1" is the key returned by compute_metrics.
    metric_for_best_model="f1",
    greater_is_better=True,
    load_best_model_at_end=True,
    seed=42,
    generation_max_length=max_target_length,
)

In [43]:
# Batch collator for the trainer, built from the notebook's tokenizer and model.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [44]:
def compute_metrics(eval_pred):
    """
    Score one evaluation pass with micro-averaged F1.

    Args:
        eval_pred: pair `(predictions, labels)` of 2-D arrays handed over by the trainer.

    Returns:
        dict mapping every metric name reported by `f1_metric` to its value rounded to 4 decimals.

    On both sides every -100 entry is replaced by `tokenizer.pad_token_id` before scoring, so
    those positions still take part in the comparison.

    NOTE(review): with MySeq2SeqTrainer the predictions are generated token ids and the labels are
    decoder tag ids, and the recorded run reports the same F1 for all ten epochs -- confirm that
    the two arrays are meant to be compared directly. Replacing -100 by the pad id may also
    collide with a genuine tag id of the same value -- verify.
    """
    predictions, labels = eval_pred

    # Debug trace: first prediction row and first label row.
    print("In compute metrics")
    print(predictions[0])
    print(labels[0])

    fill_value = tokenizer.pad_token_id

    def _clean_and_flatten(batch):
        # Swap the -100 markers for the pad id, then unroll the rows into one flat list.
        cleaned = np.where(batch != -100, batch, fill_value)
        flat = []
        for row in cleaned:
            flat.extend(row)
        return flat

    flattened_preds = _clean_and_flatten(predictions)
    flattened_labels = _clean_and_flatten(labels)

    result = f1_metric.compute(predictions=flattened_preds, references=flattened_labels, average='micro')
    print(result.items())

    rounded = {}
    for metric_name, metric_value in result.items():
        rounded[metric_name] = round(metric_value, 4)
    return rounded

In [45]:
# Wire the custom trainer: train on the train split, evaluate on the validation split,
# and score each evaluation with compute_metrics.
trainer = MySeq2SeqTrainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [46]:
# Run training; the custom prediction_step prints generated summaries and labels at every evaluation.
trainer.train()

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,F1
1,1.5757,1.642988,0.1695
2,1.4074,1.362231,0.1695
3,1.2007,1.148366,0.1695
4,0.9507,0.989329,0.1695
5,1.1391,0.874947,0.1695
6,1.1118,0.795513,0.1695
7,0.732,0.742277,0.1695
8,0.6409,0.706807,0.1695
9,0.6321,0.687166,0.1695
10,0.8881,0.680824,0.1695


Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 115
Classification Labels:

----------------------------------------
OB OB O OB OB O O O O O O O O O O O O O O O O O M O O O O O O O O O O O O O O O M O O O O O O O O O O O M O O O O O O O O O O O O O O O M O OB O O O O O O O M O O O M O O O OB O O O O OB O OB OB C O O N O O O O O O O O M C OB O C O O C O N
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep now. Ben : Wher

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 123
Classification Labels:

----------------------------------------
OB OB O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O M O O O O O O O OB O O O O O O O O OB O C O O O O O O O O O O O O M C O O O O O O C O N
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep now. 

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 126
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O C O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep now

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

Generated Summaries:

----------------------------------------
Adam : Hi, I'm Adam, I was wondering if you could help me with my friend who's depressed. She's in a very bad way and I'm afraid she's going to get worse. Karen : Yes, I know, I'll be right there. Adam : Have you talked to May? Karen : No, yesterday, why? Adam : I just talked to her and I must admit I worry about her Karen : Me too, I suggested she should see a specialist, but she wasn't very happy about it Adam : No wonder... Karen : I know – but I think this is serious.
summary length: 101
classification tag length: 127
Classification Labels:

----------------------------------------
O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
Generated Summaries:

----------------------------------------
Emma : I'm going to sleep n

TrainOutput(global_step=800, training_loss=1.0589077426865696, metrics={'train_runtime': 1122.1338, 'train_samples_per_second': 0.713, 'train_steps_per_second': 0.713, 'total_flos': 870250701705600.0, 'train_loss': 1.0589077426865696, 'epoch': 10.0})

In [47]:
# Ask PyTorch to release its cached CUDA memory after training.
torch.cuda.empty_cache()

In [48]:
# Shell escape to the `nvitop` command.
# NOTE(review): the recorded output shows the command was not found in this environment -- remove or guard this cell.
!nvitop


'nvitop' is not recognized as an internal or external command,
operable program or batch file.
