### Model Training Setup

In [1]:
# Reload imported project modules automatically so edits under src/ are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the jupyter_black extension (presumably auto-formats code cells with Black).
%load_ext jupyter_black

In [2]:
# NOTE(review): this moves the kernel's working directory up one level on EVERY run.
# Re-running the cell without restarting the kernel keeps climbing, after which the
# relative paths used later ("data/...", "model/...") no longer resolve.
cd ..

c:\Users\Matheus\Documents\Git\knowledge-graph-completion


### Load data

In [3]:
import pandas as pd
from src.utils import load_fb15k237

# Display settings: show every column, keep wide frames on a single block,
# and never truncate cell contents.
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("max_colwidth", None)

# Dataset directory, relative to the current working directory.
PATH_FB15k237 = "data/datasets_knowledge_embedding/FB15k-237"

# Project loader; it is unpacked into four objects (their exact types are defined
# in src.utils and are not visible from this notebook).
train, valid, test, entity2wikidata = load_fb15k237(PATH_FB15k237)

# Pre-processed table; later cells read its "demonstration_input" and "tail_text" columns.
processed_data = pd.read_csv(f"{PATH_FB15k237}/processed_data_v2.csv")

### Load the model

In [6]:
from transformers import (
    BartForConditionalGeneration,
    BartTokenizer,
    DataCollatorForSeq2Seq,
)

import torch

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Select model
# MODEL = "facebook/bart-large"
# MODEL = "facebook/bart-base"
MODEL = "lucadiliello/bart-small"

# Load model and tokenizer.
# The weights are cast to float32 and moved to `device`. There is deliberately no
# unconditional `.cuda()` call: it raises on CPU-only machines and would defeat the
# fallback chosen above.
model = BartForConditionalGeneration.from_pretrained(MODEL).float().to(device)
tokenizer = BartTokenizer.from_pretrained(MODEL)

DEV_BATCH = 100  # rows kept when `dev` is set; -1 keeps the whole frame
MAX_LENGTH = 50  # model.config.d_model
BATCH_SIZE = 1  # batch size of the hand-built DataLoaders

# When True, the masking cell truncates the data to DEV_BATCH rows.
dev = True

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.


### Masking data

In [7]:
# Model input: the demonstration text followed by the tokenizer's mask token and a full stop.
processed_data["data_input"] = processed_data["demonstration_input"] + (
    "%s." % tokenizer.mask_token
)

# Target: the same demonstration text with the tail text filled in, then a full stop.
processed_data["data_label"] = (
    processed_data["demonstration_input"] + processed_data["tail_text"] + "."
)

# Development mode works on the first DEV_BATCH rows only; DEV_BATCH == -1 keeps everything.
if dev and DEV_BATCH != -1:
    processed_data = processed_data.head(DEV_BATCH)

In [8]:
from torch.utils.data import Dataset
import copy
import random
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from transformers import DataCollatorForLanguageModeling, DataCollatorWithPadding
from tqdm.auto import tqdm


# Seed Python's built-in RNG only.
# NOTE(review): torch / numpy are not seeded here; the train/validation split below
# passes its own random_state.
random.seed(42)


class DatasetKGC(Dataset):
    """Map-style dataset over pre-tokenized (input, label) pairs.

    Args:
        data: Mapping exposing ``"input_ids"`` and ``"labels"``. ``"input_ids"`` must
            have a ``.shape`` whose first entry is the number of examples; both
            entries are indexed by example position. The mapping is stored by
            reference and is not modified.

    Raises:
        ValueError: if ``"input_ids"`` and ``"labels"`` hold a different number of
            examples.
    """

    def __init__(self, data):
        self.data = data
        self.num_rows = data["input_ids"].shape[0]

        # Fail fast on misaligned inputs instead of surfacing an index error
        # (or silently ignoring extra labels) in the middle of training.
        num_labels = len(data["labels"])
        if num_labels != self.num_rows:
            raise ValueError(
                "input_ids and labels must have the same number of rows "
                "(got %d and %d)" % (self.num_rows, num_labels)
            )

    def __len__(self):
        """Return the number of examples."""
        return self.num_rows

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict with ``"input_ids"`` and ``"labels"``."""
        # squeeze(0) drops a leading singleton dimension if the stored row has one.
        _input = self.data["input_ids"][idx].squeeze(0)
        label = self.data["labels"][idx].squeeze(0)

        return {"input_ids": _input, "labels": label}


def encode_data(data):
    """Tokenize the ``data_input`` / ``data_label`` columns into fixed-length id tensors.

    Both columns are tokenized with the module-level ``tokenizer``, padded and
    truncated to ``MAX_LENGTH``. The per-row tensors are then passed through a
    ``DataCollatorWithPadding`` configured with the same length.

    Args:
        data: Table with ``"data_input"`` and ``"data_label"`` string columns;
            ``len(data)`` must be the number of rows.

    Returns:
        The collator's output for the example list. Downstream code reads its
        ``"input_ids"`` and ``"labels"`` entries.
    """
    # Identical tokenizer settings for inputs and labels.
    tokenizer_options = dict(
        padding="max_length",
        truncation=True,
        return_tensors="pt",
        max_length=MAX_LENGTH,
        add_special_tokens=True,
        return_attention_mask=False,
    )
    source_ids = tokenizer(list(data["data_input"]), **tokenizer_options)["input_ids"]
    target_ids = tokenizer(list(data["data_label"]), **tokenizer_options)["input_ids"]

    # NOTE(review): padded label positions keep the pad token id rather than an
    # ignore index, so they presumably contribute to the training loss - confirm
    # this is intended.
    examples = [
        {"input_ids": source_ids[row], "labels": target_ids[row]}
        for row in range(len(data))
    ]

    # Padding collator (DataCollatorWithPadding) fixed to MAX_LENGTH.
    collator = DataCollatorWithPadding(
        tokenizer=tokenizer, padding="max_length", max_length=MAX_LENGTH
    )

    # Assemble the per-row examples into one batch-like structure.
    return collator(examples)


def train_valid_split(data):
    """Split ``data`` 80/20 with a fixed seed and tokenize both parts.

    Returns:
        ``(encoded_train, encoded_valid)`` as produced by ``encode_data``.
    """
    train_part, valid_part = train_test_split(data, test_size=0.2, random_state=42)
    encoded_train = encode_data(train_part)
    encoded_valid = encode_data(valid_part)
    return encoded_train, encoded_valid


def generate_train_valid_dataloader(data):
    """Build unshuffled train / validation ``DataLoader`` objects from ``data``.

    The split and tokenization come from ``train_valid_split``; both loaders use
    ``BATCH_SIZE`` and iterate in a fixed order.

    Returns:
        ``(train_loader, valid_loader)``.
    """
    encoded_train, encoded_valid = train_valid_split(data)
    return (
        DataLoader(DatasetKGC(encoded_train), batch_size=BATCH_SIZE, shuffle=False),
        DataLoader(DatasetKGC(encoded_valid), batch_size=BATCH_SIZE, shuffle=False),
    )


def generate_train_valid_dataset(data):
    """Build train / validation ``DatasetKGC`` objects from ``data``.

    Same split and tokenization as ``generate_train_valid_dataloader``, but the
    datasets are returned directly instead of being wrapped in loaders.

    Returns:
        ``(train_dataset, valid_dataset)``.
    """
    encoded_train, encoded_valid = train_valid_split(data)
    return DatasetKGC(encoded_train), DatasetKGC(encoded_valid)


def _decode(tokens, tokenizer=tokenizer, batch=True):
    """Turn token ids back into text, dropping special tokens.

    Args:
        tokens: Token ids handed to the tokenizer unchanged.
        tokenizer: Tokenizer to decode with; defaults to the module-level one.
        batch: When truthy use ``tokenizer.batch_decode``, otherwise
            ``tokenizer.decode``.

    Returns:
        Whatever the chosen tokenizer method returns.
    """
    # Ids equal to -100 are not filtered out here (a filter was tried and disabled):
    # tokens = tokens[tokens != -100]
    decode_fn = tokenizer.batch_decode if batch else tokenizer.decode
    return decode_fn(tokens, skip_special_tokens=True)


def _decode_error(tokens_i, tokens_j, tokenizer=tokenizer):
    """Count position-wise mismatches between two token sequences.

    Both sequences are decoded with ``_decode`` in batch mode and the decoded
    entries are compared position by position over the common prefix. Every
    position by which the two sequences differ in length also counts as an error.

    Args:
        tokens_i: First token sequence (must support ``len``).
        tokens_j: Second token sequence (must support ``len``).
        tokenizer: Tokenizer used for decoding; forwarded to ``_decode``.

    Returns:
        ``(error, total_tokens, error / total_tokens)`` where ``total_tokens`` is
        the longer of the two lengths. Two empty sequences yield ``(0, 0, 0.0)``.
    """
    # Forward the caller's tokenizer; previously the argument was silently ignored
    # and the module-level default was always used.
    text_i = _decode(tokens_i, tokenizer=tokenizer)
    text_j = _decode(tokens_j, tokenizer=tokenizer)

    len_i = len(tokens_i)
    len_j = len(tokens_j)
    total_tokens = max(len_i, len_j)

    # Nothing to compare: avoid dividing by zero.
    if total_tokens == 0:
        return (0, 0, 0.0)

    # Length difference counts as errors, plus every differing aligned position.
    error = abs(len_i - len_j)
    error += sum(1 for piece_i, piece_j in zip(text_i, text_j) if piece_i != piece_j)

    return (error, total_tokens, error / total_tokens)


def _decode_error_epoch(model, valid_dataset, debug=False):
    """Generate from every validation batch and accumulate the decoding error.

    Args:
        model: Model exposing ``generate``.
        valid_dataset: Iterable of batches. Each batch is either a dict with
            ``"input_ids"`` and ``"labels"`` (what ``DatasetKGC`` produces) or an
            ``(input, label)`` pair.
        debug: When True, print the accumulated totals.

    Returns:
        ``(error_tokens, total_tokens, error_tokens / total_tokens)``; the ratio is
        ``0.0`` when no tokens were analyzed.
    """
    error_tokens = 0
    total_tokens = 0

    with torch.no_grad():
        for batch in tqdm(valid_dataset, desc="Analyzing decoder error"):
            # Tuple-unpacking a dict would bind its KEYS (strings), and the
            # `.to(device)` call below would then fail - read the entries explicitly.
            if isinstance(batch, dict):
                _input, label = batch["input_ids"], batch["labels"]
            else:
                _input, label = batch

            model_output = model.generate(_input.to(device), max_length=MAX_LENGTH)

            # Compare each generated sequence with the label at the same batch position.
            for i in range(len(model_output)):
                a, b, _ = _decode_error(label[i], model_output[i].to("cpu"))
                error_tokens += a
                total_tokens += b

    # Guard against an empty iterable.
    error_rate = error_tokens / total_tokens if total_tokens else 0.0

    if debug:
        print("Total tokens analyzed: %d" % total_tokens)
        print("Total erroneous tokens predicted: %d" % error_tokens)
        print("Percentage of error: %.3f%%" % (error_rate * 100))

    return error_tokens, total_tokens, error_rate

In [None]:
# Main pipeline
# train_loader, valid_loader = generate_train_valid_dataloader(processed_data)

# Overfitting test
# train_loader = DataLoader(
#     DatasetKGC(encode_data(processed_data)), batch_size=BATCH_SIZE, shuffle=False
# )
# valid_loader = train_loader

# Build the (train, validation) DatasetKGC pair used by the Trainer-based run.
train_ds, valid_ds = generate_train_valid_dataset(processed_data)

In [None]:
# We have a small error because the mask token is not filled at this point.
# sample_feature = valid_features[0]
# sample_label = valid_labels[0]
# sample_output = model.generate(
#     sample_feature.reshape(1, -1).to(device), max_length=MAX_LENGTH
# )

# print(_decode(sample_feature, batch=False))
# print(_decode(sample_label, batch=False))
# print(_decode(sample_output[0], batch=False))

### Training Model

In [None]:
from transformers import AdamW, get_scheduler

# Settings for the hand-written training loop.
# NOTE(review): in the cells visible here only the commented-out loop references these
# names; the Trainer configuration defines its own learning rate and epoch count.
lr = 1e-5
epochs = 60
epoch_accuracy_frequency = 30  # if epoch % accuracy -> compute

# Per-epoch loss history.
loss_epoch = []

cross = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
# # Train

# from tqdm.auto import tqdm

# pbar = tqdm(range(1, epochs + 1), desc="Epochs")
# decoder_error = []

# decoder_error.append(_decode_error_epoch(model, valid_loader, debug=True))

# for epoch in pbar:
#     epoch_loss = 0

#     pbar.set_description("Epoch %s" % epoch)
#     pbar.refresh()

#     for _input, label in train_loader:
#         model.zero_grad()

#         _dt = model(_input.to(device), labels=label.to(device), return_dict=True)

#         # _dt_label = model(label.to(device), return_dict=True)

#         # logits_input = _dt.logits
#         # logits_label = _dt_label.logits

#         # loss = cross(
#         #     logits_input.view(-1, logits_input.size(-1)).softmax(dim=-1),
#         #     logits_label.view(-1, logits_input.size(-1)),
#         # )

#         loss = _dt.loss

#         epoch_loss += loss.item()

#         loss.backward()
#         optimizer.step()

#     if epoch % epoch_accuracy_frequency == 0:
#         decoder_error.append(_decode_error_epoch(model, valid_loader, debug=True))

#     pbar.set_postfix(loss=epoch_loss)
#     loss_epoch.append(epoch_loss)

#     torch.cuda.empty_cache()

In [None]:
# Losses recorded by `compute_metrics`, in call order.
loss_values = []


def compute_metrics(eval_pred):
    """Record and report the loss carried by ``eval_pred``.

    Args:
        eval_pred: Object supporting ``eval_pred["loss"]`` with a value convertible
            to ``float``.

    Returns:
        ``{"loss": <float>}``.

    Side effects:
        Appends the float loss to the module-level ``loss_values`` list
        (previously the whole ``eval_pred`` object was appended).
    """
    # NOTE(review): the Trainer is expected to call this with an EvalPrediction
    # (predictions / label_ids) - confirm that object supports the "loss" lookup
    # before enabling evaluation.
    loss = float(eval_pred["loss"])

    # Mutating the list needs no `global` declaration.
    loss_values.append(loss)

    return {"loss": loss}

In [None]:
from transformers import Trainer
from transformers import TrainingArguments

# Hyperparameters for the Trainer run, grouped by concern.
# "evaluation_strategy", "eval_steps" and "report_to" are defined here but are not
# forwarded by the TrainingArguments call in this cell.
params = dict(
    # Dir
    output_dir="model/model_bart",
    # Batch
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Learning rate
    learning_rate=5e-5,
    seed=42,
    # Epochs
    num_train_epochs=50,
    # Logging
    logging_dir="model/logs",
    logging_strategy="epoch",
    logging_steps=10,
    # Evaluation
    evaluation_strategy="epoch",
    eval_steps=10,
    # Checkpoint
    save_strategy="epoch",
    save_steps=10,
    save_total_limit=2,
    report_to="tensorboard",
    ddp_find_unused_parameters=False,
    warmup_steps=2,
)

# Trainer configuration built from `params`.
# Evaluation settings are intentionally left disabled (commented out below), so the
# run only trains, logs and checkpoints once per epoch.
training_args = TrainingArguments(
    # Dir
    output_dir=params["output_dir"],
    # Batch
    per_device_train_batch_size=params["per_device_train_batch_size"],
    per_device_eval_batch_size=params["per_device_eval_batch_size"],
    # Learning Rate
    learning_rate=params["learning_rate"],
    seed=params["seed"],
    # Epoch
    num_train_epochs=params["num_train_epochs"],
    # logging
    logging_dir=params["logging_dir"],
    logging_strategy=params["logging_strategy"],
    logging_steps=params["logging_steps"],
    # Evaluation
    # evaluation_strategy=params["evaluation_strategy"],
    # eval_steps=params["eval_steps"],
    # Checkpoint
    save_strategy=params["save_strategy"],
    save_steps=params["save_steps"],
    save_total_limit=params["save_total_limit"],
    # pretraining
    ddp_find_unused_parameters=params["ddp_find_unused_parameters"],
    warmup_steps=params["warmup_steps"],
    # Half precision is only usable on CUDA devices; enable it only when a GPU is
    # present so the CPU fallback chosen for `device` keeps working.
    fp16=torch.cuda.is_available(),
    fp16_full_eval=torch.cuda.is_available(),
    # test
    eval_accumulation_steps=1,
)


# Batch collator: pads every batch to exactly MAX_LENGTH using the model tokenizer.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    max_length=MAX_LENGTH,
    padding="max_length",
)


# Wire model, arguments, datasets and collator together.
# The evaluation dataset and metric callback are registered here even though the
# evaluation strategy is not enabled in `training_args`.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    compute_metrics=compute_metrics,
)

In [34]:
# Run fine-tuning with the configuration above; per the captured log, a checkpoint is
# written under model/model_bart after each epoch and older ones are pruned.
trainer.train()

Saving model checkpoint to model/model_bart\checkpoint-200
Configuration saved in model/model_bart\checkpoint-200\config.json


{'loss': 0.0035, 'learning_rate': 4.507007007007007e-05, 'epoch': 5.0}


Model weights saved in model/model_bart\checkpoint-200\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-200\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-200\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-120] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-240
Configuration saved in model/model_bart\checkpoint-240\config.json


{'loss': 0.0028, 'learning_rate': 4.406906906906907e-05, 'epoch': 6.0}


Model weights saved in model/model_bart\checkpoint-240\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-240\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-240\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-160] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-280
Configuration saved in model/model_bart\checkpoint-280\config.json


{'loss': 0.0044, 'learning_rate': 4.306806806806807e-05, 'epoch': 7.0}


Model weights saved in model/model_bart\checkpoint-280\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-280\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-280\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-200] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-320
Configuration saved in model/model_bart\checkpoint-320\config.json


{'loss': 0.0024, 'learning_rate': 4.2067067067067065e-05, 'epoch': 8.0}


Model weights saved in model/model_bart\checkpoint-320\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-320\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-320\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-240] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-360
Configuration saved in model/model_bart\checkpoint-360\config.json


{'loss': 0.0016, 'learning_rate': 4.1066066066066066e-05, 'epoch': 9.0}


Model weights saved in model/model_bart\checkpoint-360\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-360\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-360\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-280] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-400
Configuration saved in model/model_bart\checkpoint-400\config.json


{'loss': 0.0026, 'learning_rate': 4.006506506506507e-05, 'epoch': 10.0}


Model weights saved in model/model_bart\checkpoint-400\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-400\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-400\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-320] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-440
Configuration saved in model/model_bart\checkpoint-440\config.json


{'loss': 0.0028, 'learning_rate': 3.906406406406406e-05, 'epoch': 11.0}


Model weights saved in model/model_bart\checkpoint-440\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-440\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-440\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-360] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-480
Configuration saved in model/model_bart\checkpoint-480\config.json


{'loss': 0.0013, 'learning_rate': 3.8063063063063064e-05, 'epoch': 12.0}


Model weights saved in model/model_bart\checkpoint-480\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-480\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-480\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-400] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-520
Configuration saved in model/model_bart\checkpoint-520\config.json


{'loss': 0.0015, 'learning_rate': 3.7062062062062065e-05, 'epoch': 13.0}


Model weights saved in model/model_bart\checkpoint-520\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-520\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-520\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-440] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-560
Configuration saved in model/model_bart\checkpoint-560\config.json


{'loss': 0.0024, 'learning_rate': 3.6061061061061066e-05, 'epoch': 14.0}


Model weights saved in model/model_bart\checkpoint-560\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-560\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-560\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-480] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-600
Configuration saved in model/model_bart\checkpoint-600\config.json


{'loss': 0.0013, 'learning_rate': 3.506006006006006e-05, 'epoch': 15.0}


Model weights saved in model/model_bart\checkpoint-600\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-600\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-600\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-520] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-640
Configuration saved in model/model_bart\checkpoint-640\config.json


{'loss': 0.0016, 'learning_rate': 3.405905905905906e-05, 'epoch': 16.0}


Model weights saved in model/model_bart\checkpoint-640\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-640\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-640\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-560] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-680
Configuration saved in model/model_bart\checkpoint-680\config.json


{'loss': 0.0018, 'learning_rate': 3.3058058058058064e-05, 'epoch': 17.0}


Model weights saved in model/model_bart\checkpoint-680\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-680\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-680\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-600] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-720
Configuration saved in model/model_bart\checkpoint-720\config.json


{'loss': 0.0018, 'learning_rate': 3.205705705705706e-05, 'epoch': 18.0}


Model weights saved in model/model_bart\checkpoint-720\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-720\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-720\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-640] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-760
Configuration saved in model/model_bart\checkpoint-760\config.json


{'loss': 0.0018, 'learning_rate': 3.105605605605606e-05, 'epoch': 19.0}


Model weights saved in model/model_bart\checkpoint-760\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-760\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-760\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-680] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-800
Configuration saved in model/model_bart\checkpoint-800\config.json


{'loss': 0.0012, 'learning_rate': 3.0055055055055058e-05, 'epoch': 20.0}


Model weights saved in model/model_bart\checkpoint-800\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-800\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-800\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-720] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-840
Configuration saved in model/model_bart\checkpoint-840\config.json


{'loss': 0.0012, 'learning_rate': 2.9054054054054052e-05, 'epoch': 21.0}


Model weights saved in model/model_bart\checkpoint-840\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-840\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-840\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-760] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-880
Configuration saved in model/model_bart\checkpoint-880\config.json


{'loss': 0.001, 'learning_rate': 2.8053053053053054e-05, 'epoch': 22.0}


Model weights saved in model/model_bart\checkpoint-880\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-880\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-880\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-800] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-920
Configuration saved in model/model_bart\checkpoint-920\config.json


{'loss': 0.0005, 'learning_rate': 2.7052052052052052e-05, 'epoch': 23.0}


Model weights saved in model/model_bart\checkpoint-920\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-920\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-920\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-840] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-960
Configuration saved in model/model_bart\checkpoint-960\config.json


{'loss': 0.0004, 'learning_rate': 2.605105105105105e-05, 'epoch': 24.0}


Model weights saved in model/model_bart\checkpoint-960\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-960\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-960\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-880] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1000
Configuration saved in model/model_bart\checkpoint-1000\config.json


{'loss': 0.0004, 'learning_rate': 2.505005005005005e-05, 'epoch': 25.0}


Model weights saved in model/model_bart\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1000\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1000\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-920] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1040
Configuration saved in model/model_bart\checkpoint-1040\config.json


{'loss': 0.0005, 'learning_rate': 2.404904904904905e-05, 'epoch': 26.0}


Model weights saved in model/model_bart\checkpoint-1040\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1040\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1040\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-960] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1080
Configuration saved in model/model_bart\checkpoint-1080\config.json


{'loss': 0.0005, 'learning_rate': 2.3048048048048047e-05, 'epoch': 27.0}


Model weights saved in model/model_bart\checkpoint-1080\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1080\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1080\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1000] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1120
Configuration saved in model/model_bart\checkpoint-1120\config.json


{'loss': 0.0009, 'learning_rate': 2.204704704704705e-05, 'epoch': 28.0}


Model weights saved in model/model_bart\checkpoint-1120\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1120\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1120\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1040] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1160
Configuration saved in model/model_bart\checkpoint-1160\config.json


{'loss': 0.0003, 'learning_rate': 2.1046046046046047e-05, 'epoch': 29.0}


Model weights saved in model/model_bart\checkpoint-1160\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1160\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1160\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1080] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1200
Configuration saved in model/model_bart\checkpoint-1200\config.json


{'loss': 0.0004, 'learning_rate': 2.0045045045045048e-05, 'epoch': 30.0}


Model weights saved in model/model_bart\checkpoint-1200\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1200\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1200\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1120] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1240
Configuration saved in model/model_bart\checkpoint-1240\config.json


{'loss': 0.0003, 'learning_rate': 1.9044044044044046e-05, 'epoch': 31.0}


Model weights saved in model/model_bart\checkpoint-1240\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1240\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1240\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1160] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1280
Configuration saved in model/model_bart\checkpoint-1280\config.json


{'loss': 0.0001, 'learning_rate': 1.8043043043043044e-05, 'epoch': 32.0}


Model weights saved in model/model_bart\checkpoint-1280\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1280\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1280\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1200] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1320
Configuration saved in model/model_bart\checkpoint-1320\config.json


{'loss': 0.0002, 'learning_rate': 1.7042042042042042e-05, 'epoch': 33.0}


Model weights saved in model/model_bart\checkpoint-1320\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1320\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1320\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1240] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1360
Configuration saved in model/model_bart\checkpoint-1360\config.json


{'loss': 0.0002, 'learning_rate': 1.604104104104104e-05, 'epoch': 34.0}


Model weights saved in model/model_bart\checkpoint-1360\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1360\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1360\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1280] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1400
Configuration saved in model/model_bart\checkpoint-1400\config.json


{'loss': 0.0002, 'learning_rate': 1.504004004004004e-05, 'epoch': 35.0}


Model weights saved in model/model_bart\checkpoint-1400\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1400\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1400\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1320] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1440
Configuration saved in model/model_bart\checkpoint-1440\config.json


{'loss': 0.0005, 'learning_rate': 1.403903903903904e-05, 'epoch': 36.0}


Model weights saved in model/model_bart\checkpoint-1440\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1440\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1440\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1360] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1480
Configuration saved in model/model_bart\checkpoint-1480\config.json


{'loss': 0.0003, 'learning_rate': 1.3038038038038039e-05, 'epoch': 37.0}


Model weights saved in model/model_bart\checkpoint-1480\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1480\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1480\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1400] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1520
Configuration saved in model/model_bart\checkpoint-1520\config.json


{'loss': 0.0002, 'learning_rate': 1.2037037037037037e-05, 'epoch': 38.0}


Model weights saved in model/model_bart\checkpoint-1520\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1520\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1520\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1440] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1560
Configuration saved in model/model_bart\checkpoint-1560\config.json


{'loss': 0.0001, 'learning_rate': 1.1036036036036037e-05, 'epoch': 39.0}


Model weights saved in model/model_bart\checkpoint-1560\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1560\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1560\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1480] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1600
Configuration saved in model/model_bart\checkpoint-1600\config.json


{'loss': 0.0001, 'learning_rate': 1.0035035035035035e-05, 'epoch': 40.0}


Model weights saved in model/model_bart\checkpoint-1600\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1600\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1600\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1520] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1640
Configuration saved in model/model_bart\checkpoint-1640\config.json


{'loss': 0.0001, 'learning_rate': 9.034034034034034e-06, 'epoch': 41.0}


Model weights saved in model/model_bart\checkpoint-1640\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1640\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1640\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1560] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1680
Configuration saved in model/model_bart\checkpoint-1680\config.json


{'loss': 0.0001, 'learning_rate': 8.033033033033032e-06, 'epoch': 42.0}


Model weights saved in model/model_bart\checkpoint-1680\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1680\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1680\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1600] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1720
Configuration saved in model/model_bart\checkpoint-1720\config.json


{'loss': 0.0001, 'learning_rate': 7.032032032032032e-06, 'epoch': 43.0}


Model weights saved in model/model_bart\checkpoint-1720\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1720\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1720\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1640] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1760
Configuration saved in model/model_bart\checkpoint-1760\config.json


{'loss': 0.0001, 'learning_rate': 6.031031031031031e-06, 'epoch': 44.0}


Model weights saved in model/model_bart\checkpoint-1760\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1760\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1760\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1680] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1800
Configuration saved in model/model_bart\checkpoint-1800\config.json


{'loss': 0.0002, 'learning_rate': 5.03003003003003e-06, 'epoch': 45.0}


Model weights saved in model/model_bart\checkpoint-1800\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1800\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1800\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1720] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1840
Configuration saved in model/model_bart\checkpoint-1840\config.json


{'loss': 0.0001, 'learning_rate': 4.0290290290290296e-06, 'epoch': 46.0}


Model weights saved in model/model_bart\checkpoint-1840\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1840\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1840\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1760] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1880
Configuration saved in model/model_bart\checkpoint-1880\config.json


{'loss': 0.0001, 'learning_rate': 3.0280280280280284e-06, 'epoch': 47.0}


Model weights saved in model/model_bart\checkpoint-1880\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1880\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1880\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1800] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1920
Configuration saved in model/model_bart\checkpoint-1920\config.json


{'loss': 0.0001, 'learning_rate': 2.0270270270270273e-06, 'epoch': 48.0}


Model weights saved in model/model_bart\checkpoint-1920\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1920\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1920\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1840] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-1960
Configuration saved in model/model_bart\checkpoint-1960\config.json


{'loss': 0.0001, 'learning_rate': 1.0260260260260261e-06, 'epoch': 49.0}


Model weights saved in model/model_bart\checkpoint-1960\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-1960\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-1960\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1880] due to args.save_total_limit
Saving model checkpoint to model/model_bart\checkpoint-2000
Configuration saved in model/model_bart\checkpoint-2000\config.json


{'loss': 0.0001, 'learning_rate': 2.5025025025025025e-08, 'epoch': 50.0}


Model weights saved in model/model_bart\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in model/model_bart\checkpoint-2000\tokenizer_config.json
Special tokens file saved in model/model_bart\checkpoint-2000\special_tokens_map.json
Deleting older checkpoint [model\model_bart\checkpoint-1920] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)




{'train_runtime': 320.2615, 'train_samples_per_second': 12.49, 'train_steps_per_second': 6.245, 'train_loss': 0.001519653731258586, 'epoch': 50.0}


TrainOutput(global_step=2000, training_loss=0.001519653731258586, metrics={'train_runtime': 320.2615, 'train_samples_per_second': 12.49, 'train_steps_per_second': 6.245, 'train_loss': 0.001519653731258586, 'epoch': 50.0})

In [67]:
# Take the first item of `train_ds` as the sample inspected in the cells below.
data = train_ds[0]

In [68]:
# Inspect the shape of the sample's `input_ids` tensor.
data["input_ids"].shape

torch.Size([512])

In [69]:
# Qualitative sanity check: generate from one sample and compare the result
# with the sample's reference text.

# `generate` does not change the module's train/eval mode. If the model is
# still in training mode (e.g. right after a training run), dropout stays
# active and perturbs the generated tokens, so switch to eval mode explicitly.
model.eval()

print("Sample input: ")
# Special tokens are kept so that <mask> stays visible; only padding is removed.
print(tokenizer.decode(data["input_ids"]).replace("<pad>", ""), "\n")

print("Expected output: ")
print(tokenizer.decode(data["labels"], skip_special_tokens=True), "\n")

# Only the forward/generation pass needs gradient tracking disabled.
with torch.no_grad():
    generated_ids = model.generate(
        data["input_ids"].to(device).reshape(1, -1), max_length=MAX_LENGTH
    )[0]

print("Model Output: ")
# Decode the same way as the expected output so the two lines are directly
# comparable (no leading </s><s> / trailing </s>).
print(tokenizer.decode(generated_ids, skip_special_tokens=True))

Sample input: 
<s>Jenna Ushkowitz has award winner of Josh Sussman. Paul Dini has award winner of Adam Horowitz. Don Cheadle has award winner of<mask>.</s> 

Expected output: 
Jenna Ushkowitz has award winner of Josh Sussman. Paul Dini has award winner of Adam Horowitz. Don Cheadle has award winner of Larenz Tate. 

Model Output: 
</s><s>Jenna Ushkowitz has award winner of Josh Sussman. Paul Dini has award winners of Adam Horowitz. Don Cheadle has award loser of Larenz Tate.</s>


In [45]:
# Write the model's configuration and weights to model/saves.
# NOTE(review): only the model is saved here, not the tokenizer.
# NOTE(review): this cell's execution count (45) is lower than the cells above
# it (67-69), so the saved files may predate the latest run -- re-run to confirm.
model.save_pretrained("model/saves")

Configuration saved in model/saves\config.json
Model weights saved in model/saves\pytorch_model.bin


### Train plots

In [None]:
import matplotlib.pyplot as plt

# Loss curve: the values of `loss_epoch` plotted against their index.
fig, ax = plt.subplots()
ax.set(title="Transformer Loss", xlabel="Epoch", ylabel="Loss")
ax.plot(loss_epoch)

# Also report the last recorded loss value as text.
final_loss = loss_epoch[-1]
print(final_loss)

In [None]:
import matplotlib.pyplot as plt

# x positions: index of each `decoder_error` record scaled by
# `epoch_accuracy_frequency` (presumably the number of epochs between
# measurements -- confirm against the training loop).
eval_epochs = [idx * epoch_accuracy_frequency for idx in range(len(decoder_error))]
# The plotted value is the third field of every `decoder_error` record.
error_values = [record[2] for record in decoder_error]

fig, ax = plt.subplots()
ax.set(title="Decoder Error", xlabel="Epoch", ylabel="token error rate")
# Put a tick on every x position that has a measurement.
ax.set_xticks(eval_epochs)
ax.plot(eval_epochs, error_values)