Import dependencies

In [None]:
import torch as t
import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from pathlib import Path
import transformers, datasets, pickle, multiprocessing, peft, evaluate, py7zr, functools 


Global device handles (CPU, plus the GPU when one is available)

In [None]:
# `output_device` is always the CPU; `model_run_device` is CUDA when a GPU is
# available and falls back to the CPU otherwise.
output_device = t.device('cpu')
model_run_device = t.device('cuda' if t.cuda.is_available() else 'cpu')

Configure LoRA. The configuration is the same for every trainer; only the file name and the training set differ.

In [None]:
# LoRA adapter settings used for the model built in this notebook.
lora_config = LoraConfig(
    target_modules=['q', 'v'],  # names of the modules that receive adapters
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

# Load the pretrained t5-base checkpoint, wrap it with the LoRA adapters, then
# move the wrapped model onto the device chosen for running models.
base_model = T5ForConditionalGeneration.from_pretrained("t5-base")
lora_model = get_peft_model(base_model, lora_config)
lora_model = lora_model.to(model_run_device)
def trainer_inator(file_name, tokens):
    """Build a ``Trainer`` that owns its own copy of the LoRA-wrapped model.

    Each call deep-copies the module-level ``lora_model`` so that training one
    trainer never changes the weights seen by another one (previously every
    trainer shared the same model object, so a later ``train()`` call resumed
    from the weights left behind by an earlier one).

    Args:
        file_name: Run label; it becomes the suffix of the checkpoint
            directory ``./checkpoints/t5_summarization_lora_<file_name>``.
        tokens: Mapping with ``'train'`` and ``'validation'`` entries, passed
            to the trainer as the training and evaluation datasets.

    Returns:
        The configured ``Trainer``, with its model on ``model_run_device``.
    """
    import copy  # local import: only needed here to clone the model template

    # Mixed precision (fp16) is only valid on CUDA; follow the device that was
    # actually selected so the notebook also runs on a CPU-only machine.
    on_gpu = model_run_device.type == "cuda"

    training_args = TrainingArguments(
        output_dir=f"./checkpoints/t5_summarization_lora_{file_name}",
        eval_strategy="epoch",
        learning_rate=1e-4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir="./logs",
        log_level="info",
        save_total_limit=1,
        overwrite_output_dir=True,
        disable_tqdm=False,
        use_cpu=not on_gpu,
        fp16=on_gpu
    )

    # NOTE: every trainer now holds a full copy of the model, so memory use
    # grows with the number of trainers alive at once.
    trainer_model = copy.deepcopy(lora_model)

    trainer = Trainer(
        model=trainer_model,
        args=training_args,
        train_dataset=tokens['train'],
        eval_dataset=tokens['validation'],
        tokenizer=T5Tokenizer.from_pretrained("t5-base")
    )
    trainer.model.to(model_run_device)
    return trainer

Create a blank trainer that wraps the untrained model

In [None]:
# The base trainer is not trained in this notebook, so empty datasets are
# enough to construct it.
b_tokens = {
    "train": [],
    "validation": []
}
base_trainer = trainer_inator("base", b_tokens)

# open(..., "wb") does not create missing parent directories, so make sure the
# output folder exists before writing the pickle.
Path("./models").mkdir(parents=True, exist_ok=True)
with open("./models/base_trainer.pickle", "wb") as file:
    pickle.dump(base_trainer, file)

Create the models for CNN and SAMSum

In [None]:
# Read the pickled CNN token splits and hand them to a new trainer.
# Unpickling runs arbitrary code, so only load files produced by this project.
with Path('./preprocessing/cnn_tokens.pickle').open("rb") as token_file:
    cnn_tokens = pickle.load(token_file)

cnn_trainer = trainer_inator("cnn", cnn_tokens)

In [None]:
# Read the pickled SAMSum token splits and hand them to a new trainer.
# Unpickling runs arbitrary code, so only load files produced by this project.
with Path('./preprocessing/samsum_tokens.pickle').open("rb") as token_file:
    samsum_tokens = pickle.load(token_file)

samsum_trainer = trainer_inator("samsum", samsum_tokens)

Train the Models!! (and save them to pickles)

In [None]:
cnn_trainer.train()

# Create the output folder first: open(..., "wb") raises FileNotFoundError when
# the parent directory is missing, which would throw away the finished run.
Path("./models").mkdir(parents=True, exist_ok=True)
with open("./models/cnn_trainer.pickle", "wb") as file:
    pickle.dump(cnn_trainer, file)

In [None]:
samsum_trainer.train()

# Create the output folder first: open(..., "wb") raises FileNotFoundError when
# the parent directory is missing, which would throw away the finished run.
Path("./models").mkdir(parents=True, exist_ok=True)
with open("./models/samsum_trainer.pickle", "wb") as file:
    pickle.dump(samsum_trainer, file)