In [None]:
import wandb
from kaggle_secrets import UserSecretsClient
wandb_key = UserSecretsClient().get_secret("wannabe")

wandb.login(key=wandb_key)

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import DataCollatorForSeq2Seq
import torch
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_name = 'facebook/bart-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

Using device: cpu


In [3]:
from datasets import load_dataset

dataset = load_dataset("csv", data_files='/kaggle/input/qag-wop/QAG_Train_wop.csv')

print(dataset)

FileNotFoundError: Unable to find '/kaggle/input/qag-wop/QAG_Train_wop.csv'

In [None]:
def get_features(batch):
    encodings = tokenizer(batch['question'], 
                          text_target=batch['context'], 
                          max_length=1024, truncation=True)
    
    return {"input_ids": encodings["input_ids"],
            "attention_mask": encodings["attention_mask"],
            "labels": encodings["input_ids"]}

dataset_enc = dataset.map(get_features, batched=True)
columns = ['input_ids', 'labels', 'attention_mask']
dataset_enc.set_format(type='torch', columns=columns)

print(dataset_enc)

Map:   0%|          | 0/2803 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'context', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 2803
    })
})


In [None]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    run_name='bart-large-finetuning',
    output_dir='./results',
    logging_dir='./logs',
    per_device_train_batch_size=1,
    logging_steps=200,
    num_train_epochs=8,
    warmup_steps=500,
    weight_decay=0.01,
    learning_rate=5e-5,
    max_grad_norm=1.0,
    gradient_accumulation_steps=16,
    fp16=True
)

trainer = Trainer(model=model,
                args=training_args,
                processing_class=tokenizer,
                data_collator=data_collator,
                train_dataset=dataset_enc['train'])

trainer.train()

  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)


Step,Training Loss
200,0.1124
400,0.0734
600,0.1639
800,0.1827
1000,0.0826
1200,0.0832
1400,0.03




TrainOutput(global_step=1400, training_loss=0.10404654775347029, metrics={'train_runtime': 1989.2767, 'train_samples_per_second': 11.272, 'train_steps_per_second': 0.704, 'total_flos': 531998784061440.0, 'train_loss': 0.10404654775347029, 'epoch': 7.95897252943275})

In [None]:
dialogue = "Introduce yourself as detailed as possible."

input_ids = tokenizer(dialogue, return_tensors='pt', 
                      max_length=1024, truncation=True).input_ids.to(device)
output = model.generate(input_ids, max_length=1024, early_stopping=False)

summary = tokenizer.decode(output[0], skip_special_tokens=True)
print(f"Summary: {summary}")

Summary: Introduce yourself as detailed as possible.


In [None]:
model.save_pretrained("bart-baseline")

In [None]:
%cd /kaggle/working
from IPython.display import FileLink
FileLink('bart-baseline/model.safetensors')

/kaggle/working


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import DataCollatorForSeq2Seq
import torch
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_path = "./bart-baseline"

if os.path.exists(model_path):
    print(f"Path '{model_path}' exists. Loading the model...")
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True).to(device)
    dialogue = "Introduce yourself as detailed as possible."
    input_ids = tokenizer(dialogue, return_tensors='pt', 
                        max_length=1024, truncation=True).input_ids.to(device)
    output = model.generate(input_ids, max_length=1024, early_stopping=False)
    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Summary: {summary}")
else:
    print(f"Path '{model_path}' does not exist. Please check the directory.")
    exit()