In [15]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
import pandas as pd
from torch.utils.data import Dataset, DataLoader

# torch.cuda.get_device_name(0)

In [16]:
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
data = pd.read_csv('./pwkp.csv')[:20]
model.eval()

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=

In [8]:
class CustomDataset(Dataset):
    def __init__(self, data, tokenizer, max_in_length=1024, max_out_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_in_length = max_in_length
        self.max_out_length = max_out_length

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        in_text = self.data.iloc[idx]["Original"]
        out_text = self.data.iloc[idx]["Summary"]

        in_encoding = self.tokenizer(
            in_text,
            max_length=self.max_in_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        out_encoding = self.tokenizer(
            out_text,
            max_length=self.max_out_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            "input_ids": in_encoding["input_ids"].squeeze(),
            "attention_mask": in_encoding["attention_mask"].squeeze(),
            "decoder_input_ids": out_encoding["input_ids"].squeeze(),
            "labels": out_encoding["input_ids"].squeeze()
        }
    
train_dataset = CustomDataset(data, tokenizer)

In [12]:
training_args = Seq2SeqTrainingArguments(
    per_device_train_batch_size=1,
    output_dir='./bart-fine-tuned',
    save_steps=1000,
    num_train_epochs=1,
    overwrite_output_dir=True,
    logging_dir='./logs'
)

In [13]:
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=None,
    train_dataset=train_dataset
)

In [None]:
trainer.train()
torch.save(model.state_dict(), 'custom-bart')

In [7]:
# Testing fine-tuned model
input_text = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=1024, truncation=True).to('cuda:0')
summary_ids = model.generate(input_ids, max_length=512, num_beams=4, length_penalty=2.0, early_stopping=True)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary)

The tower is 324 metres (1,063 ft) tall, 4-1 height height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. Its construction, the Eiffel Tower surpassed the Washington Monument Monument to become the tallest man-made structure in the the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chiefs Call to the Tower by 5.2 metres (17 ft). Excluding transmitters, the eiffel tower is the second tallest tallest free-standing structure in France after the Millau Viaduct, which is the tallest in the highest building in the United States of the World’s tallest building in its history.


In [None]:
# Export model to ONNX format
model.to('cpu')
model.eval()
dummy_input = tokenizer("This is a sample", return_tensors='pt', max_length=1024, truncation=True)
torch.onnx.export(model, tuple(dummy_input.values()), f='custom-bart.onnx', input_names=['input_ids', 'attention_mask', 'decoder_input_ids'], output_names=['logits'])

In [1]:
# TEst loading model back in
import onnx
model = onnx.load('../aphasia-ai/src/components/custom-bart.onnx')

