### Script to fine-tune a BART model

This script generates a fine-tuned BART model from the fine-tuning data.

In [None]:
import sys
from BART_utilities import *
sys.path.insert(0, '../')
from utilities import *
import transformers
import pandas as pd
import numpy as np
import glob
import math
import random
import re
import argparse
import nltk
from transformers import Trainer, TrainingArguments

### Change the `filename` variable to the path of the fine-tuning data Excel file

In [None]:
# Path to the Excel workbook with the fine-tuning examples; point this at your own file.
filename = "./BART_data_CLS.xlsx"

# Read the workbook, using its first column as the dataframe index.
df = pd.read_excel(filename, index_col=0)

# Rename the 'data' and 'summary' columns to 'source' and 'target'
# (presumably the column names the data module expects; confirm in BART_utilities).
df = df.rename(columns={'data': 'source', 'summary': 'target'})

# Number of rows loaded (displayed as the cell output).
len(df)

In [None]:
# Load the pretrained BART tokenizer and model from the 'facebook/bart-large' checkpoint.
from transformers import (
    BartTokenizer,
    BartForConditionalGeneration,
    AdamW,
    BartConfig,
)

# Tokenizer for the checkpoint, created with add_prefix_space enabled.
tokenizer = BartTokenizer.from_pretrained(
    'facebook/bart-large',
    add_prefix_space=True,
)

# Conditional-generation model that the later cells fine-tune.
bart_model = BartForConditionalGeneration.from_pretrained(
    "facebook/bart-large",
)

In [None]:
# Extra marker tokens to register with the tokenizer (edit this list if required).
new_tokens = [
    '<F>',
    '<RLC>',
    '<A>',
    '<S>',
    '<P>',
    '<R>',
    '<RPC>',
]

# Hand the tokens to the tokenizer under the 'additional_special_tokens' key and
# keep the returned value (presumably the number of tokens actually added).
special_tokens_dict = dict(additional_special_tokens=new_tokens)
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

# Resize the model's token embeddings to the tokenizer's new length so the
# added tokens have embedding rows.
bart_model.resize_token_embeddings(len(tokenizer))

In [None]:
# Wrap the tokenizer and dataframe in the project's data module, with a batch size of 1.
# SummaryDataModule and LitModel are not defined in this notebook; presumably they
# come from the BART_utilities star import (confirm).
summary_data = SummaryDataModule(
    tokenizer,
    df,
    batch_size=1,
)

# Wrap the BART model together with its tokenizer and a learning rate of 2e-5.
model = LitModel(
    learning_rate=2e-5,
    tokenizer=tokenizer,
    model=bart_model,
)

In [None]:
# Configure the trainer used for fine-tuning.
# NOTE(review): `pl` is not imported explicitly in the visible cells; presumably it
# is re-exported by one of the star imports (BART_utilities / utilities) — confirm.
# NOTE(review): `gpus`, `auto_lr_find` and `progress_bar_refresh_rate` look like
# keyword arguments of older PyTorch Lightning releases — verify against the
# installed version before upgrading.
trainer = pl.Trainer(gpus = 1,                       # presumably: train on one GPU
                     max_epochs = 3,                 # upper bound on training epochs
                     min_epochs = 2,                 # lower bound on training epochs
                     auto_lr_find = False,           # presumably: no automatic learning-rate search
                     progress_bar_refresh_rate = 5,  # presumably: progress bar update interval — confirm units
                     precision = 16)                 # presumably: 16-bit precision training — confirm

In [None]:
# Run fine-tuning: fit the wrapped model using the data module built earlier.
trainer.fit(model, summary_data)

In [None]:
# Save the fine-tuned model as a trainer checkpoint. The path is relative, so
# "output.ckpt" is written to the current working directory.
trainer.save_checkpoint("output.ckpt")

In [None]:
# IPython shell escape: compress the saved checkpoint into output.zip.
!zip -r output.zip ./output.ckpt