In [48]:
!pip install peft
!pip install accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install trl
!pip install langchain
!pip install langchain_community
!pip install rouge-score
!pip install py7zr
!pip install evaluate

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.2


In [49]:
# Imports
import torch
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer, AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback, AutoModelForSequenceClassification
from datasets import load_dataset, load_metric, concatenate_datasets, DatasetDict
from sklearn.model_selection import train_test_split
from peft import get_peft_model, PeftModel, PeftConfig, LoraConfig, TaskType
from trl import PPOTrainer, PPOConfig
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from tqdm import tqdm
from rouge_score import rouge_scorer
import numpy as np
from random import randrange
from evaluate import load

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [30]:
dataset = load_dataset("samsum")
print(dataset)

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 14732
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 819
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 818
    })
})


In [31]:
max_source_length = 512  
max_target_length = 128  

# Preprocess function
def preprocess_function(sample, padding="max_length"):
    inputs = ["summarize: " + item for item in sample["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)
    
    labels = tokenizer(text_target=sample["summary"], max_length=max_target_length, padding=padding, truncation=True)
    
    if padding == "max_length":
        labels["input_ids"] = [
            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
        ]
    
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


tokenized_datasets = {}
for split in dataset.keys():
    tokenized_datasets[split] = dataset[split].map(
        preprocess_function,
        batched=True,
        remove_columns=["dialogue", "summary", "id"]
    )

In [32]:
peft_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM 
)

peft_model = get_peft_model(base_model, peft_config)

In [33]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    auto_find_batch_size=True,
    num_train_epochs=5,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-3,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
    logging_dir='./logs'
)

# Initialize the trainer
trainer = Seq2SeqTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

# Start training
trainer.train()



Epoch,Training Loss,Validation Loss
1,1.4942,1.412887
2,1.4221,1.400144
3,1.3789,1.391535
4,1.3383,1.38907
5,1.3325,1.387629




TrainOutput(global_step=9210, training_loss=1.3945142847448426, metrics={'train_runtime': 7169.4617, 'train_samples_per_second': 10.274, 'train_steps_per_second': 1.285, 'total_flos': 5.124003128672256e+16, 'train_loss': 1.3945142847448426, 'epoch': 5.0})

In [34]:
trainer.model.save_pretrained('./lora_model')



### Qualitative evaluation

In [56]:
peft_model.eval()
sample = dataset['test'][randrange(len(dataset["test"]))]

input_ids = tokenizer("summarize: " + sample["dialogue"], return_tensors="pt", truncation=True).input_ids.cuda()
print(f"input sentence: {sample['dialogue']}\n{'---'* 20}")
outputs = base_model.generate(input_ids=input_ids, do_sample=True, top_p=0.9)
print(f"Summary by base model:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]} \n")

outputs = peft_model.generate(input_ids=input_ids, do_sample=True, top_p=0.9)
print(f"Summary by peft model:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]}")

input sentence: Tomas: Has anybody received the grant yet?
Sierra: no, not yet
Jeremy: I haven't checked even
Tomas: I'm completely broke
Tomas: checking my bank account every hour
Tomas: but nothing's happening there
Sierra: lol
Sierra: be patient. If you need money I can lend you some, don't worry
Tomas: Thanks, I hope they'll arrive any minute
------------------------------------------------------------
Summary by base model:
Sierra hasn't received the grant yet. Jeremy hasn't checked yet and 

Summary by peft model:
Tomas has not received the grant yet. Sierra will lend him some money if needed


### Quantitative evaluation

In [None]:
# ROUGE metric to access quantitatively
rouge = load('rouge')

# Function to compute ROUGE scores
def compute_rouge_scores(predictions, references):
    return rouge.compute(predictions=predictions, references=references, use_stemmer=True)

# Function to generate summaries
def generate_summaries(model, tokenizer, dataset):
    summaries = []
    for example in tqdm(dataset):
        input_text = "summarize: " + example['dialogue']
        inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(model.device)
        
        with torch.no_grad():
            outputs = model.generate(**inputs, do_sample=True)
        
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries

# Generate summaries using base model
print("Generating summaries with base model...")
base_summaries = generate_summaries(base_model, tokenizer, dataset['test'])

# Compute ROUGE scores for base model
base_rouge_scores = compute_rouge_scores(base_summaries, dataset['test']['summary'])
print("Base Model ROUGE Scores:")
print(base_rouge_scores)

# Generate summaries using PEFT model
print("Generating summaries with PEFT model...")
peft_summaries = generate_summaries(peft_model, tokenizer, dataset['test'])

# Compute ROUGE scores for PEFT model
peft_rouge_scores = compute_rouge_scores(peft_summaries, dataset['test']['summary'])
print("PEFT Model ROUGE Scores:")
print(peft_rouge_scores)