In [None]:
import warnings 
warnings.filterwarnings("ignore")

# 0. Preparing Data

In [2]:
import pandas as pd

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader

In [4]:
test_filename = "samsum-test.csv"
train_filename = "samsum-train.csv"
val_filename = "samsum-validation.csv"

# test_filename = "/kaggle/input/samsum-dataset-text-summarization/samsum-test.csv"
# train_filename = "/kaggle/input/samsum-dataset-text-summarization/samsum-train.csv"
# val_filename = "/kaggle/input/samsum-dataset-text-summarization/samsum-validation.csv"

df_train = pd.read_csv(train_filename)
df_train = df_train.drop(columns=['id'])

df_test = pd.read_csv(test_filename)
df_test = df_test.drop(columns=['id'])

df_val = pd.read_csv(val_filename)
df_val = df_val.drop(columns=['id'])

In [5]:
df_train = df_train.map(str)
df_test = df_test.map(str)
df_val = df_val.map(str)

In [6]:
train_data_dialogues = [dialogue for dialogue in df_train.dialogue.tolist()]
train_data_summaries = [summary for summary in df_train.summary.tolist()]
val_data_dialogues = [dialogue for dialogue in df_val.dialogue.tolist()]
val_data_summaries = [summary for summary in df_val.summary.tolist()]

# 1. Fine-Tuning Models for Text Summarization

## 1.1 Fine-Tuning BERT

In [7]:
from transformers import BertTokenizer, EncoderDecoderModel

In [10]:
model_name = "bert-base-uncased"

In [11]:
tokenizer = BertTokenizer.from_pretrained(model_name)
model = EncoderDecoderModel.from_encoder_decoder_pretrained(model_name, model_name)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertLMHeadModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.e

In [12]:
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.eos_token_id = tokenizer.sep_token_id
model.config.pad_token_id = tokenizer.pad_token_id
model.config.vocab_size = model.config.encoder.vocab_size

In [8]:
class SummarizationDataset(Dataset):
    def __init__(self, dialogue, summary, tokenizer, max_input_length=512, max_output_length=128):
        self.dialogue = dialogue
        self.summary = summary
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length

    def __len__(self):
        return len(self.dialogue)

    def __getitem__(self, idx):
        input_text = str(self.dialogue[idx])
        target_text = str(self.summary[idx])

        inputs = self.tokenizer.encode_plus(
            input_text,
            max_length=self.max_input_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        targets = self.tokenizer.encode_plus(
            target_text,
            max_length=self.max_output_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        input_ids = inputs["input_ids"].squeeze()
        attention_mask = inputs["attention_mask"].squeeze()
        labels = targets["input_ids"].squeeze()

        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

In [14]:
train_dataset = SummarizationDataset(
    train_data_dialogues,
    train_data_summaries,
    tokenizer
)

In [15]:
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

EncoderDecoderModel(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [17]:
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

In [18]:
from tqdm import tqdm

In [None]:
model.train()
epochs = 3
for epoch in range(epochs):
    loop = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}", leave=True)
    for batch in loop:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        loss.backward()
        optimizer.step()

        loop.set_postfix(loss=loss.item())

Epoch 1/3: 100%|██████████| 1842/1842 [21:20<00:00,  1.44it/s, loss=3.14]
Epoch 2/3: 100%|██████████| 1842/1842 [21:21<00:00,  1.44it/s, loss=2.31]
Epoch 3/3: 100%|██████████| 1842/1842 [21:22<00:00,  1.44it/s, loss=2.17]


In [20]:
model.save_pretrained("fine-tuned-bert-summarizer")
tokenizer.save_pretrained("fine-tuned-bert-summarizer")

('fine-tuned-bert-summarizer/tokenizer_config.json',
 'fine-tuned-bert-summarizer/special_tokens_map.json',
 'fine-tuned-bert-summarizer/vocab.txt',
 'fine-tuned-bert-summarizer/added_tokens.json')

## 2.2 Fine-Tuning GPT 2

In [9]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from transformers import GPT2Tokenizer
import torch

In [10]:
class GPT2Dataset(Dataset):

    def __init__(self, csv_file, mode='train',length=None):
        self.data = pd.read_csv(csv_file).map(str)
        self.data.drop(columns=['id'], inplace=True)

        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        self.tokenizer.add_special_tokens({'pad_token':'<|pad|>','sep_token':'<|sep|>'})
        self.mode = mode

        if length == None:
            self.len = len(self.data)
        else:
            self.len = length

    def __len__(self):
        return self.len

    def __getitem__(self,idx):

        if self.mode=='valid':
            idx = -idx
        elif self.mode=='test':
            idx = -idx - self.len
        else:
            idx = idx

        dialogue = self.data.iloc[idx]['dialogue']
        summary = self.data.iloc[idx]['summary']

        max_length = 1024
        max_dialogue_length = 900
        max_summary_length = max_length - max_dialogue_length - 1  

        dialogue_tokens = self.tokenizer.encode(dialogue, truncation=True, max_length=max_dialogue_length)
        summary_tokens = self.tokenizer.encode(summary, truncation=True, max_length=max_summary_length)

        content = dialogue_tokens + [self.tokenizer.sep_token_id] + summary_tokens

        if len(content) < max_length:
            content += [self.tokenizer.pad_token_id] * (max_length - len(content))
        else:
            content = content[:max_length]

        text = torch.tensor(content)
        sum_idx = len(dialogue_tokens)

        sample = {'dialogue': text, 'sum_idx': sum_idx}
        return sample

In [11]:
from transformers import GPT2LMHeadModel
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.nn import CrossEntropyLoss
from tqdm import tqdm

In [12]:
# gpt_train_data = GPT2Dataset("/kaggle/input/samsum-dataset-text-summarization/samsum-train.csv",mode='train')
# gpt_valid_data = GPT2Dataset("/kaggle/input/samsum-dataset-text-summarization/samsum-validation.csv",mode='valid')

gpt_train_data = GPT2Dataset("samsum-train.csv",mode='train')
gpt_valid_data = GPT2Dataset("samsum-validation.csv",mode='valid')

In [11]:
gpt_train_dataloader = DataLoader(
    gpt_train_data,
    batch_size=4,
    shuffle=True,
    # collate_fn=collate_fn
)

# gpt_val_dataloader = DataLoader(
#     gpt_valid_data,
#     batch_size=4,
#     shuffle=False,
#     # collate_fn=collate_fn
# )

In [12]:
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt_tokenizer.add_special_tokens({'pad_token':'<|pad|>','sep_token':'<|sep|>'})

ignore_idx = gpt_tokenizer.pad_token_id
gpt_model = GPT2LMHeadModel.from_pretrained('gpt2')
gpt_model.resize_token_embeddings(len(gpt_tokenizer))

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Embedding(50259, 768)

In [13]:
gpt_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gpt_model.to(gpt_device)

gpt_optimizer = AdamW(gpt_model.parameters(), lr=5e-5)
epochs = 2
total_steps = len(gpt_train_dataloader) * epochs
gpt_scheduler = get_linear_schedule_with_warmup(gpt_optimizer, num_warmup_steps=0, num_training_steps=total_steps)

In [14]:
gpt_loss_fn = CrossEntropyLoss(ignore_index=gpt_tokenizer.pad_token_id)

In [15]:
gpt_model.train()
for epoch in range(epochs):
    total_loss = 0
    for batch in tqdm(gpt_train_dataloader):
        gpt_optimizer.zero_grad()
        input_ids = batch['dialogue'].to(gpt_device)
        labels = input_ids.clone()
        outputs = gpt_model(input_ids, labels=labels)
        loss = outputs.loss
        loss.backward()
        gpt_optimizer.step()
        gpt_scheduler.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(gpt_train_dataloader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss}")

100%|██████████| 3683/3683 [39:44<00:00,  1.54it/s]


Epoch 1, Loss: 0.6375366425970602


100%|██████████| 3683/3683 [39:44<00:00,  1.54it/s]

Epoch 2, Loss: 0.44881896143604677





In [16]:
gpt_model.save_pretrained('fine-tuned-gpt2-summarizer')
gpt_tokenizer.save_pretrained('fine-tuned-gpt2-summarizer')

('fine-tuned-gpt2-summarizer/tokenizer_config.json',
 'fine-tuned-gpt2-summarizer/special_tokens_map.json',
 'fine-tuned-gpt2-summarizer/vocab.json',
 'fine-tuned-gpt2-summarizer/merges.txt',
 'fine-tuned-gpt2-summarizer/added_tokens.json')

## 3.3 Fine-Tuning Llama

In [13]:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, TrainingArguments, AutoTokenizer
from peft import LoraConfig

In [None]:
model_name = "meta-llama/Llama-2-7b-hf"

In [33]:
from huggingface_hub import login
login("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")

In [None]:
llama_tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"

In [14]:
import torch

In [None]:
!pip install bitsandbytes



In [None]:
import bitsandbytes

In [None]:
llama_compute_dtype = getattr(torch, "float16")

llama_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=llama_compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [None]:
llama_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=llama_quant_config,
    device_map='auto',
    use_auth_token=True
)
llama_model.config.use_cache = False
llama_model.config.pretraining_tp = 1

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [None]:
!pip install accelerate



In [None]:
# llama_model = LlamaForCausalLM.from_pretrained(
#     'meta-llama/Llama-2-7b-hf',
#     quantization_config=llama_bnb_config,
#     device_map='auto'
# )

In [None]:
llama_peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
llama_training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

In [None]:
!pip install trl

Collecting trl
  Downloading trl-0.12.1-py3-none-any.whl.metadata (10 kB)
Collecting datasets>=2.21.0 (from trl)
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.21.0->trl)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets>=2.21.0->trl)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets>=2.21.0->trl)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.21.0->trl)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.12.1-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.9/310.9 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━

In [None]:
from trl import SFTTrainer

In [None]:
from datasets import Dataset

In [None]:
# Convert to Hugging Face Dataset
hf_dataset = Dataset.from_pandas(df_train)

In [None]:
llama_trainer = SFTTrainer(
    model=llama_model,
    train_dataset=hf_dataset,
    peft_config=llama_peft_params,
    dataset_text_field="dialogue",
    max_seq_length=None,
    tokenizer=llama_tokenizer,
    args=llama_training_params,
    packing=False,
)

In [None]:
llama_trainer.train()

Step,Training Loss
25,2.0502
50,2.0812
75,1.9081
100,2.0061
125,1.8365
150,1.9782
175,1.81
200,2.0025
225,1.8437
250,1.9813


TrainOutput(global_step=250, training_loss=1.9497795867919923, metrics={'train_runtime': 2921.7301, 'train_samples_per_second': 0.342, 'train_steps_per_second': 0.086, 'total_flos': 6942979218407424.0, 'train_loss': 1.9497795867919923, 'epoch': 1.0})

In [None]:
llama_trainer.save_model('fine-tuned-llama-quantized-summarizer')
llama_tokenizer.save_pretrained('fine-tuned-llama-quantized-summarizer')

In [None]:
llama_peft_params.save_pretrained('fine-tuned-llama-quantized-summarizer')
llama_model.config.save_pretrained('fine-tuned-llama-quantized-summarizer')

# 2. Sample Text and Predicted Summaries

In [16]:
tokenizer = BertTokenizer.from_pretrained("results/fine-tuned-bert-summarizer")
model = EncoderDecoderModel.from_pretrained("results/fine-tuned-bert-summarizer")

model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.bos_token_id = tokenizer.cls_token_id 
model.config.eos_token_id = tokenizer.sep_token_id
model.config.pad_token_id = tokenizer.pad_token_id
model.config.vocab_size = model.config.encoder.vocab_size

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

EncoderDecoderModel(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [17]:
def bert_pipeline(text, max_length=512, out_length=128):
    inputs = tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors="pt"
    )

    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    model.eval()
    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=out_length,
            num_beams=4,
            early_stopping=True,
            decoder_start_token_id=tokenizer.cls_token_id,
            bos_token_id=tokenizer.cls_token_id,
            eos_token_id=tokenizer.sep_token_id,
            pad_token_id=tokenizer.pad_token_id
        )

    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return predicted_summary

In [18]:
for i in range(5):
    sample_text = df_test.dialogue.iloc[i]
    print("\nOriginal Text:")
    print(sample_text)

    predicted_summary = bert_pipeline(sample_text)
    print("\nPredicted Summary:")   
    print(predicted_summary)


Original Text:
Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye

Predicted Summary:
betty will text larry about betty's phone number.

Original Text:
Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it's really funny!
Eric: I know! I especially like the train part!
Rob: Hahaha! No one talks to the machine like that!
Eric: Is this his only stand-up?
Rob: Idk. I'll check.
Eric: Sure.
Rob: Turns out no! There are some of his stand-ups on youtube.
Eric: Gr8! I'll watch them now!
Rob: Me too!
Eric: MACHINE!
Rob: MACHINE!
Eric: TTYL?
Rob: Sure :)

Predicted Summary:
rob and rob are talking a

In [19]:
gpt_tokenizer = GPT2Tokenizer.from_pretrained("results/fine-tuned-gpt2-summarizer")
gpt_model = GPT2LMHeadModel.from_pretrained("results/fine-tuned-gpt2-summarizer")
gpt_tokenizer.pad_token = gpt_tokenizer.eos_token

gpt_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpt_model.to(gpt_device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50259, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50259, bias=False)
)

In [20]:
def gpt2_pipeline(dialogue, out_length=100):
    input_ids = dialogue['dialogue'][:dialogue['sum_idx']].unsqueeze(0).to(gpt_device)
    generated_ids = gpt_model.generate(
        input_ids, 
        max_new_tokens=out_length,
        num_beams=5, 
        early_stopping=True, 
        no_repeat_ngram_size=2
    )
    generated_text = gpt_tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    dialogue_text = gpt_tokenizer.decode(dialogue['dialogue'][:dialogue['sum_idx']], skip_special_tokens=True)
    predicted_summary = generated_text[len(dialogue_text):]
    return predicted_summary, dialogue_text

In [21]:
gpt_model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50259, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50259, bias=False)
)

In [22]:
for i in range(5):
    sample = gpt_valid_data[i]
    predicted_summary, dialogue_text = gpt2_pipeline(sample)
    print("\nOriginal Text:")
    print(dialogue_text)
    print("\nPredicted Summary:")
    print(predicted_summary)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Original Text:
A: Hi Tom, are you busy tomorrow’s afternoon?
B: I’m pretty sure I am. What’s up?
A: Can you go with me to the animal shelter?.
B: What do you want to do?
A: I want to get a puppy for my son.
B: That will make him so happy.
A: Yeah, we’ve discussed it many times. I think he’s ready now.
B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) 
A: I'll get him one of those little dogs.
B: One that won't grow up too big;-)
A: And eat too much;-))
B: Do you know which one he would like?
A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
B: I bet you had to drag him away.
A: He wanted to take it home right away ;-).
B: I wonder what he'll name it.
A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))

Predicted Summary:
A is going to a shelter tomorrow. Tom will take care of the dog for his son, who will be born in the next few days. They are discussing the issue of raising a pet dog. <|pad|> 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Original Text:
Paige: I asked them to wait and send the declaration later
Paige: Even end of March if it's possible
Maddy: What did they say?
Paige: They want to close it asap cause Lisa is afraid she forgets about it later
Paige: But I can remind her in a couple of weeks
Paige: It's my responsibility after all
Maddy: But does it really matter? I mean the declaration
Maddy: I think the deadline for payment is 31 March anyway
Paige: I'm not sure, that's what I asked her
Paige: Hope she confirms

Predicted Summary:
 it in the next few days, I don't know if she will be able to do it on her own
Micha: That's not a good idea, she has to pay for the whole thing, so she can't just send it to the bank and wait for her to confirm it
Mary: You're right, it would be better if they didn't wait until the end, otherwise she would have to give it back to them in order to get the money back
Maria: So


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Original Text:
Marry: I broke my nail ;(
Tina: oh, no!
Marry: u know I have that party tomorrow!!!
Tina: I know, let me think...
Tina: I got it!. My sister friend is a cosmetitian, maybe she 'll help
Marry: anyone will be good, I'm desperate!
Tina: I'll call her and let u know, ok?
Marry: ok, I'll wait, but hurry!

Predicted Summary:
 The nail broke. Marry broke his nail. Tina will call him and help him with the party next Tuesday.   <|pad|> <|pad|>  <file_photo> ’s a picture of Tina's sister, who's a Cosmetist, and he's desperate to get it back. He's going to call Tina and tell her that he broke her nail, so she'll let him know if he can help her. She will wait for him, because he has a party on


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Original Text:
Julia: Greg just texted me
Robert: ugh, delete him already
Julia: He's saying he's sorry
Robert: damn girl, delete the bastard
Julia: it's not that simple, you know it
Robert: No Julia, it is pretty simple
Robert: go and delete him
Julia: But he apologised, ok? He's never done it before
Robert: srsly?
Robert: do I need to remind you he cheated on you?
Robert: Julia I'm not going through this again with you
Julia: People change, I do believe it, maybe he changed. He apologised
Robert: and that's it? That' ok? how's different from two other times?
Julia: i told you - he apologised! he's sorry, he wants to meet
Robert: don't, honey, really. We've been through this
Julia: I know, but it's not easy. I think I love him
Robert: i know you do, but you need to be strong. do you want to come over?
Julia: no, thank you love, but i have to get up early tomorrow
Robert: ok, you should go to sleep then
Julia: what about Greg?
Robert: don't text him, he's using you, he cheated on you 

In [55]:
from peft import PeftModel

In [None]:
llama_compute_dtype = getattr(torch, "float16")
llama_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=llama_compute_dtype,
    bnb_4bit_use_double_quant=False,
)

llama_tokenizer = AutoTokenizer.from_pretrained('results/fine-tuned-llama-quantized-summarizer')
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"

llama_model = AutoModelForCausalLM.from_pretrained(
    'results/fine-tuned-llama-quantized-summarizer',
    quantization_config=llama_quant_config,
    device_map='auto',
    use_auth_token=True
)

In [None]:
llama_model = PeftModel.from_pretrained(llama_model, 'results/fine-tuned-llama-quantized-summarizer')


In [None]:
llama_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
llama_model.to(llama_device)

In [None]:
def llama_pipeline(text, max_length=512, out_length=128):
    inputs = llama_tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors="pt"
    )
    input_ids = inputs["input_ids"].to(llama_device)
    attention_mask = inputs["attention_mask"].to(llama_device)

    llama_model.eval()
    with torch.no_grad():
        generated_ids = llama_model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=out_length,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=2,
            repetition_penalty=1.5,
            length_penalty=1.0
        )
    generated_text = llama_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text

In [None]:
for i in range(5):
    sample_text = df_test.dialogue.iloc[i]
    print("\nOriginal Text:")
    print(sample_text)

    predicted_summary = llama_pipeline(sample_text)
    print("\nPredicted Summary:")
    print(predicted_summary)

# 3. Comparison Results

In [28]:
from rouge_score import rouge_scorer

In [None]:
def evaluate(model, test_dataset):
    model.eval()
    generated_summaries = []
    reference_summaries = []
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    
    for i in range(test_dataset.__len__()):
        sample_text = df_test.dialogue.iloc[i]
        predicted_summary = bert_pipeline(sample_text)
        
        reference_summaries.append(test_dataset.summary[i])
        generated_summaries.append(predicted_summary)

    rouge_scores = {
        'rouge1': [],
        'rouge2': [],
        'rougeL': []
    }
    for pred, ref in zip(generated_summaries, reference_summaries):
        scores = scorer.score(ref[0], pred)
        rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
        rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
        rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

    avg_rouge_scores = {key: sum(values) / len(values) for key, values in rouge_scores.items()}

    return avg_rouge_scores, generated_summaries, reference_summaries

In [44]:
bert_rouge, bert_preds, bert_refs = evaluate(model, df_test.head(100))

In [50]:
len(gpt_valid_data)

818

In [51]:
def evaluate_gpt(model, test_dataset):
    model.eval()
    generated_summaries = []
    reference_summaries = []
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    
    for i in range(30):
        sample_text = test_dataset[i]
        predicted_summary, dialogue_text = gpt2_pipeline(sample_text)
        
        reference_summaries.append(dialogue_text)
        generated_summaries.append(predicted_summary)

    rouge_scores = {
        'rouge1': [],
        'rouge2': [],
        'rougeL': []
    }
    for pred, ref in zip(generated_summaries, reference_summaries):
        scores = scorer.score(ref[0], pred)
        rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
        rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
        rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

    avg_rouge_scores = {key: sum(values) / len(values) for key, values in rouge_scores.items()}

    return avg_rouge_scores, generated_summaries, reference_summaries

In [52]:
gpt_rouge, gpt_preds, gpt_refs = evaluate_gpt(gpt_model, gpt_valid_data)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [None]:
def evaluate_llama(model, test_dataset):
    model.eval()
    generated_summaries = []
    reference_summaries = []
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    for i in range(len(test_dataset)):
        sample_text = test_dataset.dialogue.iloc[i]
        reference_summary = test_dataset.summary.iloc[i]
        predicted_summary = llama_pipeline(sample_text)

        reference_summaries.append(reference_summary)
        generated_summaries.append(predicted_summary)

    rouge_scores = {
        'rouge1': [],
        'rouge2': [],
        'rougeL': []
    }
    for pred, ref in zip(generated_summaries, reference_summaries):
        scores = scorer.score(ref, pred)
        rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
        rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
        rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

    avg_rouge_scores = {key: sum(values) / len(values) for key, values in rouge_scores.items()}

    return avg_rouge_scores, generated_summaries, reference_summaries

In [None]:
llama_rouge, llama_preds, llama_refs = evaluate_llama(llama_model, df_test.head(30))

In [53]:
print("BERT Model:")
print(f"ROUGE Scores: {bert_rouge}")

BERT Model:
ROUGE Scores: {'rouge1': 0.009870813397129186, 'rouge2': 0.0, 'rougeL': 0.009870813397129186}


In [54]:
print("\nGPT-2 Model:")
print(f"ROUGE Scores: {gpt_rouge}")


GPT-2 Model:
ROUGE Scores: {'rouge1': 0.0062575431864077555, 'rouge2': 0.0, 'rougeL': 0.0062575431864077555}


In [None]:
print("\nLLAMA Model:")
print(f"ROUGE Scores: {llama_rouge}")