In [None]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


In [None]:
# Attach Google Drive to the Colab filesystem; later cells read and write
# under /content/drive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')


Mounted at /content/drive


In [None]:

# Restore the saved seq2seq model and its tokenizer from Google Drive.
model_save_path = "/content/drive/MyDrive/bart_model"

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_save_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_save_path)
model = model.to(device)


In [None]:
def generate_summary(batch):
    """Generate summaries for a batch of dialogues with the notebook's model.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``
    objects. Written to be passed to ``Dataset.map(..., batched=True)``.

    Args:
        batch: Mapping whose ``"dialogue"`` entry holds the input texts of
            the current batch.

    Returns:
        The same ``batch`` object, with a ``"predicted_summary"`` entry added
        that contains one decoded summary per input dialogue.
    """
    # Pad to the longest dialogue in this batch and truncate over-long inputs.
    # NOTE(review): no explicit max_length is passed, so truncation presumably
    # falls back to the tokenizer's own maximum input length -- confirm.
    inputs = tokenizer(batch["dialogue"], truncation=True, padding="longest", return_tensors="pt")

    input_ids = inputs["input_ids"].to(device)

    # The mask is looked up with a ``None`` default, so only move it to the
    # device when it is actually present; calling ``.to`` on ``None`` would
    # raise AttributeError.
    attention_mask = inputs.get("attention_mask", None)
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    # Beam search with 8 beams; output length constrained to 32..128 tokens.
    summary_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=128,
        min_length=32,
        num_beams=8,
        length_penalty=0.8,
        early_stopping=True
    )

    # Convert token ids back to text, dropping special tokens.
    batch["predicted_summary"] = tokenizer.batch_decode(summary_ids, skip_special_tokens=True)

    return batch


In [None]:
!pip install py7zr

Collecting py7zr
  Downloading py7zr-0.22.0-py3-none-any.whl.metadata (16 kB)
Collecting texttable (from py7zr)
  Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting pycryptodomex>=3.16.0 (from py7zr)
  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting pyzstd>=0.15.9 (from py7zr)
  Downloading pyzstd-0.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.4 kB)
Collecting pyppmd<1.2.0,>=1.1.0 (from py7zr)
  Downloading pyppmd-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting pybcj<1.1.0,>=1.0.0 (from py7zr)
  Downloading pybcj-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting multivolumefile>=0.2.3 (from py7zr)
  Downloading multivolumefile-0.2.3-py3-none-any.whl.metadata (6.3 kB)
Collecting inflate64<1.1.0,>=1.0.0 (from py7zr)
  Downloading inflate64-1.0.0-cp310-cp310-manylinux_2_17_

In [None]:
!pip install datasets
from datasets import load_dataset

dataset_samsum = load_dataset("samsum")
test_dataset = dataset_samsum['test']




Downloading data:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14732 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/819 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/818 [00:00<?, ? examples/s]

In [None]:
# Generating summaries for the test dataset
predicted_test_dataset = test_dataset.map(generate_summary, batched=True, batch_size=8)

# Previewing a few entries to check the results
predicted_test_dataset[:3]


Map:   0%|          | 0/819 [00:00<?, ? examples/s]

{'id': ['13862856', '13729565', '13680171'],
 'dialogue': ["Hannah: Hey, do you have Betty's number?\nAmanda: Lemme check\nHannah: <file_gif>\nAmanda: Sorry, can't find it.\nAmanda: Ask Larry\nAmanda: He called her last time we were at the park together\nHannah: I don't know him well\nHannah: <file_gif>\nAmanda: Don't be shy, he's very nice\nHannah: If you say so..\nHannah: I'd rather you texted him\nAmanda: Just text him 🙂\nHannah: Urgh.. Alright\nHannah: Bye\nAmanda: Bye bye",
  "Eric: MACHINE!\r\nRob: That's so gr8!\r\nEric: I know! And shows how Americans see Russian ;)\r\nRob: And it's really funny!\r\nEric: I know! I especially like the train part!\r\nRob: Hahaha! No one talks to the machine like that!\r\nEric: Is this his only stand-up?\r\nRob: Idk. I'll check.\r\nEric: Sure.\r\nRob: Turns out no! There are some of his stand-ups on youtube.\r\nEric: Gr8! I'll watch them now!\r\nRob: Me too!\r\nEric: MACHINE!\r\nRob: MACHINE!\r\nEric: TTYL?\r\nRob: Sure :)",
  "Lenny: Babe, can y

In [None]:
import pandas as pd

# Materialise the dataset (including the predictions) as a DataFrame and
# persist it to Google Drive as CSV, without the index column.
df = pd.DataFrame(data=predicted_test_dataset)
df.to_csv(
    path_or_buf="/content/drive/MyDrive/generated_summaries.csv",
    index=False,
)


In [None]:
from google.colab import files

# Trigger a browser download of the CSV that was written to Drive.
files.download(filename='/content/drive/MyDrive/generated_summaries.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Reload the saved predictions from Drive.
df = pd.read_csv('/content/drive/MyDrive/generated_summaries.csv')

# Print the first few rows: source dialogue, reference summary, model output.
num_examples = 5
separator = "\n" + "-"*80 + "\n"
for position in range(num_examples):
    row = df.iloc[position]
    print(f"Example {position+1}:")
    print(f"Dialogue: {row['dialogue']}")
    print(f"Reference Summary: {row['summary']}")
    print(f"Generated Summary: {row['predicted_summary']}")
    print(separator)


Example 1:
Dialogue: Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye
Reference Summary: Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.
Generated Summary: Larry called Betty the last time Amanda and Hannah were at the park together. Amanda can't find Betty's number. She will text Larry.

--------------------------------------------------------------------------------

Example 2:
Dialogue: Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it's really funny!
Eric: I know! I especially like the train part!
Rob: Hahaha! No one talks to the machine li

In [None]:
!pip install rouge_score



Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=2c97dbb5a8e6c29997aff0c7d6157e4307703ec1d3dd55fa471111d1751c91e5
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
from datasets import load_metric
import numpy as np

# Loading the ROUGE metric
# NOTE(review): `load_metric` is deprecated in newer `datasets` releases in
# favour of the separate `evaluate` package. Migrating would add a dependency
# and may change the result format read by the reporting cell (`value.mid`),
# so verify before switching.
rouge_metric = load_metric("rouge")


def compute_metrics(predictions):
    """Score generated summaries against their references with ROUGE.

    Args:
        predictions: Mapping with two parallel sequences:
            ``"summary"`` (reference summaries) and ``"predicted_summary"``
            (generated summaries).

    Returns:
        The result of ``rouge_metric.compute`` for these inputs.
    """
    references = predictions["summary"]
    # Use a separate local name instead of rebinding the `predictions` argument.
    generated = predictions["predicted_summary"]

    # Compute ROUGE scores
    return rouge_metric.compute(predictions=generated, references=references)


# Evaluate once over the full predicted test split.
results = compute_metrics({
    "summary": predicted_test_dataset["summary"],
    "predicted_summary": predicted_test_dataset["predicted_summary"]
})



In [None]:

# Report the `.mid` aggregate of every ROUGE variant, rounded to four decimals.
print("ROUGE Scores:")
for rouge_name, aggregate in results.items():
    mid_score = aggregate.mid
    print(f"{rouge_name}:")
    print(f"  Precision: {mid_score.precision:.4f}")
    print(f"  Recall:    {mid_score.recall:.4f}")
    print(f"  F1-Score:  {mid_score.fmeasure:.4f}")


ROUGE Scores:
rouge1:
  Precision: 0.4296
  Recall:    0.6192
  F1-Score:  0.4796
rouge2:
  Precision: 0.2211
  Recall:    0.3268
  F1-Score:  0.2468
rougeL:
  Precision: 0.3434
  Recall:    0.5014
  F1-Score:  0.3853
rougeLsum:
  Precision: 0.3431
  Recall:    0.5013
  F1-Score:  0.3850
