# Inspect model output

In [1]:
from datasets import load_dataset

# Identifier of the evaluation dataset passed to `load_dataset`.
DATASET_ID = "ThatsGroes/LLM-summary-evaluation"

# Load the dataset and show its splits/features as the cell output.
output = load_dataset(DATASET_ID)
output

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating test split: 100%|██████████| 10000/10000 [00:00<00:00, 73247.60 examples/s]


DatasetDict({
    test: Dataset({
        features: ['summary', 'dialog', 'system_prompt', 'messages', 'text', 'prompt', 'summary_by_SmolLM2-360M-Instruct-summarizer', 'summary_by_SmolLM2-1.7B-Instruct-summarizer', 'summary_by_SmolLM2-1.7B-Instruct', 'summary_by_SmolLM2-360M-Instruct'],
        num_rows: 10000
    })
})

In [12]:
# Show the prompt of the first test example.
# Row-first indexing (`[0]["prompt"]`) reads a single example; column-first
# indexing (`["prompt"][0]`) can load the entire "prompt" column before
# picking one element. The displayed value is the same either way.
output["test"][0]["prompt"]

[{'content': "Write one sentence that summarizes this conversation, emphasizing any meetings, persons or places mentioned in the conversation. \n\n **Conversation:** \n\n He's the driving force behind the Prodigy, you know. Absolutely legendary producer. Remember that track, Firestarter? Blew my mind the first time I heard it. Yeah, totally groundbreaking stuff.  They've had a few lineup changes over the years, but Howlett's always been the constant. Did you see them at Glastonbury a few years back? Epic. I think he even did a DJ set at some festival in Spain last year. Still going strong.",
  'role': 'user'}]

# Are there any identical summaries?

In [4]:
df = output["test"].to_pandas()

# The four model-output columns compared on every row.
model_columns = [
    "summary_by_SmolLM2-360M-Instruct-summarizer",
    "summary_by_SmolLM2-360M-Instruct",
    "summary_by_SmolLM2-1.7B-Instruct",
    "summary_by_SmolLM2-1.7B-Instruct-summarizer",
]

# One boolean per row: True when all four summaries collapse to a single
# distinct string after lowercasing and stripping surrounding whitespace.
identicals = [
    len({record[name].lower().strip() for name in model_columns}) == 1
    for _, record in df.iterrows()
]

In [5]:
# True if at least one row was flagged in `identicals`, i.e. at least one
# example where all compared model summaries were the same.
any(identicals)

False

# Create dataset for human evaluation

In [55]:
import uuid
import json
import copy
import random
import string

# NOTE: `uuid` and `copy` are not used in this cell; the imports are kept in
# case other cells rely on them.

# Fixed seed so that the drawn sample and the column shuffle are reproducible
# on Restart & Run All. Change it to draw a different evaluation set.
SEED = 42

# Draw the examples that will be shown to the human evaluators.
sample = df.sample(n=100, random_state=SEED)

# Shuffle the model-output columns so the anonymous label assigned to a model
# does not follow the original column order (presumably to blind evaluators).
# A local Random instance is used so the global random state is left alone.
summary_columns = [col for col in sample.columns if "summary_by_" in col]
random.Random(SEED).shuffle(summary_columns)

# One short anonymous label per model column: "a", "b", "c", ...
# Deriving the labels from the number of columns avoids zip() silently
# dropping models (or a KeyError below) if the column count is not four.
assert len(summary_columns) <= len(string.ascii_lowercase), (
    "more summary columns than available single-letter labels"
)
uuids = list(string.ascii_lowercase[: len(summary_columns)])

# Model column name -> anonymous label.
mapping = dict(zip(summary_columns, uuids))

# Persist the mapping so the labels can be resolved back to models later.
with open('model_to_uuid_map.json', 'w') as fout:
    json.dump(mapping, fout)

# Replace the model column names with their anonymous labels.
sample = sample.rename(columns=mapping)

# Keep only the dialog, the reference summary and the anonymised model
# summaries, in that order.
keep_col = ["dialog", "summary"]
keep_col.extend(uuids)
print(keep_col)
sample = sample[keep_col]
print(sample.head())
sample.to_csv("samples_human_eval.csv", index=False)


['dialog', 'summary', 'a', 'b', 'c', 'd']
                                                 dialog  \
3610  Write one sentence that summarizes this conver...   
4260  Write one sentence that summarizes this conver...   
2215  Write one sentence that summarizes this conver...   
578   Write one sentence that summarizes this conver...   
961   Write one sentence that summarizes this conver...   

                                                summary  \
3610  The conversation discusses the Natalia Grace c...   
4260  The conversation discusses the recent performa...   
2215  The conversation revolved around Jared Goff's ...   
578   The conversation focused on the significance o...   
961   The speakers discussed the competitive housing...   

                                                      a  \
3610  This conversation revolves around the Natalia ...   
4260  This conversation discusses the Warriors' impr...   
2215  In this conversation, the speaker discusses th...   
578   This c

# Manually inspect a random subset 

In [11]:
sample = df.sample(n=20)

# (heading printed, column read) pairs in display order. Only the first
# heading carries a trailing colon; it is kept so the printed text is unchanged.
headed_columns = [
    ("summary_by_SmolLM2-360M-Instruct-summarizer:", "summary_by_SmolLM2-360M-Instruct-summarizer"),
    ("summary_by_SmolLM2-360M-Instruct", "summary_by_SmolLM2-360M-Instruct"),
    ("summary_by_SmolLM2-1.7B-Instruct", "summary_by_SmolLM2-1.7B-Instruct"),
    ("summary_by_SmolLM2-1.7B-Instruct-summarizer", "summary_by_SmolLM2-1.7B-Instruct-summarizer"),
]

for _, example in sample.iterrows():
    # Dialog and reference summary first.
    print(f"Dialog:{example['dialog']}\nGround truth summary:\n{example['summary']}\n")

    # Then each model's summary: heading, text, and two blank lines.
    for heading, column in headed_columns:
        print(heading, example[column], "\n", sep="\n")

    # Separator between examples.
    print("----------------")


Dialog:Write one sentence that summarizes this conversation, emphasizing any meetings, persons or places mentioned in the conversation. 

 **Conversation:** 

 Remember that big push last month for the flu shots? What a nightmare. So many people were convinced they were gonna get microchipped. Honestly, some of the things people come up with… it's like they think we're all part of some elaborate government conspiracy. You know what I mean? And the paperwork. I swear, it took longer to document everything than it did to actually give the shot.Speaking of paperwork, have you seen the new intake forms? They’re even longer than before. It’s ridiculous, honestly. They want every detail about a patient’s life history. Did they have a pet hamster when they were five? Did their grandmother ever eat broccoli? I mean, come on.Oh, did you hear about Mrs. Henderson? The one who was in 2B last week? Apparently, she snuck out of the hospital at 3 in the morning, walked all the way downtown, and boug

# Analyse evaluation scores (BERT & ROUGE-L)

In [1]:
import pandas as pd

In [2]:
# Load the evaluation scores file. The path is relative to the notebook's
# working directory. The following cell reads the four `summary_by_*` columns
# from this frame.
df = pd.read_csv("results/evaluation_scores.csv")

In [11]:
# Columns holding the four model-generated summaries compared on each row.
summary_cols = (
    "summary_by_SmolLM2-360M-Instruct-summarizer",
    "summary_by_SmolLM2-360M-Instruct",
    "summary_by_SmolLM2-1.7B-Instruct",
    "summary_by_SmolLM2-1.7B-Instruct-summarizer",
)

# One boolean per row: True when all four cell values are equal.
identicals = []
for i, row in df.iterrows():
    # Exact comparison: the raw cell values are compared as-is, with no
    # lowercasing or whitespace stripping.
    distinct_values = {row[col] for col in summary_cols}
    identicals.append(len(distinct_values) == 1)