In [None]:
from sacrebleu.metrics import BLEU
from rouge import Rouge
import json
import joblib

# Reference summary (three paragraphs) that the final LLM summary is scored
# against in the "Final Summary Evaluation" step.
ground_truth_summary = """The Simpsons are robbed by a burglar. They discover that every house in Springfield has also been robbed, and the police can't figure out who did it. In an attempt to catch the thief, who is an expert at sneaking into homes, Homer decides to lead a vigilante group. However, he ends up becoming a corrupt vigilante who abuses his power. Due to his new behavior, he's invited to give an interview, which is interrupted by a call from the burglar announcing his next heist: a rare diamond at the Springfield Museum at midnight.

Homer and other men prepare to stake out the museum and prevent the robbery. Homer gets distracted, and the thief escapes with the diamond. The town turns against Homer, and he goes home, where his father shows up claiming to know who the burglar is. He suspects it's one of the elderly residents at the Springfield Retirement Castle because he saw a senior wearing silent sneakers. During the night of the robbery, he saw the same senior leaving unexpectedly, and after the robbery, he saw the diamond in the senior's room.

He tells Homer, and they all go to the retirement home and find the burglar with all the stolen items. He says he'll return everything, and the Springfield police arrest him. During his imprisonment, the burglar says there's a million-dollar treasure buried in Springfield: $100,000 that he himself stole. Everyone hears about this and starts desperately searching for the described location. Once again, they are persuaded to find the treasure with the burglar revealing its whereabouts. Everyone rushes to the spot and starts digging frantically. They find a briefcase with a note inside. In his final trick, the burglar reveals the truth: the treasure story was a ruse to distract everyone while he escaped from jail."""



# Load the saved LLM output and pull out the final summary text and the
# per-chunk summaries.
# NOTE(review): joblib.load unpickles the file, so only point this at result
# files that come from a trusted source.
try:
    with open('../data/results/simpsons_episode_summary.joblib', 'rb') as f:
        llm_results = joblib.load(f)
    llm_final_summary = llm_results['final_summary']['summary']  # Accessing the 'summary' field
    llm_chunk_summaries = llm_results['chunk_summaries']
except (FileNotFoundError, KeyError) as err:
    print("Error loading LLM generated summaries. Check file path and keys.")
    # Stop here with a non-zero status and the underlying cause. The bare
    # `exit()` helper is meant for interactive sessions and, called without an
    # argument, reports success even though loading failed.
    raise SystemExit(f"{type(err).__name__}: {err}") from err

# Metric scorers shared by every evaluate_summary() call.
# effective_order=True is the setting sacrebleu recommends for sentence-level
# BLEU (it warns on each sentence_score() call otherwise): n-gram orders with
# no matches are left out of the average instead of dragging the score down.
bleu_scorer = BLEU(effective_order=True)
rouge_scorer = Rouge()

def evaluate_summary(hypothesis, reference):
    """Score one candidate summary against one reference text.

    Args:
        hypothesis: The generated summary to evaluate.
        reference: The text the summary is compared with.

    Returns:
        A ``(bleu, rouge)`` tuple: ``bleu`` is the sentence-level BLEU score
        divided by 100, and ``rouge`` is the first entry of the list returned
        by ``rouge_scorer.get_scores`` (indexed below as
        ``rouge['rouge-1']['f']`` etc. by the callers in this file).
    """
    sentence_bleu = bleu_scorer.sentence_score(
        hypothesis=hypothesis,
        references=[reference],
    )
    rouge_per_pair = rouge_scorer.get_scores(hyps=hypothesis, refs=reference)
    # Only a single hypothesis/reference pair is passed in, so take entry 0.
    return sentence_bleu.score / 100, rouge_per_pair[0]

# Score the final LLM summary against the reference summary.
bleu_final, rouge_final = evaluate_summary(llm_final_summary, ground_truth_summary)

# Collect the report lines first and emit them in a single print; the text is
# the same header, BLEU line, then F / recall / precision for ROUGE-1 and ROUGE-L.
final_report = [
    "\nFinal Summary Evaluation:",
    f"BLEU: {bleu_final:.4f}",
    f"ROUGE-1-F: {rouge_final['rouge-1']['f']:.4f}",
    f"ROUGE-1-R: {rouge_final['rouge-1']['r']:.4f}",
    f"ROUGE-1-P: {rouge_final['rouge-1']['p']:.4f}",
    f"ROUGE-L-F: {rouge_final['rouge-l']['f']:.4f}",
    f"ROUGE-L-R: {rouge_final['rouge-l']['r']:.4f}",
    f"ROUGE-L-P: {rouge_final['rouge-l']['p']:.4f}",
]
print("\n".join(final_report))



# Hard-coded chunk-level summaries, one string per chunk; each one is passed
# to evaluate_summary() as the reference text in the loop that follows.
# NOTE(review): `llm_chunk_summaries` is loaded from the results file but is not
# used in the visible code — presumably these strings mirror it; confirm.
chunks = [
    "The Simpsons' home is robbed by the Springfield Cat Burglar, who steals Lisa's saxophone, Bart's stamp collection, and Marge's necklace.  Homer is unconcerned, but the family discovers many of their neighbors have also been targeted.  Chief Wiggum's inept investigation leads to a city-wide panic.  Professor Frink demonstrates a high-tech security system that inadvertently causes chaos in the streets.  The burglar is eventually revealed to be Grampa Simpson, who was stealing to fund his gambling habit.  Despite the chaos and loss, Homer promises to get Lisa a replacement saxophone, and he is elected leader of the neighborhood watch, despite his questionable methods and lack of experience. The episode begins with the discovery of the robbery and ends with Homer taking charge of the neighborhood watch, showcasing his incompetence and the absurdity of the situation.",
    "The episode starts with the Springfield residents needing a leader for a community group.  Despite Ned Flanders's initial nomination, Homer Simpson unexpectedly wins the role by promising action without thinking.  He recruits a ragtag team including Moe, Barney, Skinner, and even Grampa (initially rejected due to his age).  Their vigilante group acquires weapons from Herman Hermann's military surplus store, engaging in petty vandalism and heavy-handed 'justice'.  Their antics include harassing people, confiscating saxophones, and generally causing more chaos than they solve.  The group's actions lead to an interview with Kent Brockman, where Homer admits to their criminal activities.  The episode culminates with a phone call from the cat burglar, who taunts Homer by revealing he has Marge's pearls and Lisa's saxophone.  Homer vows to protect the Springfield Museum's cubic zirconia, highlighting the absurdity of the situation and the group's overall ineffectiveness.",
    "The episode starts with Homer's self-proclaimed \"gang\" inadvertently causing more crime than they prevent, while Lisa's saxophone is stolen by a cat burglar.  Homer, appearing on a TV interview with Kent Brockman, hilariously denies his gang's criminal activities. The cat burglar calls into the show, taunting Homer and revealing his next target: the world's largest cubic zirconia at the Springfield Museum.  Homer vows to protect it, but his efforts are hampered by his own incompetence and his father's antics.  Grandpa Simpson, despite his age and frequent falls, unexpectedly solves the mystery, deducing the burglar's identity (Molloy, a resident of his retirement home) through astute observation.  The climax sees Molloy apprehended at the retirement home, returning all the stolen goods, including Lisa's saxophone.  Despite Molloy's charming confession and the crowd's plea for leniency, Chief Wiggum arrests him, only to be bribed by Mayor Quimby. The episode ends with Homer mistakenly believing he caught the burglar himself, highlighting the comedic absurdity of the situation and Homer's self-delusion.",
    "The episode starts at the Springfield Retirement Castle, where the notorious cat burglar, Molloy, is apprehended, not by the police, but by Grampa Simpson.  Molloy, surprisingly charming, returns the stolen goods, including Selma's lock of MacGyver's hair and Lisa's saxophone.  Despite the heartwarming moment, Chief Wiggum arrests Molloy, only to receive a bribe from Mayor Quimby.  The plot thickens when Molloy reveals he buried his loot under a big \"T\" somewhere in Springfield.  Homer, convinced he'll find the treasure, causes chaos searching for it.  Molloy gives increasingly vague directions, leading to a town-wide panic and a bizarre scene with Kent Brockman suggesting cannibalism.  The search ends in a park, with nothing found but a note signed by someone Homer can't identify.  The episode concludes with the group digging futilely, highlighting the absurdity of the situation and the incompetence of the Springfield authorities.",
]
# Compare the final summary with each chunk-level summary in turn and print
# BLEU plus ROUGE-1 / ROUGE-L (F, recall, precision) for every chunk.
# NOTE(review): the final summary is passed as the hypothesis and the chunk as
# the reference, and the hard-coded `chunks` list is used rather than
# `llm_chunk_summaries` — confirm both are intended.
for chunk_no, chunk in enumerate(chunks, start=1):  # 1-based numbering in the report
    bleu_chunk, rouge_chunk = evaluate_summary(llm_final_summary, chunk)

    print(f"\nChunk {chunk_no}:")
    print(f"  BLEU: {bleu_chunk:.4f}")
    print(f"  ROUGE-1-F: {rouge_chunk['rouge-1']['f']:.4f}")
    print(f"  ROUGE-1-R: {rouge_chunk['rouge-1']['r']:.4f}")
    print(f"  ROUGE-1-P: {rouge_chunk['rouge-1']['p']:.4f}")
    print(f"  ROUGE-L-F: {rouge_chunk['rouge-l']['f']:.4f}")
    print(f"  ROUGE-L-R: {rouge_chunk['rouge-l']['r']:.4f}")
    print(f"  ROUGE-L-P: {rouge_chunk['rouge-l']['p']:.4f}")




It is recommended to enable `effective_order` for sentence-level BLEU.
It is recommended to enable `effective_order` for sentence-level BLEU.
It is recommended to enable `effective_order` for sentence-level BLEU.
It is recommended to enable `effective_order` for sentence-level BLEU.
It is recommended to enable `effective_order` for sentence-level BLEU.



Final Summary Evaluation:
BLEU: 0.0250
ROUGE-1-F: 0.1975
ROUGE-1-R: 0.1808
ROUGE-1-P: 0.2177
ROUGE-L-F: 0.1975
ROUGE-L-R: 0.1808
ROUGE-L-P: 0.2177

Chunk 1:
  BLEU: 0.1266
  ROUGE-1-F: 0.3485
  ROUGE-1-R: 0.4468
  ROUGE-1-P: 0.2857
  ROUGE-L-F: 0.3402
  ROUGE-L-R: 0.4362
  ROUGE-L-P: 0.2789

Chunk 2:
  BLEU: 0.2059
  ROUGE-1-F: 0.4264
  ROUGE-1-R: 0.4955
  ROUGE-1-P: 0.3741
  ROUGE-L-F: 0.3798
  ROUGE-L-R: 0.4414
  ROUGE-L-P: 0.3333

Chunk 3:
  BLEU: 0.2847
  ROUGE-1-F: 0.5130
  ROUGE-1-R: 0.5656
  ROUGE-1-P: 0.4694
  ROUGE-L-F: 0.4610
  ROUGE-L-R: 0.5082
  ROUGE-L-P: 0.4218

Chunk 4:
  BLEU: 0.1260
  ROUGE-1-F: 0.4016
  ROUGE-1-R: 0.4766
  ROUGE-1-P: 0.3469
  ROUGE-L-F: 0.3465
  ROUGE-L-R: 0.4112
  ROUGE-L-P: 0.2993

Next step: perform a manual analysis (compare the summaries, look for omissions, etc.).
