In [None]:
import json
import numpy as np
import pandas as pd
import torch
from pathlib import Path
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

In [None]:
# 1) Load the test split.
# A context manager closes the file handle deterministically; the previous
# `json.load(open(...))` left it open until garbage collection.
with open("data/splits/test.json", encoding="utf-8") as test_file:
    recs = json.load(test_file)

# The tokenizer and model are loaded once, in the dedicated cell that follows.
# Loading them here as well read the checkpoint from disk and moved it to the
# GPU twice on every Run All.


In [None]:
# 2) Load the fine-tuned checkpoint: tokenizer plus causal language model.
# NOTE(review): the original comment calls this an "adapter"; whether the
# directory holds a merged model or adapter weights is not visible here.
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint_dir = "experiments/lora_llama"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir)
# Switch to inference mode and move the weights to the GPU.
model = model.eval().to("cuda")


In [None]:
# 3) Generate the captions — shared state and scorers used by the cells below.

# Accumulator for one record per test example (filled by the generation loop).
out = []

# Text-overlap metrics: smoothed BLEU and stemmed ROUGE-L.
smooth = SmoothingFunction().method1
rouge = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)

# CLIP model and its processor, used for image/text similarity.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
clip_model = clip_model.eval().to("cuda")

def compute_clip_score(img_path, text):
    """Return the CLIP cosine similarity between an image and a caption.

    Args:
        img_path: Path of the image file; it is opened with PIL and converted
            to RGB.
        text: Caption to compare against the image.

    Returns:
        float: Dot product of the L2-normalised image and text embeddings.

    NOTE(review): this is the raw cosine similarity. The CLIPScore metric as
    usually published rescales it (2.5 * max(cos, 0)) — confirm which value is
    meant to be reported.
    """
    # Close the file handle as soon as the pixels are copied by convert();
    # a bare Image.open() leaked one descriptor per scored image.
    with Image.open(img_path) as img:
        image = img.convert("RGB")

    # truncation=True clips over-long captions to the tokenizer's maximum
    # length instead of letting the text encoder fail on them.
    inputs = clip_processor(text=[text], images=[image],
                            return_tensors="pt", padding=True,
                            truncation=True).to("cuda")
    with torch.no_grad():
        outputs = clip_model(**inputs)

    img_emb = outputs.image_embeds / outputs.image_embeds.norm(dim=-1, keepdim=True)
    txt_emb = outputs.text_embeds / outputs.text_embeds.norm(dim=-1, keepdim=True)
    # Single image/text pair, so summing the element-wise product is the dot product.
    return (img_emb * txt_emb).sum().item()

# Generate one emotion-conditioned rewrite per test record and save them.

# Sampling is stochastic; seed it so a Restart & Run All reproduces the CSV.
torch.manual_seed(42)

for r in tqdm(recs, total=len(recs)):
    prompt = f"Base caption: {r['caption']}\nEmotion: {r['emotion']}\nRewrite the above caption to fit the specified emotion:"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_len = inputs["input_ids"].shape[1]

    # max_new_tokens bounds only the continuation. The previous max_length=40
    # also counted the prompt tokens, leaving little or no room to generate.
    # do_sample=True is required for temperature to take effect.
    gen_ids = model.generate(**inputs, max_new_tokens=40,
                             do_sample=True, temperature=0.7)

    # A causal LM returns prompt + continuation; decode only the continuation
    # so the prompt text does not leak into the prediction and the metrics.
    pred = tokenizer.decode(gen_ids[0][prompt_len:], skip_special_tokens=True).strip()

    out.append({
        "img_name": r["img_name"],
        # NOTE(review): "gold" is the same base caption that is fed into the
        # prompt — confirm this is the intended reference for BLEU/ROUGE.
        "gold":     r["caption"],
        "pred":     pred,
        "emotion":  r["emotion"]
    })

df = pd.DataFrame(out)

# Create the output directory if needed so to_csv does not fail on a fresh checkout.
results_path = Path("results/lora_test_preds.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(results_path, index=False)

In [None]:
# 4) Compute BLEU-4 / ROUGE-L / CLIP similarity for the LoRA predictions.
gold_texts = df["gold"].tolist()
hyps = df["pred"].tolist()

# corpus_bleu expects, per example, a list of tokenised references.
refs = [[gold.split()] for gold in gold_texts]
hyp_tokens = [hyp.split() for hyp in hyps]
bleu4 = corpus_bleu(
    refs,
    hyp_tokens,
    weights=(0.25, 0.25, 0.25, 0.25),
    smoothing_function=smooth,
)

# Mean ROUGE-L F-measure over (reference, prediction) pairs.
rouge_f1 = [
    rouge.score(gold, hyp)["rougeL"].fmeasure
    for gold, hyp in zip(gold_texts, hyps)
]
rougeL = np.mean(rouge_f1)

# Image/text similarity between each test image and its predicted caption.
image_dir = Path("data/images")
clip_scores = [
    compute_clip_score(image_dir / name, caption)
    for name, caption in zip(df["img_name"], hyps)
]
clip_mean = float(np.round(np.mean(clip_scores), 4))

print(f"LoRA → BLEU-4: {bleu4:.4f}  ROUGE-L: {rougeL:.4f}  CLIPScore: {clip_mean:.4f}")