## Hybrid Pull Request Summarization with Gemini

# This notebook demonstrates:
# 1. Loading a small set of GitHub PRs
# 2. Cleaning / normalizing diffs
# 3. Generating PR descriptions using the Gemini API
# 4. Evaluating generated descriptions using ROUGE-L

In [None]:
# 1. Imports
import os
from dotenv import load_dotenv
# Read the .env file one directory above the working directory before any
# later cell needs its values.
# NOTE(review): presumably this supplies the Gemini API key consumed by
# src.llm_client — confirm against that module.
load_dotenv("../.env") 

import pandas as pd
from tqdm import tqdm

# Show up to 400 characters per cell so long descriptions and diffs stay
# readable when DataFrames are displayed.
pd.options.display.max_colwidth = 400

In [None]:
# 2. Load PR dataset
df = pd.read_csv("../data/pull_requests.csv")

# Work on an independent copy of the first five rows so that columns added
# later do not modify (or warn about modifying) the full frame.
df_small = df.iloc[:5].copy()

# Display the first three rows as the cell output.
df_small.iloc[:3]

In [None]:
# 3. Preprocessing
import re

def trim_diff(diff: str, max_len: int = 4000) -> str:
    """Return the first ``max_len`` characters of ``diff``.

    Any value that is not a ``str`` (for example ``None`` or a float NaN)
    yields an empty string instead of raising.
    """
    return diff[:max_len] if isinstance(diff, str) else ""

def normalize_text(text: str) -> str:
    """Strip ``text`` and collapse every run of whitespace into one space.

    Newlines and tabs count as whitespace, so the result is a single line.
    """
    stripped = text.strip()
    return re.sub(r"\s+", " ", stripped)

# Truncate each raw diff first, then collapse its whitespace.
df_small["diff_clean"] = df_small["diff"].map(trim_diff).map(normalize_text)

In [None]:
# 4. Gemini PR summarization
from src.llm_client import safe_generate

# Create the output folder before generating anything: DataFrame.to_csv does
# not create missing directories, and failing only at the save step would
# leave the generated descriptions unsaved.
os.makedirs("../results", exist_ok=True)

# Rows whose cleaned diff is empty keep an empty description and are never
# passed to safe_generate.
df_small["description_generated"] = ""
for idx, diff_text in tqdm(df_small["diff_clean"].items(), total=len(df_small)):
    if diff_text:
        df_small.at[idx, "description_generated"] = safe_generate(diff_text)

# Save results
df_small.to_csv("../results/notebook_summaries.csv", index=False)

# Preview
df_small[["pr_number", "description_original", "description_generated"]]

In [None]:
# 5. ROUGE-L evaluation
from src.evaluate import compute_rouge_l

# Build the reference and prediction lists the same way: missing values
# become empty strings so both lists contain only strings.
refs, preds = (
    df_small[column].fillna("").tolist()
    for column in ("description_original", "description_generated")
)

rouge_score = compute_rouge_l(refs, preds)
print("Average ROUGE-L:", rouge_score)

# Next steps
# - Run the full scripts from a terminal for larger datasets.
# - Collect human evaluation using Google Forms and aggregate results.
# - Compare ROUGE-L across multiple PRs.