# SemEval 2026 Task 8 - Task B: Generation

This notebook focuses on **Task B: Generation**. 
Goal: Given a question (and potentially contexts), generate an accurate answer.

**Output Format**:
Each record (one JSON object per line) in the submission file must contain a `predictions` field (a list of objects), where each object has:
- `text`: The generated answer string.

In [None]:
import os
import json
import sys
from tqdm import tqdm

# Add the project root (the parent of the current working directory) to
# sys.path so that the `src` package can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

try:
    from src.graph import app
except ImportError as exc:
    # Report the underlying cause, then re-raise: continuing would leave `app`
    # undefined, and the generation loop's broad `except Exception` would then
    # record "Error" as the answer for every single item.
    print(f"ERROR: Check imports. ({exc})")
    raise

In [None]:
# --- CONFIGURATION ---
# Identifiers embedded in the submission file name.
TASK_TYPE = "TaskB"
TEAM_NAME = "Gbgers"
# Input typically has questions (and maybe contexts); read as JSONL.
INPUT_FILE = "../dataset/human/generation_tasks/reference.jsonl"
OUTPUT_FILE = f"../data/submissions/submission_{TASK_TYPE}_{TEAM_NAME}.jsonl"

# Create the directory that will hold OUTPUT_FILE; no-op if it already exists.
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
print(f"Target: {OUTPUT_FILE}")

In [None]:
# Load the JSONL input: one JSON object per non-blank line.
# The encoding is explicit so non-ASCII text is decoded the same way on every
# platform, matching the UTF-8 encoding used when the submission is written.
with open(INPUT_FILE, encoding="utf-8") as f:
    test_data = [json.loads(line) for line in f if line.strip()]
print(f"Loaded {len(test_data)} items.")

In [None]:
# Collects one output record per input item, in input order.
results = []

print("Running Generation Task...")
for record in tqdm(test_data):
    query = record.get("question")

    try:
        # Invoke the graph with only the question.
        # NOTE: If your graph supports 'documents' as input, you could pass
        # record.get('contexts') here to do purely generation.
        # Otherwise, this runs Retrieval+Generation (which is also valid).
        state = app.invoke({"question": query})
        # A missing or empty "generation" value falls back to a placeholder.
        answer = state.get("generation", "") or "No Answer"
    except Exception as err:
        # Best effort: log the failure and keep going with a sentinel answer.
        print(f"Error on '{query}': {err}")
        answer = "Error"

    # Task B requires a 'predictions' list; every input field is carried over.
    results.append({**record, "predictions": [{"text": answer}]})

print(f"Done. Generated {len(results)} answers.")

In [None]:
# Write the submission as JSONL: one JSON object per line.
with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    for x in results:
        json.dump(x, f)
        f.write('\n')
print(f"Saved to {OUTPUT_FILE}")


# Validation
def _has_valid_predictions(record):
    """Return True if *record* has a non-empty `predictions` list of dicts that each contain `text`."""
    predictions = record.get("predictions") if isinstance(record, dict) else None
    return (
        isinstance(predictions, list)
        and bool(predictions)
        # Require dict entries: `"text" in "some text"` would otherwise pass
        # through substring matching on a plain string.
        and all(isinstance(p, dict) and "text" in p for p in predictions)
    )


if not results:
    # Nothing was generated; indexing results[0] would raise IndexError.
    print("\033[91mVALIDATION FAIL: no results to validate.\033[0m")
elif all(_has_valid_predictions(r) for r in results):
    # Every record is checked, not just the first one.
    print("\033[92mVALIDATION PASS: Structure correct.\033[0m")
else:
    print("\033[91mVALIDATION FAIL: 'predictions' formatting error.\033[0m")