# SemEval 2026 Task 8 - Submission Pipeline

This notebook generates valid submission files for **Task A**, **Task B**, and **Task C** in a **single run**.

**Workflow**:
1.  **Load Data**: Reads the test dataset.
2.  **Run Pipeline**: Executes your RAG system ONCE per question.
3.  **Format Outputs**: Creates three separate lists of results (one per task).
4.  **Save Files**: Writes `submission_TaskA...`, `submission_TaskB...`, `submission_TaskC...`.
5.  **Validate**: Checks all files against requirements.

In [None]:
import os
import json
import sys
from typing import List, Dict, Any
from tqdm import tqdm

# Put the parent of the current working directory on the import path so the
# `src` package can be resolved when the notebook runs from a sub-directory.
project_root = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Load the RAG pipeline entry point. When the import fails, only a hint is
# printed and `app` stays undefined.
try:
    from src.graph import app
except ImportError:
    print("ERROR: Cannot import 'src.graph.app'. Run from 'eval/' directory.")

## 1. Configuration

In [None]:
# --- CONFIGURATION ---
# Team identifier; it is embedded in every submission file name.
TEAM_NAME = "Gbgers"

# Both paths are relative to the notebook's working directory.
INPUT_FILE = "../dataset/human/generation_tasks/reference.jsonl"
OUTPUT_DIR = "../data/submissions"

# Create the output directory up front; an existing directory is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)
print("\n".join((
    f"Team: {TEAM_NAME}",
    f"Input: {INPUT_FILE}",
    f"Output Dir: {OUTPUT_DIR}",
)))

In [None]:
def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """Read a JSON Lines file into a list of parsed records.

    Blank and whitespace-only lines are skipped. A missing file is reported
    on stdout and produces an empty list instead of an exception; malformed
    JSON still raises from ``json.loads``.

    Args:
        file_path: Path of the ``.jsonl`` file to read (decoded as UTF-8).

    Returns:
        The parsed records in file order, or ``[]`` if the file is missing.
    """
    records: List[Dict[str, Any]] = []
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            records.extend(json.loads(raw) for raw in handle if raw.strip())
    except FileNotFoundError:
        print(f"ERROR: File {file_path} not found.")
    else:
        # Only announce a count when the file could actually be read.
        print(f"Loaded {len(records)} items.")
    return records

# Load the input items once; the execution loop below iterates over them.
# load_jsonl returns an empty list when INPUT_FILE does not exist.
test_data = load_jsonl(INPUT_FILE)

## 2. Execution Loop (Single Pass)
We run the inference once and structure the data for all 3 tasks:
*   **Task A**: `contexts` only.
*   **Task B**: `contexts` + `predictions`.
*   **Task C**: `contexts` + `predictions`.

In [None]:
# Per-task result lists; each receives one entry per input item, in input order.
subs_A = []
subs_B = []
subs_C = []

print("Running pipeline...")

# Bind the pipeline entry point once, outside the per-item try/except. If the
# import of `app` failed earlier, this raises NameError immediately instead of
# being swallowed for every question and yielding files full of fallback answers.
run_pipeline = app.invoke

for item in tqdm(test_data):
    question = item.get("question")

    # --- 1. Run Pipeline ---
    # A failure on one question must not abort the run: fall back to an empty
    # result so the item still appears in every submission file.
    try:
        result = run_pipeline({"question": question})
    except Exception as e:
        print(f"ERROR processing '{question}': {e}")
        result = {}

    # --- 2. Format Contexts ---
    # `or []` / `or {}` also cover values that are present but set to None.
    raw_docs = result.get("documents") or []
    formatted_contexts = []
    for doc in raw_docs:
        # NOTE(review): documents are read via attributes (`metadata`,
        # `page_content`); confirm the pipeline never returns plain dicts.
        meta = getattr(doc, "metadata", None) or {}
        content = getattr(doc, "page_content", "")
        doc_id = meta.get("id", meta.get("document_id", "unknown"))
        raw_score = meta.get("relevance_score", meta.get("score", 0.0))
        # A missing/None/non-numeric score must not crash the loop after the
        # (expensive) inference has already run; use the same 0.0 default.
        try:
            score = float(raw_score)
        except (TypeError, ValueError):
            score = 0.0

        formatted_contexts.append({
            "document_id": doc_id,
            "text": content,
            "score": score,
        })

    # --- 3. Format Predictions ---
    # Empty or missing generations are replaced by a fixed placeholder answer.
    gen_text = result.get("generation", "")
    if not gen_text:
        gen_text = "No answer generated."
    formatted_predictions = [{"text": gen_text}]

    # --- 4. Build Task Items ---
    # Each task item is a shallow copy of the input record plus the task's fields.

    # Task A: Contexts only
    item_A = item.copy()
    item_A["contexts"] = formatted_contexts
    subs_A.append(item_A)

    # Task B: Contexts + Predictions
    item_B = item.copy()
    item_B["contexts"] = formatted_contexts
    item_B["predictions"] = formatted_predictions
    subs_B.append(item_B)

    # Task C: Contexts + Predictions
    item_C = item.copy()
    item_C["contexts"] = formatted_contexts
    item_C["predictions"] = formatted_predictions
    subs_C.append(item_C)

print(f"Finished. Processed {len(subs_A)} items.")

## 3. Save Files

In [None]:
def save_jsonl(data: List[Dict], filename: str):
    """Write records to ``OUTPUT_DIR/filename`` as JSON Lines.

    Args:
        data: Records to serialise, one JSON document per output line.
        filename: File name, joined onto the module-level ``OUTPUT_DIR``.

    Returns:
        The path of the file that was written.
    """
    path = os.path.join(OUTPUT_DIR, filename)
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(json.dumps(record) + '\n' for record in data)
    print(f"Saved: {path}")
    return path

# Write one submission file per task (A, then B, then C) and keep the
# returned paths for the validation step.
file_A, file_B, file_C = (
    save_jsonl(subs_A, f"submission_TaskA_{TEAM_NAME}.jsonl"),
    save_jsonl(subs_B, f"submission_TaskB_{TEAM_NAME}.jsonl"),
    save_jsonl(subs_C, f"submission_TaskC_{TEAM_NAME}.jsonl"),
)

## 4. Validate All

In [None]:
def validate_task_format(file_path: str, task_type: str):
    """Check that every record of a submission file has the fields its task needs.

    Args:
        file_path: Path to a JSON Lines submission file (read as UTF-8).
        task_type: "TaskA", "TaskB" or "TaskC". TaskA and TaskC require a
            list under "contexts"; TaskB and TaskC require a list under
            "predictions". Any other value imposes no field requirements.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is "OK" on success,
        "Empty" when the file holds no records, otherwise a description of
        the first offending record prefixed with its line number, or the
        text of the exception that interrupted validation.
    """
    required_fields = []
    # Task A/C need contexts
    if task_type in ("TaskA", "TaskC"):
        required_fields.append("contexts")
    # Task B/C need predictions
    if task_type in ("TaskB", "TaskC"):
        required_fields.append("predictions")

    try:
        record_count = 0
        # Explicit UTF-8 so the check does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            # Inspect every record, not just the first, so a malformed item
            # anywhere in the file fails validation.
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                record_count += 1
                record = json.loads(line)

                errors = []
                if not isinstance(record, dict):
                    errors.append("record is not a JSON object")
                else:
                    for field in required_fields:
                        if field not in record:
                            errors.append(f"Missing '{field}'")
                        elif not isinstance(record[field], list):
                            errors.append(f"'{field}' not list")

                if errors:
                    return False, f"Line {line_no}: " + "; ".join(errors)

        if not record_count:
            return False, "Empty"
        return True, "OK"
    except Exception as e:
        # The caller expects a (bool, message) result rather than an exception,
        # so I/O and JSON errors are reported through the message.
        return False, str(e)

# Validate each written submission file and print a coloured PASS/FAIL line.
print("--- Validation ---")
submission_files = {"TaskA": file_A, "TaskB": file_B, "TaskC": file_C}
for task, fpath in submission_files.items():
    valid, msg = validate_task_format(fpath, task)
    # ANSI escape codes: green for PASS, red for FAIL.
    if valid:
        status = "\033[92mPASS\033[0m"
    else:
        status = "\033[91mFAIL\033[0m"
    print(f"[{task}] {status} : {msg}")