# SemEval 2026 Task 8 - Unified Pipeline (All Tasks)

This notebook executes the complete RAG pipeline once per question and generates submission files for **all three tasks** from that single run.

**Efficiency**: Instead of running the pipeline three times (which is slow and costly), we run it once per question and extract:
- **Task A (Retrieval)**: The retrieved documents (`contexts`).
- **Task B (Generation)**: The generated answer (`predictions`).
- **Task C (RAG)**: Both contexts and predictions.

## Output Files
Files are saved in `data/submissions/`, where `YOURTEAM` is replaced by the `TEAM_NAME` set in the configuration cell:
1. `submission_TaskA_YOURTEAM.jsonl`
2. `submission_TaskB_YOURTEAM.jsonl`
3. `submission_TaskC_YOURTEAM.jsonl`

In [None]:
# --- KAGGLE SETUP ---
# Uncomment and run this cell FIRST if you are running on Kaggle.
# It clones the repo, installs dependencies, and sets the working directory.

# import os
# if not os.path.exists("llm-semeval-task8"):
#     !git clone https://github.com/LookUpMark/llm-semeval-task8.git

# %cd llm-semeval-task8
# !git checkout dev
# !pip install -c scripts/evaluation/constraints.txt -r scripts/evaluation/requirements.txt

In [None]:
import os
import json
import sys
from tqdm import tqdm

# Locate Project Root (Robust for Local vs Kaggle)
# If `src/` is visible from the working directory we are already at the repo
# root; otherwise assume the notebook was launched one level below it.
if os.path.exists("src"):
    project_root = os.getcwd()
else:
    project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Make `src` importable without adding the same entry twice on re-runs.
if project_root not in sys.path:
    sys.path.append(project_root)

# Import RAG pipeline
try:
    from src.graph import app
except ImportError:
    print("ERROR: Run this notebook from the 'notebooks/' directory or repo root.")
    # Re-raise instead of continuing: with `app` undefined, the execution
    # loop's broad `except Exception` would catch the resulting NameError for
    # every question and fill the submission files with "Error" answers.
    # The original traceback also shows whether the failure is a wrong
    # working directory or a missing dependency inside `src.graph`.
    raise

In [None]:
# --- CONFIGURATION ---
# Team identifier embedded in every submission filename.
TEAM_NAME = "Gbgers"

# Resolve paths against the current directory when it holds `dataset/`,
# otherwise against its parent.
if os.path.exists("dataset"):
    BASE_PATH = "."
else:
    BASE_PATH = ".."

INPUT_FILE = os.path.join(BASE_PATH, "dataset/human/generation_tasks/reference.jsonl")
SUBMISSION_DIR = os.path.join(BASE_PATH, "data/submissions")

# One output file per task, all inside the submissions directory.
FILE_A = os.path.join(SUBMISSION_DIR, f"submission_TaskA_{TEAM_NAME}.jsonl")
FILE_B = os.path.join(SUBMISSION_DIR, f"submission_TaskB_{TEAM_NAME}.jsonl")
FILE_C = os.path.join(SUBMISSION_DIR, f"submission_TaskC_{TEAM_NAME}.jsonl")

# Create the output directory now; `exist_ok` keeps the cell re-runnable.
os.makedirs(SUBMISSION_DIR, exist_ok=True)

print(f"Inputs: {INPUT_FILE}")
print(f"Outputs:\n 1. {FILE_A}\n 2. {FILE_B}\n 3. {FILE_C}")

In [None]:
def load_data(path):
    """Load a JSON Lines file into a list of parsed records.

    Lines that are empty or contain only whitespace are skipped.

    Args:
        path: Location of the UTF-8 encoded `.jsonl` file.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and the 1-based line
            number, because the decoder only reports the position within
            the offending line.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Same exception type, so existing handlers keep working;
                # only the message gains the file location.
                raise json.JSONDecodeError(
                    f"{path}, line {line_no}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return records

# Parse the reference file once; the execution cell iterates over this list.
test_data = load_data(path=INPUT_FILE)
print(f"Loaded {len(test_data)} questions to process.")

In [None]:
# --- EXECUTION ---
def _to_score(value):
    """Coerce a metadata score to float; return 0.0 if it is None or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _format_contexts(raw_docs):
    """Turn retrieved documents into the `contexts` entries used by Tasks A and C.

    Each document is read through its `metadata` and `page_content`
    attributes (presumably LangChain-style Document objects — confirm against
    `src.graph`). Missing attributes fall back to an empty mapping / empty
    string, and a `None` document list is treated as empty.
    """
    contexts = []
    for doc in raw_docs or []:
        meta = getattr(doc, "metadata", None) or {}
        contexts.append({
            "document_id": meta.get("id", meta.get("document_id", "unknown")),
            "text": getattr(doc, "page_content", ""),
            "score": _to_score(meta.get("relevance_score", meta.get("score", 0.0))),
        })
    return contexts


results_A = []
results_B = []
results_C = []

print("Starting Unified Pipeline Execution...")

for item in tqdm(test_data):
    question = item.get("question")

    # 1. RUN PIPELINE ONCE
    # Best-effort: a failure on one question is logged and recorded as an
    # "Error" answer with no contexts, so the remaining questions still run.
    try:
        resp = app.invoke({"question": question})
        raw_docs = resp.get("documents", [])
        gen_text = resp.get("generation", "")
    except Exception as e:
        print(f"Error on '{question}': {e}")
        raw_docs = []
        gen_text = "Error"

    # 2. FORMAT DATA
    # Formatting happens outside the try block, so it must not raise on odd
    # metadata (None score, None document list); otherwise one bad document
    # would abort the run and discard every result collected so far.
    contexts = _format_contexts(raw_docs)

    # Format Predictions for B and C (an empty generation becomes "No Answer")
    predictions = [{"text": gen_text if gen_text else "No Answer"}]

    # 3. BUILD RESULT OBJECTS
    # Each task gets its own shallow copy of the input record plus its fields.
    # Task A: contexts only
    results_A.append({**item, "contexts": contexts})

    # Task B: predictions only
    results_B.append({**item, "predictions": predictions})

    # Task C: contexts + predictions
    results_C.append({**item, "contexts": contexts, "predictions": predictions})

print("Execution Complete.")

In [None]:
# --- SAVE OUTPUTS ---

def save_jsonl(data, path):
    """Write `data` to `path` as JSON Lines and report how many items were saved.

    Each element of `data` is serialized as one JSON document followed by a
    newline. The target file is overwritten and written as UTF-8.
    """
    with open(path, 'w', encoding='utf-8') as out_file:
        for record in data:
            out_file.write(json.dumps(record) + '\n')
    item_count = len(data)
    print(f"Saved: {path} ({item_count} items)")

# Write the three submission files in task order (A, B, C).
for task_results, task_path in (
    (results_A, FILE_A),
    (results_B, FILE_B),
    (results_C, FILE_C),
):
    save_jsonl(task_results, task_path)

In [None]:
# --- VALIDATION ---
print("Validating outputs...")

def check(data, task):
    """Return True when every record in `data` has the fields required by `task`.

    Args:
        data: Sequence of result records (mappings) for one task.
        task: Task letter: "A" needs `contexts`, "B" needs `predictions`,
            "C" needs both.

    Returns:
        False for empty/None `data` or an unknown task; otherwise True only
        if all records contain every required key. All records are checked,
        not just the first, so a malformed entry anywhere is caught.
    """
    required_keys = {
        "A": ("contexts",),
        "B": ("predictions",),
        "C": ("contexts", "predictions"),
    }.get(task)
    if not data or required_keys is None:
        return False
    return all(key in record for record in data for key in required_keys)

# Report the validation outcome for each task in order (A, B, C).
for task_label, task_results in (("A", results_A), ("B", results_B), ("C", results_C)):
    print(f"Task {task_label} Valid: {check(task_results, task_label)}")