# SemEval 2026 Task 8: Multi-Turn RAG Evaluation

## Task B: Standalone Generation

This notebook implements **Task B**: answering questions using the LLM's parametric knowledge without retrieval.

**Note:** This notebook loads the LLM through the centralized `src.generation` module to ensure consistency.

In [None]:
import os
import sys
import json
from tqdm import tqdm

# Resolve the project root: the working directory when it contains `src`,
# otherwise its parent directory.
PROJECT_ROOT = os.getcwd() if os.path.exists("src") else os.path.abspath("..")

# Put the root at the front of the import path (once) so `src.*` is importable.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

from src.generation import create_generation_components
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [None]:
# --- Run switches ---------------------------------------------------------
# With TEST_MODE on, each domain is truncated to its first TEST_QUERY_LIMIT
# conversations.
TEST_MODE = True
TEST_QUERY_LIMIT = 5

# --- Submission identity --------------------------------------------------
TEAM_NAME = "Gbgers"
DOMAINS = ["govt", "clapnq", "fiqa", "cloud"]

# --- Paths (anchored at PROJECT_ROOT) -------------------------------------
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "data/submissions")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, f"submission_TaskB_{TEAM_NAME}.jsonl")
CONVERSATIONS_FILE = os.path.join(PROJECT_ROOT, "dataset/human/conversations/conversations.json")

# Create the submissions directory up front; no error if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Build the shared generation components; only `components.llm` is used below.
print("Loading LLM...")
components = create_generation_components()

# Task B prompt (no retrieval): the question is the only interpolated field.
# NOTE(review): the special tokens appear to follow a Llama-3-style chat
# layout -- confirm they match the model loaded by `src.generation`.
_task_b_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert assistant. Answer based on your knowledge.
Be concise and professional.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{question}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""

task_b_prompt = PromptTemplate(
    input_variables=["question"],
    template=_task_b_template,
)

# Pipeline: fill the prompt -> run the LLM -> parse the output to a string.
chain = task_b_prompt | components.llm | StrOutputParser()

In [None]:
def extract_last_query(messages):
    """Return the text of the most recent user message.

    Scans ``messages`` from the end and picks the first entry whose
    ``"speaker"`` is ``"user"``. Returns that entry's ``"text"`` (``""`` when
    the key is absent), or ``""`` when there is no user message at all.
    """
    user_texts = (
        entry.get("text", "")
        for entry in reversed(messages)
        if entry.get("speaker") == "user"
    )
    return next(user_texts, "")

# Load all conversations. The file is JSON, so decode it explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(CONVERSATIONS_FILE, encoding="utf-8") as f:
    all_convs = json.load(f)

results = []
failures = 0  # number of records whose prediction is an error message
for domain in DOMAINS:
    # Substring match against the lower-cased "domain" field; `or ""` keeps a
    # null/missing domain from raising.
    convs = [c for c in all_convs if domain in (c.get("domain") or "").lower()]
    if TEST_MODE:
        convs = convs[:TEST_QUERY_LIMIT]

    for conv in tqdm(convs, desc=domain):
        # Only the latest user turn is sent to the model; skip conversations
        # that have none.
        q = extract_last_query(conv.get("messages") or [])
        if not q:
            continue
        try:
            ans = chain.invoke({"question": q})
        except Exception as e:
            # Best-effort: keep the run going and keep the record (with the
            # error text as the prediction), but make the failure visible
            # rather than letting it pass silently into the submission.
            ans = str(e)
            failures += 1
            tqdm.write(f"[{domain}] generation failed for {conv.get('author')}: {e}")

        # NOTE(review): the "author" field is used as the conversation id --
        # confirm this is the intended identifier for the submission format.
        results.append({
            "conversation_id": conv.get("author"),
            "task_id": f"{conv.get('author')}::1",
            "Collection": f"mt-rag-{domain}",
            "predictions": [{"text": ans}]
        })

# One JSON object per line (JSONL).
with open(OUTPUT_FILE, 'w', encoding="utf-8") as f:
    f.writelines(json.dumps(r) + '\n' for r in results)
if failures:
    print(f"WARNING: {failures} prediction(s) contain an error message instead of an answer.")
print("Saved.")