# SemEval 2026 Task 8: Multi-Turn RAG Pipeline

**Team:** Gbgers  
**Architecture:** Self-CRAG with LangGraph Orchestration

---

| Task | Description | Method |
|------|-------------|--------|
| **A** | Retrieval | BGE-M3 → Cross-Encoder Reranking (Top-20 → Top-5) |
| **B** | Generation | Direct LLM (Llama 3.1 8B, No Context) |
| **C** | RAG | Self-CRAG Graph with Hallucination Check |

In [1]:
# ============================================================================
# IMPORTS
# ============================================================================
import os, sys, json, zipfile
from tqdm import tqdm

# --- Project Root Detection ---
# Always resolve PROJECT_ROOT to an absolute path: it is pushed onto sys.path
# and used as the base of every data/output path, so a relative value would
# silently break as soon as the working directory changes.
if os.path.isdir("src"):
    # Notebook launched from the repository root.
    PROJECT_ROOT = os.getcwd()
elif os.path.isdir("llm-semeval-task8"):
    # Notebook launched from the directory that contains the checkout.
    PROJECT_ROOT = os.path.abspath("llm-semeval-task8")
else:
    # Fallback: assume the notebook lives one level below the repository root.
    PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# --- Core Modules ---
from src.ingestion import load_and_chunk_data, build_vector_store
from src.retrieval import get_retriever, get_qdrant_client
from src.generation import create_generation_components
from src.graph import initialize_graph

# --- LangChain ---
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage

print(f"Project Root: {PROJECT_ROOT}")

Project Root: /home/marcantoniolopez/Documenti/github/projects/llm-semeval-task8


In [2]:
# ============================================================================
# CONFIGURATION
# ============================================================================
# Identity of the submission and the corpora it covers.
TEAM_NAME = "Gbgers"
DOMAINS = [
    "govt",
    "clapnq",
    "fiqa",
    "cloud",
]
COLLECTION_NAME = "mtrag_unified"  # single Qdrant collection shared by all domains

# Execution mode switch: True = quick smoke run, False = full submission (~3h).
TEST_MODE = False

# Size limits.
TEST_CHUNK_LIMIT = 1_000       # chunks per domain (test mode)
TEST_QUERY_LIMIT = 5           # conversations per domain (test mode)
MAX_DOCS_PER_DOMAIN = 25_000   # full mode: 25k * 4 domains = 100k total

# Input locations, all anchored at PROJECT_ROOT.
CORPUS_DIR = os.path.join(PROJECT_ROOT, "dataset/corpora/passage_level")
CONV_FILE = os.path.join(PROJECT_ROOT, "dataset/human/conversations/conversations.json")

# Output locations.
QDRANT_PATH = os.path.join(PROJECT_ROOT, "qdrant_db")
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "data/submissions")

# One JSONL submission file per task.
FILE_A = os.path.join(OUTPUT_DIR, f"submission_TaskA_{TEAM_NAME}.jsonl")
FILE_B = os.path.join(OUTPUT_DIR, f"submission_TaskB_{TEAM_NAME}.jsonl")
FILE_C = os.path.join(OUTPUT_DIR, f"submission_TaskC_{TEAM_NAME}.jsonl")

# Make sure the writable directories exist before anything tries to use them.
for _required_dir in (OUTPUT_DIR, QDRANT_PATH):
    os.makedirs(_required_dir, exist_ok=True)

print(f"Mode: {'TEST' if TEST_MODE else 'FULL'} | Max Docs/Domain: {MAX_DOCS_PER_DOMAIN}")

Mode: FULL | Max Docs/Domain: 25000


In [3]:
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def extract_last_query(msgs):
    """Return the text of the latest message whose speaker is "user".

    Walks the conversation from the end towards the start and stops at the
    first user turn. Returns an empty string when no user turn exists.
    """
    for turn in reversed(msgs):
        if turn.get("speaker") == "user":
            return turn["text"]
    return ""


def get_corpus(domain):
    """Locate the JSONL corpus for ``domain``, unzipping it on first use.

    Looks for ``<CORPUS_DIR>/<domain>.jsonl``. When that file is absent but a
    sibling ``<domain>.jsonl.zip`` exists, the archive is extracted into
    CORPUS_DIR first. Returns the JSONL path if it exists afterwards,
    otherwise None.
    """
    jsonl_path = os.path.join(CORPUS_DIR, f"{domain}.jsonl")
    archive_path = jsonl_path + ".zip"

    needs_extraction = not os.path.exists(jsonl_path) and os.path.exists(archive_path)
    if needs_extraction:
        print(f"Extracting {domain}.jsonl...")
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(CORPUS_DIR)

    # Re-check: the archive may not have contained the expected file name.
    if os.path.exists(jsonl_path):
        return jsonl_path
    return None


def save_jsonl(data, path):
    """Write ``data`` to ``path`` as UTF-8 JSON Lines, one record per line.

    Non-ASCII characters are written verbatim (not escaped). Prints a short
    confirmation with the number of records once the file is closed.
    """
    with open(path, 'w', encoding='utf-8') as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + '\n'
            for record in data
        )
    print(f"Saved {len(data)} items -> {path}")

In [4]:
# ============================================================================
# VECTOR INDEX VERIFICATION
# ============================================================================
# Check if the unified Qdrant collection exists; if not, build it.

# Reuse the unified collection when it already exists; otherwise build it
# from the per-domain corpora.
client = get_qdrant_client(QDRANT_PATH)
collections = [c.name for c in client.get_collections().collections]

if COLLECTION_NAME in collections:
    info = client.get_collection(COLLECTION_NAME)
    print(f"Collection '{COLLECTION_NAME}' found: {info.points_count} vectors")
else:
    # Build index from scratch (only runs if collection missing)
    print("Building vector index...")

    # Per-domain cap: the small test limit in TEST_MODE, the full limit
    # otherwise. (Previously TEST_CHUNK_LIMIT was never applied, so a test
    # run indexed every chunk of every corpus.)
    chunk_limit = TEST_CHUNK_LIMIT if TEST_MODE else MAX_DOCS_PER_DOMAIN

    all_docs = []
    for domain in DOMAINS:
        path = get_corpus(domain)
        if not path:
            # Keep going with the remaining domains, but make the gap visible.
            print(f"WARNING: corpus for domain '{domain}' not found; skipping.")
            continue

        # Truncate first so only the documents that are kept get tagged.
        docs = load_and_chunk_data(path)[:chunk_limit]
        # Tag with domain metadata for filtered retrieval
        for d in docs:
            d.metadata["domain"] = domain
        all_docs.extend(docs)

    build_vector_store(all_docs, QDRANT_PATH, COLLECTION_NAME)

Collection 'mtrag_unified' found: 100000 vectors


In [5]:
# ============================================================================
# MODEL INITIALIZATION
# ============================================================================

# --- Load quantized LLM (shared across Task B & C) ---
print("Loading LLM: Llama 3.1 8B (4-bit NF4)...")
gen_components = create_generation_components()

# --- Initialize Self-CRAG Graph for Task C ---
print("Initializing Self-CRAG Graph...")
import src.graph
src.graph._components = gen_components  # Share model to avoid OOM
graph_app = initialize_graph()

# --- Task B Chain (Direct LLM, no context) ---
task_b_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert assistant. Answer based on your knowledge. Be concise.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{question}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"]
)
task_b_chain = task_b_prompt | gen_components.llm | StrOutputParser()

print("All Systems Ready.")

Loading LLM: Llama 3.1 8B (4-bit NF4)...
Creating Generation Components with model: meta-llama/Llama-3.1-8B-Instruct...
Generation Components Ready.
Initializing Self-CRAG Graph...
All Systems Ready.


In [6]:
# ============================================================================
# MAIN PIPELINE: TASK A, B, C
# ============================================================================

# --- Load Conversations ---
# Explicit UTF-8: the platform default encoding is not guaranteed to be UTF-8,
# and the outputs are written as UTF-8 by save_jsonl.
with open(CONV_FILE, encoding="utf-8") as f:
    conversations = json.load(f)
print(f"Loaded {len(conversations)} conversations from dataset.")

# --- Result Containers ---
# One list per task; each processed conversation appends exactly one record
# to each list, so the three lists stay index-aligned.
results_A, results_B, results_C = [], [], []

# --- Process Each Domain ---
for domain in DOMAINS:
    print(f"\n{'='*20} DOMAIN: {domain.upper()} {'='*20}")

    # Filter conversations by domain (case-insensitive substring match),
    # keeping each conversation's position in the original file.
    # `or ""` guards against an explicit null domain value.
    convs_with_idx = [(i, c) for i, c in enumerate(conversations)
                      if domain in (c.get("domain") or "").lower()]
    if TEST_MODE:
        convs_with_idx = convs_with_idx[:TEST_QUERY_LIMIT]

    # CRITICAL: Initialize domain-filtered retriever
    print(f"Initializing retriever for domain: {domain}")
    retriever = get_retriever(
        qdrant_path=QDRANT_PATH,
        collection_name=COLLECTION_NAME,
        top_k_retrieve=20,  # BGE-M3 candidates
        top_k_rerank=5,     # Cross-encoder final selection
        domain=domain       # Domain filter for isolation
    )

    print(f"Processing {len(convs_with_idx)} conversations...")

    for idx_in_domain, (global_idx, conv) in enumerate(tqdm(convs_with_idx, desc=domain, leave=False)):
        msgs = conv.get("messages", [])
        q = extract_last_query(msgs)
        if not q:
            # No user turn (or empty text): nothing to answer for any task.
            continue

        # Synthetic id: domain plus position within the domain-filtered list.
        conv_id = f"{domain}_{idx_in_domain}"

        # ===================== TASK A: RETRIEVAL =====================
        docs = retriever.invoke(q)
        contexts = [{
            # Fall back to a positional id / zero score when metadata is missing.
            "document_id": str(d.metadata.get("doc_id", f"{domain}_{i}")),
            "score": float(d.metadata.get("relevance_score", 0.0)),
            # Prefer the parent passage text when the chunk carries one.
            "text": d.metadata.get("parent_text") or d.page_content
        } for i, d in enumerate(docs)]

        # ===================== TASK B: GENERATION (No Context) =====================
        try:
            ans_b = task_b_chain.invoke({"question": q})
        except Exception as e:
            # Do not write the exception text into the submission as if it
            # were a model answer; log it and use the same sentinel as Task C.
            print(f"Task B Error: {e}")
            ans_b = "I_DONT_KNOW"

        # ===================== TASK C: RAG (Self-CRAG Graph) =====================
        try:
            # Every turn spoken by "user" becomes a HumanMessage; any other
            # speaker is treated as the assistant.
            chat_history = [
                HumanMessage(content=m["text"]) if m.get("speaker") == "user"
                else AIMessage(content=m["text"])
                for m in msgs
            ]
            response = graph_app.invoke({
                "question": q,
                "domain": domain,
                "messages": chat_history
            })
            ans_c = response.get("generation", "I_DONT_KNOW")
            reason_c = response.get("fallback_reason", "none")
        except Exception as e:
            print(f"Task C Error: {e}")
            ans_c, reason_c = "I_DONT_KNOW", "pipeline_error"

        # ===================== APPEND RESULTS =====================
        results_A.append({"conversation_id": conv_id, "original_index": global_idx, "ranking": contexts})
        results_B.append({"conversation_id": conv_id, "original_index": global_idx, "answer": ans_b})
        results_C.append({
            "conversation_id": conv_id,
            "original_index": global_idx,
            "answer": ans_c,
            "fallback_reason": reason_c,
            "references": contexts
        })

print(f"\nPipeline Complete. Total: {len(results_A)} results per task.")

Loaded 110 conversations from dataset.

Initializing retriever for domain: govt
Processing 28 conversations...

Initializing retriever for domain: clapnq
Processing 29 conversations...

Initializing retriever for domain: fiqa
Processing 27 conversations...

Initializing retriever for domain: cloud
Processing 26 conversations...

Pipeline Complete. Total: 110 results per task.


In [None]:
# ============================================================================
# SAVE SUBMISSION FILES
# ============================================================================

# Persist each task's results to its own JSONL submission file (A, B, C order).
for _task_results, _task_file in (
    (results_A, FILE_A),
    (results_B, FILE_B),
    (results_C, FILE_C),
):
    save_jsonl(_task_results, _task_file)

print("\nDone.")

Saved 110 items -> .../data/submissions/submission_TaskA_Gbgers.jsonl
Saved 110 items -> .../data/submissions/submission_TaskB_Gbgers.jsonl
Saved 110 items -> .../data/submissions/submission_TaskC_Gbgers.jsonl

Done.
