In [None]:
import sys
import os

# Make the parent directory importable so that `src.*` resolves when the
# notebook is run from its own folder. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if os.path.abspath("..") not in sys.path:
    sys.path.append(os.path.abspath(".."))

from src.ingestion import build_vector_store, load_and_chunk_data

In [None]:
def test_vector_store_creation(docs):
    print("\n===== VECTOR STORE CREATION TEST =====")

    vectorstore = build_vector_store(
        docs=docs,
        persist_dir="../qdrant_test_db"
    )

    print("âœ… Vector store created successfully")
    return vectorstore


def test_qdrant_stats(vectorstore):
    client = vectorstore.client
    collection = "mtrag_collection"

    info = client.get_collection(collection)
    print(info)
    print("\n===== QDRANT COLLECTION INFO =====")
    print(f"âœ… Total Loaded Chunks (Points): {info.points_count}")
    # Status ti dice se la collezione Ã¨ pronta (Green)
    print(f"ðŸš¦ Collection Status: {info.status}")


def test_similarity_search(vectorstore):
    print("\n===== SIMILARITY SEARCH TEST =====")

    # Usa questa query per testare le cause economiche
    query = "Chi ha ucciso Jean-Paul Marat e come?"
    
    print(f"ðŸ”Ž Query: {query}")

    results = vectorstore.similarity_search_with_score(
        query=query,
        k=3 # Ne bastano 3 per vedere se funziona
    )

    for i, (doc, score) in enumerate(results):
        print(f"\n--- RESULT {i+1} ---")
        # 1. Stampa il Child Chunk (quello che ha fatto match matematico)
        print(f"ðŸ”¹ CHILD CHUNK (Match): \n{doc.page_content[:150]}...")
        
        # 2. Stampa il Parent Chunk (il contesto reale che passerai all'LLM)
        # Nota: recuperiamolo dai metadati dove l'hai salvato nella funzione load_and_chunk_data
        parent_text = doc.metadata.get("parent_text", "N/A")
        print(f"PARENT CONTEXT (To LLM): \n{parent_text[:150]}...")
        
        print(f"Source: {doc.metadata.get('title')}")
        print(f"Score/Similarity: {score:.4f}")


def test_parent_child_reconstruction(vectorstore):
    print("\n===== PARENTâ€“CHILD TEST =====")

    query = "Chi ha ucciso Jean-Paul Marat e come?"

    results = vectorstore.similarity_search(query, k=3)

    for doc in results:
        print("\n--- CHILD CHUNK ---")
        print(doc.page_content)

        print("\n--- PARENT CONTEXT ---")
        print(doc.metadata["parent_text"][:600], "...")


In [None]:
# Location of the JSONL corpus, relative to the notebook's working directory.
json_path = "../data/corpus/test.jsonl"
# Result of load_and_chunk_data on that file; `docs` is what the
# vector-store creation cell consumes.
docs = load_and_chunk_data(json_path)

In [None]:
# Build the store from the chunked corpus and keep the handle that the
# remaining checks receive.
vectorstore = test_vector_store_creation(docs=docs)

In [None]:
# Report the point count and status of the collection behind the store.
test_qdrant_stats(vectorstore=vectorstore)

In [None]:
# Run the scored similarity search and print each hit with its metadata.
test_similarity_search(vectorstore=vectorstore)

In [None]:
# Print every matched chunk side by side with its parent context.
test_parent_child_reconstruction(vectorstore=vectorstore)