In [None]:
import sys
# Put the relative 'src/' directory first on the import path; presumably this is
# where the `app` package imported below lives. Because the path is relative, it
# is resolved against the kernel's current working directory.
sys.path.insert(0, 'src/')

from app.data_ingestion import run_data_ingestion

In [None]:
# Run the ingestion entry point imported from app.data_ingestion and keep its
# return value. NOTE(review): presumably this (re)builds the files under
# database/ that the later cells read — confirm in app.data_ingestion.
index = run_data_ingestion()

In [None]:
# Verify chunks
import json

# Read the chunk metadata back from disk.
with open("database/metadata.json", "r") as metadata_file:
    metadata = json.load(metadata_file)

print(f"Total chunks: {len(metadata)}")
print("\nFirst 10 chunks:")
# Preview at most ten chunks; each text is cut off after 2000 characters.
for position, chunk in enumerate(metadata[:10]):
    print(f"\n--- Chunk {position} ({chunk['source']}) ---")
    print(chunk['text'][:2000])

In [None]:
# Test embedding search
from sentence_transformers import SentenceTransformer
import faiss
import json

# Load the embedding model, the persisted FAISS index, and the chunk metadata.
model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.read_index("database/vector_store.faiss")
with open("database/metadata.json", "r") as f:
    metadata = json.load(f)

# Test query
query = "What documents does a Danish citizen need?"
query_emb = model.encode([query])
# Scale the query vector to unit length in place.
# NOTE(review): presumably the stored vectors were normalized the same way at
# ingestion time, so the returned scores act as cosine similarities — confirm.
faiss.normalize_L2(query_emb)

# Search: ask the index for the 3 nearest neighbours of the single query vector.
distances, indices = index.search(query_emb, 3)

print(f"Query: {query}\n")
for i, (idx, dist) in enumerate(zip(indices[0], distances[0])):
    # FAISS pads the result with label -1 when the index holds fewer than k
    # vectors; metadata[-1] would silently show the last chunk as a bogus hit.
    if idx < 0:
        continue
    print(f"Result {i+1} (similarity: {dist:.3f}):")
    print(f"Source: {metadata[idx]['source']}")
    print(f"Text: {metadata[idx]['text'][:150]}...\n")

In [None]:
import sqlite3
import json

def get_all_customers(db_path="database/customers.db"):
    """Return every row of the ``customers`` table as a list of dicts.

    Args:
        db_path: Path of the SQLite database file to read. Defaults to
            ``"database/customers.db"``, the location that was previously
            hard-coded.

    Returns:
        A list of ``{"id": <id column>, "data": <decoded JSON>}`` dicts, one
        per row, in the order SQLite returns them. Empty when the table has
        no rows.

    Raises:
        sqlite3.Error: If the query fails (for example, the table is missing).
        json.JSONDecodeError: If a row's ``data`` column is not valid JSON text.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute("SELECT id, data FROM customers").fetchall()
    finally:
        # Close the connection even when the query raises, so the handle
        # (and the file lock behind it) is never leaked.
        conn.close()

    # The data column stores JSON text; decode it back into Python objects.
    return [{"id": row_id, "data": json.loads(raw)} for row_id, raw in rows]

# Fetch all stored customer records and print them one per line.
customers = get_all_customers()
for customer in customers:
    print(customer)
