# Read embeddings

In [1]:
import numpy as np

# Load the per-document embedding matrices. Row ranges each document is noted
# to occupy once the matrices are stacked in this order:
#   0 - 334      FIFA Equipment Regulations_2025_EN
#   334 - 800    Laws of the Game 25/26
#   800 - 1034   FIFA Disciplinary Code
#   1034 - 1318  FWC26_Competition Regulations_EN
(
    fifa_equipment_regulations,
    laws_of_the_game,
    fifa_disciplinary_code,
    fwc26_competition_regulations,
) = (
    np.load(embedding_path)
    for embedding_path in (
        "embeddings/embedding_FIFA Equipment Regulations_2025_EN.npy",
        "embeddings/embedding_Laws of the Game 25-26.npy",
        "embeddings/embedding_FIFA Disciplinary Code 25-26.npy",
        "embeddings/embedding_FWC26_Competition Regulations_EN.npy",
    )
)

# Read chunks

In [2]:
import json

# Load the text chunks from disk. The file is decoded explicitly as UTF-8 so
# the result does not depend on the platform's default text encoding.
with open("chunks.json", "r", encoding="utf-8") as chunks_file:
    chunks = json.load(chunks_file)

In [3]:
# Stack the four per-document matrices row-wise, keeping the document order
# fixed; the resulting shape is displayed as the cell output.
embedding_blocks = (
    fifa_equipment_regulations,
    laws_of_the_game,
    fifa_disciplinary_code,
    fwc26_competition_regulations,
)
all_embeddings = np.concatenate(embedding_blocks, axis=0)
all_embeddings.shape


(1318, 1024)

In [4]:
# Attach each embedding row to its chunk by position. The pairing only makes
# sense when both sequences have the same length, so fail with a clear message
# instead of a bare IndexError (too many chunks) or silently ignoring trailing
# embedding rows (too few chunks).
if len(chunks) != len(all_embeddings):
    raise ValueError(
        f"chunk/embedding count mismatch: {len(chunks)} chunks vs "
        f"{len(all_embeddings)} embedding rows"
    )
for chunk, embedding_row in zip(chunks, all_embeddings):
    # Store the row as a plain Python list rather than a NumPy array.
    chunk["embedding"] = embedding_row.tolist()

In [5]:
# Sanity check: show which keys the first chunk carries after the embedding was attached.
chunks[0].keys()

dict_keys(['chunk', 'metadata', 'embedding'])

# Milvus
https://milvus.io/docs/full_text_search_with_milvus.md

In [6]:
from pymilvus import (
    MilvusClient,
    DataType,
    Function,
    FunctionType,
    AnnSearchRequest,
    RRFRanker,
)

In [7]:
# Name of the collection every later cell reads from and writes to.
collection_name = "football_docs"

# Connect to the Milvus instance listening on localhost.
uri = "http://localhost:19530"
client = MilvusClient(uri=uri)

In [8]:
# Text analyzer configuration: standard tokenizer followed by a lowercase filter.
analyzer_params = dict(
    tokenizer="standard",
    filter=["lowercase"],
)

In [9]:
schema = MilvusClient.create_schema()

# Field definitions, listed in the order they are added to the schema.
field_specs = [
    # Auto-generated string primary key.
    dict(
        field_name="id",
        datatype=DataType.VARCHAR,
        is_primary=True,
        auto_id=True,
        max_length=100,
    ),
    # Chunk text, with text analysis and text matching enabled.
    dict(
        field_name="content",
        datatype=DataType.VARCHAR,
        max_length=65535,
        analyzer_params=analyzer_params,
        enable_match=True,
        enable_analyzer=True,
    ),
    # Sparse vector; filled by the BM25 function registered below.
    dict(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR),
    # Dense embedding; 1024 is the dimension for Qwen3-Embedding-0.6B.
    dict(field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=1024),
    # Per-chunk metadata stored as JSON.
    dict(field_name="metadata", datatype=DataType.JSON),
]
for field_spec in field_specs:
    schema.add_field(**field_spec)

# BM25 function: takes `content` as input and writes to `sparse_vector`.
bm25_function = Function(
    name="bm25",
    function_type=FunctionType.BM25,
    input_field_names=["content"],
    output_field_names="sparse_vector",
)

# Last expression of the cell: registering the function also displays the schema.
schema.add_function(bm25_function)


{'auto_id': False, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}, 'is_primary': True, 'auto_id': True}, {'name': 'content', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535, 'enable_match': True, 'enable_analyzer': True, 'analyzer_params': '{"tokenizer":"standard","filter":["lowercase"]}'}}, {'name': 'sparse_vector', 'description': '', 'type': <DataType.SPARSE_FLOAT_VECTOR: 104>, 'is_function_output': True}, {'name': 'dense_vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1024}}, {'name': 'metadata', 'description': '', 'type': <DataType.JSON: 23>}], 'enable_dynamic_field': False, 'enable_namespace': False, 'functions': [{'name': 'bm25', 'description': '', 'type': <FunctionType.BM25: 1>, 'input_field_names': ['content'], 'output_field_names': ['sparse_vector'], 'params': {}}]}

In [10]:
# One index per vector field: a sparse inverted index scored with BM25 for the
# sparse field, and a FLAT index with the inner-product metric for the dense field.
index_params = MilvusClient.prepare_index_params()
for index_spec in (
    dict(
        field_name="sparse_vector",
        index_type="SPARSE_INVERTED_INDEX",
        metric_type="BM25",
    ),
    dict(field_name="dense_vector", index_type="FLAT", metric_type="IP"),
):
    index_params.add_index(**index_spec)

# Start from a clean slate: an existing collection with this name is dropped
# before the new one is created.
collection_exists = client.has_collection(collection_name)
if collection_exists:
    client.drop_collection(collection_name)
client.create_collection(
    collection_name=collection_name,
    schema=schema,
    index_params=index_params,
)
print(f"Collection '{collection_name}' created successfully")


Collection 'football_docs' created successfully


In [11]:
# Build one row per chunk. Only the text, the dense embedding and the metadata
# are supplied; the schema marks `id` as auto-generated and `sparse_vector` as
# a function output. A chunk without metadata gets an empty dict.
entities = [
    {
        "content": chunk["chunk"],
        "dense_vector": chunk["embedding"],
        "metadata": chunk.get("metadata", {}),
    }
    for chunk in chunks
]

# Insert data
client.insert(collection_name, entities)
print(f"Inserted {len(entities)} documents")


Inserted 1318 documents


In [44]:
import requests
import json

# Model identifiers sent as the "model" field of the embedding and chat requests.
EMBEDDING_MODEL = "ai/qwen3-embedding:0.6B-F16"
LLM_MODEL = "ai/qwen3:0.6B-Q4_0"

# Base URL that both the embedding and the chat endpoint paths are appended to.
OLLAMA_HOST = "http://localhost:12434"

def embed(text: str, timeout: float = 60.0) -> dict:
    """Request an embedding for ``text`` from the embeddings endpoint.

    Args:
        text: The string to embed; sent as the ``input`` of the request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            model server cannot hang the notebook indefinitely.

    Returns:
        A dict with two keys: ``status_code`` (the HTTP status of the response)
        and ``embedding`` (the ``embedding`` of the first item in the
        response's ``data`` list).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.post(
        f"{OLLAMA_HOST}/engines/llama.cpp/v1/embeddings",
        headers={"Content-Type": "application/json"},
        json={"model": EMBEDDING_MODEL, "input": text},
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of as a KeyError/JSON error when
    # the error body is indexed below.
    response.raise_for_status()
    payload = response.json()
    return {
        "status_code": response.status_code,
        "embedding": payload["data"][0]["embedding"],
    }

def chat(prompt: str, timeout: float = 300.0) -> dict:
    """Send ``prompt`` as a single user message to the chat endpoint.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        timeout: Seconds to wait for the server before giving up, so a stalled
            model server cannot hang the notebook indefinitely.

    Returns:
        A dict with two keys: ``status_code`` (the HTTP status of the response)
        and ``content`` (the ``content`` of the response's ``message``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.post(
        f"{OLLAMA_HOST}/api/chat",
        headers={"Content-Type": "application/json"},
        json={
            "model": LLM_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            # Ask for one complete JSON reply rather than a stream.
            "stream": False,
        },
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of as a KeyError/JSON error when
    # the error body is indexed below.
    response.raise_for_status()
    payload = response.json()
    return {
        "status_code": response.status_code,
        "content": payload["message"]["content"],
    }

In [53]:
# The question used for retrieval and answer generation in the cells below.
# NOTE(review): "ament" looks like a typo in the question — confirm the intended word.
query = "What is the ament after getting a red card?"

# Embed the question; the resulting vector is the input of the dense search request.
response = embed(query)
query_embedding = response["embedding"]
# Prints the entire vector, which produces a very long cell output.
print(query_embedding)

[-0.04808444157242775, 0.033120591193437576, -0.010175050236284733, -0.03590354695916176, -0.009193859063088894, -0.023869633674621582, -0.05076601356267929, -0.04460233449935913, -0.011820903979241848, 0.04492608830332756, -0.060736168175935745, -0.06246226280927658, 0.031127510592341423, -0.011370335705578327, -0.0518057644367218, 0.07771871984004974, -0.017990386113524437, -0.07975984364748001, 0.08971291780471802, 0.0385027639567852, -0.0219399593770504, -0.023538637906312943, -0.028448140248656273, 0.060800809413194656, 0.0876123458147049, 0.015996960923075676, -0.024273134768009186, 0.047578103840351105, 0.02873333729803562, 0.038166824728250504, 0.038229040801525116, -0.038863953202962875, -0.06677648425102234, -0.0182667076587677, -0.0601825974881649, -0.014569424092769623, -0.0002625479828566313, -0.07239645719528198, -0.022738708183169365, 0.027002060785889626, 0.01711113750934601, 0.037830598652362823, 0.0003585814265534282, -0.009079589508473873, -0.021859532222151756, -0.0

In [54]:
# Keyword leg: the raw query text is searched against the BM25 sparse field.
sparse_request = AnnSearchRequest(
    [query], "sparse_vector", {"metric_type": "BM25"}, limit=5
)

# Semantic leg: the query embedding is searched against the dense field (inner product).
dense_request = AnnSearchRequest(
    [query_embedding], "dense_vector", {"metric_type": "IP"}, limit=5
)

# Run both legs and fuse their rankings with Reciprocal Rank Fusion.
results = client.hybrid_search(
    collection_name,
    [sparse_request, dense_request],
    ranker=RRFRanker(),
    limit=5,
    output_fields=["content", "metadata"],
)
# A single query was submitted, so take the first (only) result list.
hybrid_results = results[0]

print("\nHybrid Search (Combined):")
for rank, hit in enumerate(hybrid_results, start=1):
    score = hit["distance"]
    text = hit["entity"]["content"]
    print(f"{rank}. Score: {score:.4f}, Content: {text}")



Hybrid Search (Combined):
1. Score: 0.0325, Content: The yellow card communicates a caution and the red card communicates a
sending-off.


Only a player, substitute, substituted player or team official may be shown the
red or yellow card.
2. Score: 0.0164, Content: 2
Red and yellow cards shall be deducted from a maximum of 10 points:
-
first yellow card:

minus 1 point
-
second yellow/indirect red card:
minus 3 points
-
direct red card:

minus 4 points
-
yellow card & direct red card:
minus 5 points
Red and yellow cards are the only criteria entailing minus points.


3
Positive play
Minimum 1 point
Maximum 10 points
3. Score: 0.0161, Content: Only a player, substitute, substituted player or team official may be shown the
red or yellow card.


Players, substitutes and substituted players
Delaying the restart of play to show a card
Once the referee has decided to caution or send off a player, play must not be
restarted until the sanction has been administered, unless the non-offending
t

# Answer generation

In [56]:
# Join the retrieved passages into one context block, separated by blank lines.
retrieved_passages = (hit["entity"]["content"] for hit in hybrid_results)
context = "\n\n".join(retrieved_passages)

# Prompt that instructs the model to answer only from the retrieved context.
prompt = f"""Answer the following question based on the provided context. 
If the context doesn't contain relevant information, just say "I don't have enough information to answer this question."

Context:
{context}

Question: {query}

Answer:"""

response = chat(prompt)

# Show the exact prompt that was sent, followed by the model's reply.
print(f"User \n{prompt}")
print("Response: " + response["content"])


User 
Answer the following question based on the provided context. 
If the context doesn't contain relevant information, just say "I don't have enough information to answer this question."

Context:
The yellow card communicates a caution and the red card communicates a
sending-off.


Only a player, substitute, substituted player or team official may be shown the
red or yellow card.

2
Red and yellow cards shall be deducted from a maximum of 10 points:
-
first yellow card:

minus 1 point
-
second yellow/indirect red card:
minus 3 points
-
direct red card:

minus 4 points
-
yellow card & direct red card:
minus 5 points
Red and yellow cards are the only criteria entailing minus points.


3
Positive play
Minimum 1 point
Maximum 10 points

Only a player, substitute, substituted player or team official may be shown the
red or yellow card.


Players, substitutes and substituted players
Delaying the restart of play to show a card
Once the referee has decided to caution or send off a player, pl