## Connect to Meilisearch

In [2]:
import os

import meilisearch

# Connection settings come from the environment so a real master key never has
# to be committed with the notebook. The fallbacks are the local-development
# placeholders this notebook originally hardcoded.
MEILISEARCH_URL = os.environ.get("MEILISEARCH_URL", "http://localhost:7700")
MEILISEARCH_API_KEY = os.environ.get("MEILI_MASTER_KEY", "aStrongMasterKey")

meilisearch_client = meilisearch.Client(MEILISEARCH_URL, MEILISEARCH_API_KEY)

# Constructing the client does not contact the server; query the health
# endpoint so the success message below is only printed when the instance
# actually answers (the SDK raises if it cannot be reached).
meilisearch_client.health()
print("Successfully connected to Meilisearch!")

Successfully connected to Meilisearch!


## Configure the index

In [None]:
index_name = "test"
index = meilisearch_client.index(index_name)

# Settings updates are enqueued as asynchronous tasks. They are applied one at
# a time, in this order, and each one is awaited before the next is sent.
settings_updates = [
    (index.update_filterable_attributes, ["type", "year", "tags"]),
    (
        index.update_embedders,
        # Vectors are supplied by the caller ("userProvided"); 3072 must match
        # the length of the vectors sent at indexing and search time.
        {"my_embedder": {"source": "userProvided", "dimensions": 3072}},
    ),
    (index.update_searchable_attributes, ["title", "content"]),
    (index.update_sortable_attributes, ["creation_date", "view_count"]),
]

for apply_update, payload in settings_updates:
    task = apply_update(payload)
    finished_task = meilisearch_client.wait_for_task(task.task_uid)
    # wait_for_task returns once the task leaves the queue, whether it
    # succeeded or not, so the outcome has to be checked explicitly; otherwise
    # a rejected setting would go unnoticed.
    if finished_task.status != "succeeded":
        raise RuntimeError(
            f"Settings task {task.task_uid} ended with status "
            f"'{finished_task.status}': {getattr(finished_task, 'error', None)}"
        )

print("Configured index settings.")

Configured index settings.


## Process frontend request

In [20]:
# Sample payload standing in for what the frontend would send.
# - "query" is the text that is embedded and passed to the search call.
# - "articleType" and "filters" are read by get_filter_conditions.
# - "sortBy" is read by get_sorting_criteria ("date" or "popularity").
frontend_request = {
    "query": "Test content",
    "articleType": "blog-post",
    "sortBy": "popularity",
    "filters": {"years": ["2024", "2025"], "tags": ["kNN", "Elasticsearch"]},
}

### Get filter conditions

In [21]:
def _quote_filter_value(value, quote: str = "'") -> str:
    """Render one value as a literal inside a filter expression.

    Strings are wrapped in ``quote`` and any occurrence of ``quote`` inside
    them is backslash-escaped so the value cannot terminate the literal early.
    Non-string values (e.g. integer years) are emitted unquoted via ``str``.
    """
    if not isinstance(value, str):
        return str(value)
    escaped = value.replace(quote, "\\" + quote)
    return f"{quote}{escaped}{quote}"


def get_filter_conditions(data: dict) -> str:
    """Build a Meilisearch filter expression from a frontend request.

    Args:
        data: Request payload. Reads the optional keys ``"articleType"`` and
            ``"filters"``; ``"filters"`` may in turn contain the optional
            lists ``"years"`` and ``"tags"``.

    Returns:
        The individual conditions joined with ``" AND "``, or an empty string
        when the request carries no usable filter.
    """
    conditions = []

    article_type = data.get("articleType")
    if article_type:
        conditions.append("type = " + _quote_filter_value(str(article_type), '"'))

    # "filters" may be absent or explicitly null, and each sub-key is
    # optional, so every lookup falls back to an empty value instead of
    # raising KeyError.
    filters = data.get("filters") or {}

    years = filters.get("years") or []
    if years:
        year_literals = ", ".join(_quote_filter_value(year) for year in years)
        conditions.append(f"year IN [{year_literals}]")

    tags = filters.get("tags") or []
    if tags:
        # Tags are always emitted as quoted strings.
        tag_literals = ", ".join(_quote_filter_value(str(tag)) for tag in tags)
        conditions.append(f"tags IN [{tag_literals}]")

    return " AND ".join(conditions)


# Build the filter expression for the sample request; the bare name on the
# last line makes the notebook display the resulting string.
filter_conditions = get_filter_conditions(frontend_request)
filter_conditions

'type = "blog-post" AND year IN [\'2024\', \'2025\'] AND tags IN [\'kNN\', \'Elasticsearch\']'

### Get sorting criteria

In [27]:
def get_sorting_criteria(data: dict) -> list[str]:
    """Translate the request's ``"sortBy"`` value into a sort rule list.

    Args:
        data: Request payload; the optional ``"sortBy"`` key selects the
            ordering and is treated as ``"date"`` when absent.

    Returns:
        A one-element list holding the ``"<attribute>:desc"`` rule. Values
        that are not recognised fall back to newest-first.
    """
    newest_first = "creation_date:desc"
    sort_rules = {"date": newest_first, "popularity": "view_count:desc"}

    requested = data.get("sortBy", "date")
    chosen_rule = sort_rules.get(requested, newest_first)
    return [chosen_rule]


# Resolve the sort rule for the sample request; the bare name on the last
# line makes the notebook display the resulting list.
sorting_criteria = get_sorting_criteria(frontend_request)
sorting_criteria

['view_count:desc']

### Embedding

In [26]:
from google import genai
from dotenv import load_dotenv
from typing import Optional, List

load_dotenv()


def get_query_embedding(query_text: str) -> Optional[List[float]]:
    """Embed ``query_text`` with the ``gemini-embedding-001`` model.

    Args:
        query_text: The text to embed.

    Returns:
        The values of the first embedding in the response, or ``None`` when
        the request fails or the response carries no usable embedding. Errors
        are printed rather than raised, so callers must check for ``None``.
    """
    print(f"Generating embedding for query: '{query_text}'")
    try:
        # NOTE(review): the client is created without arguments, so it
        # presumably picks up its API key from the environment - confirm.
        google_client = genai.Client()
        result = google_client.models.embed_content(
            model="gemini-embedding-001", contents=query_text
        )
        if result is None:
            raise ValueError("No result returned from embedding model.")

        # A falsiness check covers a missing attribute, None, and an empty
        # list, so indexing [0] below cannot fail with an opaque IndexError.
        embeddings = getattr(result, "embeddings", None)
        if not embeddings:
            raise ValueError("No embeddings found in result.")

        values = embeddings[0].values
        if not values:
            raise ValueError("Embedding returned by the model is empty.")
        return values
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with
        # None instead of propagating the exception.
        print(f"Error generating embedding: {e}")
        return None


query = frontend_request["query"]
query_vector = get_query_embedding(query)

# get_query_embedding reports failures by returning None; stop here with a
# clear message instead of letting len(None) raise an unrelated TypeError.
if query_vector is None:
    raise RuntimeError(
        "Could not generate an embedding for the query; see the error printed above."
    )

# Display the vector length as a sanity check.
len(query_vector)

Generating embedding for query: 'Test content'


3072

## Search

In [35]:
query = frontend_request["query"]

# Search parameters assembled from the values computed in the cells above.
# The query vector is passed explicitly alongside the "hybrid" settings that
# name the embedder to use.
search_options = {
    "filter": filter_conditions,
    "vector": query_vector,
    "hybrid": {"embedder": "my_embedder", "semanticRatio": 0.5},
    "showRankingScore": True,
    "sort": sorting_criteria,
    "facets": ["tags", "year"],
}
search_results = index.search(query, search_options)

# One summary line per hit: title, content, view count, tags, ranking score.
for hit in search_results["hits"]:
    score = hit["_rankingScore"]
    summary_line = f"- {hit['title']} - {hit['content']} - {hit['view_count']} - {hit['tags']} - Score: {score:.4f}"
    print(summary_line)

- Pre-filtering with kNN search in Elasticsearch - Test content 3 - 1502 - ['Elasticsearch', 'kNN', 'Semantic search'] - Score: 0.9722
- Collapse search results in Elasticsearch - Test content 2 - 100 - ['Elasticsearch'] - Score: 0.9722
- Change the heap size for Elasticsearch - Test content 1 - 0 - ['Elasticsearch'] - Score: 0.9722


In [36]:
facet_distribution = search_results.get("facetDistribution", {})

# (facet key, heading) pairs; a section is printed only when the facet is
# present in the response, in this order.
facet_sections = (("tags", "\nTags:"), ("year", "\nYears:"))

for facet_name, heading in facet_sections:
    if facet_name not in facet_distribution:
        continue
    print(heading)
    for facet_value, hit_count in facet_distribution[facet_name].items():
        print(f"- {facet_value}: {hit_count}")


Tags:
- Elasticsearch: 3
- kNN: 1
- Semantic search: 1

Years:
- 2025: 3
