In [1]:
import json
import logging
import os
from functools import reduce, partial

import ollama

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

##### `query_embedding`

In [2]:
def query_embedding():
    """Load the migrated and the new query sets and run each through the embedding pipeline.

    Both JSON files are loaded with `load_query_data` first; afterwards each
    loaded set is handed to `embed_queries` together with its identifier.
    The function has no return value.
    """
    logging.info(f"Started {query_embedding.__name__}.")

    # (identifier, source file) for every query set, in processing order.
    query_sources = (
        ("migrated", "../Query Processing/4_json_results/migrated_query_data.json"),
        ("new", "../Query Processing/4_json_results/new_query_data.json"),
    )

    # All sets are loaded before any embedding starts, so an unreadable
    # input file surfaces before embed_queries is called for any set.
    loaded_sets = [
        (identifier, load_query_data(path, identifier))
        for identifier, path in query_sources
    ]

    for identifier, query_data in loaded_sets:
        embed_queries(query_data, identifier)

    logging.info("Queries successfully embedded.")

In [3]:
def embed_queries(queries, QUERY_IDENTIFIER):
    """Run `queries` through the embed-then-write pipeline.

    Each stage receives the previous stage's result as its first argument
    and `QUERY_IDENTIFIER` as a keyword argument.

    Args:
        queries: The query data handed to the first stage.
        QUERY_IDENTIFIER: Label of the query set, forwarded to every stage.

    Returns:
        Whatever the last stage (`write_query_data_to_json`) returns.
    """
    pipeline = (embed_query_data, write_query_data_to_json)

    result = queries
    for stage in pipeline:
        result = stage(result, QUERY_IDENTIFIER=QUERY_IDENTIFIER)
    return result

In [8]:
# NOTE(review): query_embedding() depends on helper functions whose definition
# cells appear further down in this notebook. Those cells must be executed
# first (or this cell moved to the end) for a fresh Restart & Run All to work.
# query_embedding() has no return statement, so `res` is bound to None.
res = query_embedding()

2025-03-28 12:32:41,516 - INFO - Started query_embedding.
2025-03-28 12:32:41,518 - INFO - Started load_query_data for migrated queries from path ../Query Processing/4_json_results/migrated_query_data.json.
2025-03-28 12:32:41,523 - INFO - Completed load_query_data for migrated queries.
2025-03-28 12:32:41,524 - INFO - Started load_query_data for new queries from path ../Query Processing/4_json_results/new_query_data.json.
2025-03-28 12:32:41,530 - INFO - Completed load_query_data for new queries.
2025-03-28 12:32:41,531 - INFO - Started embed_query_data for migrated queries.
2025-03-28 12:32:43,769 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-03-28 12:32:44,438 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-03-28 12:32:45,284 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-03-28 12:32:45,588 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
2025-

##### `load_query_data`

In [4]:
def load_query_data(PATH, QUERY_IDENTIFIER):
    """Load query metadata from a JSON file.

    Args:
        PATH: Location of the JSON file to read.
        QUERY_IDENTIFIER: Label of the query set; only used in log messages.

    Returns:
        The deserialized JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    logging.info(f"Started {load_query_data.__name__} for {QUERY_IDENTIFIER} queries from path {PATH}.")

    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform's default encoding, which garbles non-ASCII text on systems
    # where that default is not UTF-8.
    with open(PATH, encoding="utf-8") as f:
        query_meta_data = json.load(f)

    logging.info(f"Completed {load_query_data.__name__} for {QUERY_IDENTIFIER} queries.")
    return query_meta_data

##### `embed_query_data`

In [5]:
def embed_query_data(query_meta_data, QUERY_IDENTIFIER, model="mxbai-embed-large"):
    """Attach an embedding to every query entry.

    For each entry, the metadata is flattened to one string with
    `join_query_meta_data`, sent to `ollama.embed`, and the response's
    "embeddings" value is stored under the entry's "embedding" key.

    Note: the entries of `query_meta_data` are modified in place; the same
    mapping object is returned.

    Args:
        query_meta_data: Mapping of query key -> metadata dict.
        QUERY_IDENTIFIER: Label of the query set; only used in log messages.
        model: Name of the embedding model passed to `ollama.embed`.

    Returns:
        `query_meta_data`, with an "embedding" key added to every entry.
    """
    logging.info(f"Started {embed_query_data.__name__} for {QUERY_IDENTIFIER} queries.")

    for query_key, query_value in query_meta_data.items():
        joined_query_data = join_query_meta_data(query_value)
        response = ollama.embed(model=model, input=joined_query_data)
        # Stored exactly as returned by the client.
        # NOTE(review): presumably a list of vectors (one per input) - confirm.
        query_meta_data[query_key]["embedding"] = response["embeddings"]

    # Mirror the Started/Completed log pairing used by the load and write steps.
    logging.info(f"Completed {embed_query_data.__name__} for {QUERY_IDENTIFIER} queries.")
    return query_meta_data

##### `join_query_meta_data`

In [6]:
def join_query_meta_data(query_value):
    """Flatten one query's metadata values into a single comma-separated string.

    List values contribute each of their items; every other value
    contributes itself. Keys are ignored. Non-string items are converted
    with `str()` and `None` items are skipped, so metadata containing
    numbers, booleans or nulls no longer makes the join raise a TypeError.

    Args:
        query_value: Mapping of metadata field -> value or list of values.

    Returns:
        The collected items joined with ", " (an empty string if there are none).
    """
    all_elements = []
    for value in query_value.values():
        items = value if isinstance(value, list) else [value]
        # None carries no text, so it is dropped rather than rendered as "None".
        all_elements.extend(str(item) for item in items if item is not None)

    return ", ".join(all_elements)

##### `write_query_data_to_json`

In [7]:
def write_query_data_to_json(queries, QUERY_IDENTIFIER):
    """Write the query data to a JSON file under ./json_results.

    The target is `./json_results/<QUERY_IDENTIFIER>_query_data_with_embedding.json`,
    written as UTF-8 with non-ASCII characters kept as-is. The output
    directory is created if it does not exist yet.

    Args:
        queries: JSON-serializable query data to persist.
        QUERY_IDENTIFIER: Label of the query set; used in the file name and logs.

    Returns:
        `queries`, unchanged, so the caller keeps access to the data that
        was written instead of receiving None.
    """
    logging.info(f"Started {write_query_data_to_json.__name__} for {QUERY_IDENTIFIER} queries.")

    JSON_PATH = f"./json_results/{QUERY_IDENTIFIER}_query_data_with_embedding.json"

    # open(..., "w") does not create missing parent directories.
    os.makedirs(os.path.dirname(JSON_PATH), exist_ok=True)

    with open(JSON_PATH, "w", encoding="utf-8") as outfile:
        json.dump(queries, outfile, indent=4, ensure_ascii=False)

    logging.info(f"Completed {write_query_data_to_json.__name__} for {QUERY_IDENTIFIER} queries.")
    return queries
