# Imports

In [1]:
import json
from pymongo import MongoClient
from pymongo.server_api import ServerApi
from llama_cpp import Llama
import re
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel

In [4]:
# Load collection descriptions from JSON file
def load_collection_descriptions(file_path):
    """Read the collection descriptions from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content (whatever top-level value the file holds).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which differs between operating systems and locales.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# MongoDB Connection

In [2]:
# Connect to MongoDB
def connect_to_mongodb(db_uri, db_name):
    """Open a MongoDB connection, verify it with a ping and return a database handle.

    Args:
        db_uri: MongoDB connection URI.
        db_name: Name of the database to select on the connected client.

    Returns:
        The database object ``client[db_name]``, or ``None`` when the ping fails
        (the error is printed rather than raised).
    """
    client = MongoClient(db_uri, server_api=ServerApi('1'))
    try:
        client.admin.command('ping')
    except Exception as e:
        print("Could not connect to MongoDB:", e)
        # Release the client's sockets and background threads; without this a
        # failed attempt leaves an unusable client alive in the kernel.
        client.close()
        return None
    print("Pinged your deployment. You successfully connected to MongoDB!")
    return client[db_name]

In [5]:
# Connection settings for the product database.
# NOTE(review): "mongodb_link" is presumably a redacted placeholder for the real
# connection URI — keep the real value out of the notebook (e.g. read it from an
# environment variable).
MONGODB_LINK = "mongodb_link"
DB_NAME = "woice-search-engine"
# connect_to_mongodb returns None when the ping fails, so `db` may be None here.
db = connect_to_mongodb(MONGODB_LINK, DB_NAME)
# Parsed content of collections.json; interpolated into the classification prompt
# built by query_classification.
collection_descriptions = load_collection_descriptions("collections.json")

Pinged your deployment. You successfully connected to MongoDB!


# Llama 3.1 GGUF

In [5]:
# Load the GGUF model
# NOTE(review): absolute, machine-specific path — adjust it for your environment.
model_name_or_path = '/mnt/SSD/models/Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf'
# The LLM helper functions in this notebook read this module-level `model`.
model = Llama(
    model_path=model_name_or_path,
    n_ctx=8192,  # context window, in tokens
    n_gpu_layers=-1  # -1 asks llama.cpp to offload every layer to the GPU
)

llama_model_loader: loaded meta data with 27 key-value pairs and 292 tensors from /mnt/SSD/models/Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Hermes 3 Llama 3.1 8B
llama_model_loader: - kv   3:                       general.organization str              = NousResearch
llama_model_loader: - kv   4:                           general.basename str              = Hermes-3-Llama-3.1
llama_model_loader: - kv   5:                         general.size_label str              = 8B
llama_model_loader: - kv   6:                          llama.block_count u32              = 32
llama_mod

In [5]:
# # Load the GGUF model
# model_name_or_path = '/mnt/SSD/models/Llama-3-8B-instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf'
# model = Llama(
#     model_path=model_name_or_path,
#     n_ctx=80000,
#     n_batch=1,
#     n_gpu_layers=-1
# )

llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from /mnt/SSD/models/Llama-3-8B-instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = .
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:        

In [6]:
def generate_response(messages, model, temperature=0.8, top_p=0.8, top_k=20, max_tokens=150):
    """Run a single chat completion and return the text of its first choice.

    Args:
        messages: Chat messages passed through to ``model.create_chat_completion``.
        model: Object exposing ``create_chat_completion``.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        top_k: Top-k sampling cut-off forwarded to the model.
        max_tokens: Upper bound on generated tokens forwarded to the model.

    Returns:
        The ``content`` field of the first choice's message.
    """
    sampling_options = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
    }
    completion = model.create_chat_completion(messages=messages, **sampling_options)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

# Query Classification

In [8]:
def query_classification(query):
    """Ask the LLM which store collections match a user's search query.

    Reads two module-level names: ``model`` (the chat model) and
    ``collection_descriptions`` (interpolated verbatim into the prompt).

    Args:
        query: Free-text search query typed by the user.

    Returns:
        A JSON string ``{"collections": [...]}`` built from the arguments of a
        ``classify_query`` tool call when the model makes one with parseable
        arguments; otherwise the assistant message content exactly as the model
        returned it (the system prompt asks for that same JSON shape, but the
        text is not validated here).
    """
    # Define the messages and function schema for the model
    messages = [
        {"role": "system",
         "content": """You are an ecommerce website where the user will search. You are tasked with classifying a user query. Based on the intent of the query, you will match
    it to one of the available collections provided in the list. If the query matches more than one collection, show all of them in the output. Please always return the name of the matching collection
    in valid JSON format: {"collections": ["collection_name1", "collection_name2", ...]}."""},
        {"role": "user",
         "content": f"The available collections are:\n{collection_descriptions}\nThe user query is: {query}"}
    ]

    # Define the function to be called
    tools = [
        {
            "type": "function",
            "function": {
                "name": "classify_query",
                "description": "Classify the user's query to matching collections.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "collections": {
                            "type": "array",
                            "items": {
                                "type": "string"
                            },
                            "description": "List of collections matching the user's query."
                        }
                    },
                    "required": ["collections"],
                },
            },
        }
    ]

    # Generate the output using the model with function calling
    response = model.create_chat_completion(
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )

    response_message = response["choices"][0]["message"]

    # If the model chose to call the tool, return the collections it supplied.
    for tool_call in response_message.get("tool_calls") or []:
        # The response is made of plain dicts, so use key access; attribute
        # access (tool_call.function.name) raises AttributeError.
        function_call = tool_call.get("function") or {}
        if function_call.get("name") != "classify_query":
            continue
        try:
            arguments = json.loads(function_call.get("arguments") or "{}")
        except json.JSONDecodeError:
            # Malformed arguments: skip this call and fall back to the message text.
            continue
        if isinstance(arguments, dict):
            return json.dumps({"collections": arguments.get("collections", [])})

    return response_message["content"]

In [9]:
# Example usage
query = "I want to buy any appliance that keeps my fruits fresh in hot weather"
filtered_collection = query_classification(query=query)

# Output the filtered collections
print(filtered_collection)


llama_print_timings:        load time =     175.75 ms
llama_print_timings:      sample time =       1.23 ms /    19 runs   (    0.06 ms per token, 15484.92 tokens per second)
llama_print_timings: prompt eval time =    2596.73 ms /  7264 tokens (    0.36 ms per token,  2797.37 tokens per second)
llama_print_timings:        eval time =     205.30 ms /    18 runs   (   11.41 ms per token,    87.67 tokens per second)
llama_print_timings:       total time =    2836.27 ms /  7282 tokens


{"collections": ["Kitchen and Home Appliances", "All Appliances", "Home Entertainment Systems"]}


In [11]:
# Example usage
query = "I want to buy a gift for a baby"
filtered_collection = query_classification(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 7249 prefix-match hit, remaining 10 prompt tokens to eval

llama_print_timings:        load time =     254.78 ms
llama_print_timings:      sample time =       3.33 ms /    52 runs   (    0.06 ms per token, 15634.40 tokens per second)
llama_print_timings: prompt eval time =      47.31 ms /    10 tokens (    4.73 ms per token,   211.35 tokens per second)
llama_print_timings:        eval time =     828.91 ms /    51 runs   (   16.25 ms per token,    61.53 tokens per second)
llama_print_timings:       total time =     907.96 ms /    61 tokens


{"collections": ["Baby Products", "Nursing and Feeding", "Baby Bath Skin and Grooming", "Baby Fashion", "Toys and Games", "STEM Toys Store", "Toys Gifting Store", "International Toy Store"]}


In [11]:
# Example usage
query = "I want to do some shopping for Eid"
filtered_collection = query_classification(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 7248 prefix-match hit, remaining 10 prompt tokens to eval

llama_print_timings:        load time =     212.74 ms
llama_print_timings:      sample time =       2.28 ms /    36 runs   (    0.06 ms per token, 15817.22 tokens per second)
llama_print_timings: prompt eval time =      32.82 ms /    10 tokens (    3.28 ms per token,   304.72 tokens per second)
llama_print_timings:        eval time =     393.65 ms /    35 runs   (   11.25 ms per token,    88.91 tokens per second)
llama_print_timings:       total time =     444.12 ms /    45 tokens


{"collections": ["Amazon Fashion", "Womens Fashion", "Mens Fashion", "Kids Fashion", "Ethnic Wear", "Fashion Sandals", "Shirts"]}


In [16]:
# Example usage
query = "I want to do something that i can eat quickly."
filtered_collection = query_classification(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 7248 prefix-match hit, remaining 13 prompt tokens to eval

llama_print_timings:        load time =     212.74 ms
llama_print_timings:      sample time =       1.13 ms /    17 runs   (    0.07 ms per token, 15004.41 tokens per second)
llama_print_timings: prompt eval time =      36.94 ms /    13 tokens (    2.84 ms per token,   351.97 tokens per second)
llama_print_timings:        eval time =     184.41 ms /    16 runs   (   11.53 ms per token,    86.76 tokens per second)
llama_print_timings:       total time =     231.02 ms /    29 tokens


{"collections": ["All Grocery and Gourmet Foods", "Snack Foods"]}


In [13]:
# Example usage
query = "I want to buy gift for a person who plays football"
filtered_collection = query_classification(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 7248 prefix-match hit, remaining 13 prompt tokens to eval

llama_print_timings:        load time =     212.74 ms
llama_print_timings:      sample time =       0.45 ms /     7 runs   (    0.06 ms per token, 15695.07 tokens per second)
llama_print_timings: prompt eval time =      36.38 ms /    13 tokens (    2.80 ms per token,   357.37 tokens per second)
llama_print_timings:        eval time =      70.97 ms /     6 runs   (   11.83 ms per token,    84.54 tokens per second)
llama_print_timings:       total time =     111.53 ms /    19 tokens


{"collections": ["Football"]}


# Displaying Product Output from the MongoDB Database

In [12]:
def identify_keywords(query):
    """Ask the LLM for up to 20 product suggestions that match a user's query.

    Reads two module-level names: ``model`` (the chat model) and
    ``Product_Names`` (interpolated verbatim into the prompt).
    NOTE(review): ``Product_Names`` is not defined in the visible part of the
    notebook — make sure a cell defines it before this function is called.

    Args:
        query: Free-text search query typed by the user.

    Returns:
        A JSON string ``{"Products": [...]}`` built from the arguments of a
        ``suggest_keywords`` tool call when the model makes one with parseable
        arguments; otherwise the assistant message content exactly as the model
        returned it (it may contain extra prose around the JSON and is not
        validated here).
    """
    # Define the messages and function schema for the model
    # NOTE(review): the user message labels the product list as "collections";
    # the wording is left unchanged so the prompt the model sees stays the same.
    messages = [
        {"role": "system",
         "content": """You are an advanced e-commerce search assistant. Your primary responsibility is to analyze the user's query and deduce their underlying intent, even when they do not specify product names directly. 
        Your response should focus on extracting essential concepts related to the query based on the provided products list. Always ensure your output is in valid JSON format: {"Products": ["Product_name1", "Product_name2", ...]} and make it as comprehensive and relevant as possible to the user's intent. Limit your suggestions to a maximum of 20 unique products to ensure the response is concise and relevant."""},
        {"role": "user",
         "content": f"The available collections are:\n{Product_Names}\nThe user query is: {query}"}
    ]

    # Define the function to be called. The property name must match the
    # "required" list, otherwise the schema demands a field it never declares.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "suggest_keywords",
                "description": "Understand the user's query to show him relevant products.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "products": {
                            "type": "array",
                            "items": {
                                "type": "string"
                            },
                            "description": "List of products matching the user's query."
                        }
                    },
                    "required": ["products"],
                },
            },
        }
    ]

    # Generate the output using the model with function calling
    response = model.create_chat_completion(
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )

    response_message = response["choices"][0]["message"]

    # If the model chose to call the tool, return the products it supplied.
    for tool_call in response_message.get("tool_calls") or []:
        # The response is made of plain dicts, so use key access; attribute
        # access (tool_call.function.name) raises AttributeError.
        function_call = tool_call.get("function") or {}
        # Match the tool declared above (it is named "suggest_keywords").
        if function_call.get("name") != "suggest_keywords":
            continue
        try:
            arguments = json.loads(function_call.get("arguments") or "{}")
        except json.JSONDecodeError:
            # Malformed arguments: skip this call and fall back to the message text.
            continue
        if isinstance(arguments, dict):
            # Use the same top-level key the system prompt asks the model for.
            return json.dumps({"Products": arguments.get("products", [])})

    return response_message["content"]

In [10]:
# Example usage
query = "I want to buy any appliance that keeps my fruits fresh in hot weather"
filtered_collection = identify_keywords(query=query)

# Output the filtered collections
print(filtered_collection)


llama_print_timings:        load time =     170.38 ms
llama_print_timings:      sample time =      23.24 ms /   362 runs   (    0.06 ms per token, 15577.26 tokens per second)
llama_print_timings: prompt eval time =   16846.05 ms / 23687 tokens (    0.71 ms per token,  1406.09 tokens per second)
llama_print_timings:        eval time =    7130.14 ms /   361 runs   (   19.75 ms per token,    50.63 tokens per second)
llama_print_timings:       total time =   24368.63 ms / 24048 tokens


{"Products": ["LG 1.5 Ton 3 Star AI DUAL Inverter Split AC (Copper, Super Convertible 6-in-1 Cooling, HD Filter with Anti-Virus Protection, 2023 Model, White, Gls18I3...", "Voltas 1.5 Ton 3 Star Inverter Split AC (Copper 183V CZQ White)", "Daikin 1.5 Ton 3 Star Inverter Split AC (Copper DTKL50, White)", "Carrier 1.5 Ton 3 Star Hybridjet Inverter Split AC CAI18IN5R31W1 (Copper, INDUS CXI, 6-in-1 Flexicool with Anti-Viral Guard, Smart Energy, 2023 Model, White)", "Blue Star 1.5 Ton 3 Star Inverter Split Ac (Copper,IA318YKU, 2022, White)", "Samsung 1.5 Ton 3 Star Windfree Technology Inverter Split AC (Copper, Convertible 5-in-1 Cooling Mode, Tri Care Filter, 2023 Model AR18...", "Hitachi 1.5 Ton 3 Star Split AC (Copper RSNG318HEDO white)", "Godrej 1.5 Ton 3 Star Inverter Split AC (Copper GIC 18WTC3-WSB White)", "Whirlpool 1.5 Ton 3 Star, Flexicool Inverter Split AC (Copper, Convertible 4-in-1 Cooling Mode, HD Filter 2023 Model, S3K2...", "LG 1.5 Ton 3 Star Inverter Split AC (Copper KS-Q18

In [10]:
# Example usage
query = "I want to buy any appliance that keeps my fruits fresh in hot weather"
filtered_collection = identify_keywords(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 6 prefix-match hit, remaining 155 prompt tokens to eval

llama_print_timings:        load time =      84.94 ms
llama_print_timings:      sample time =       6.11 ms /    96 runs   (    0.06 ms per token, 15706.81 tokens per second)
llama_print_timings: prompt eval time =      55.29 ms /   155 tokens (    0.36 ms per token,  2803.55 tokens per second)
llama_print_timings:        eval time =     748.50 ms /    95 runs   (    7.88 ms per token,   126.92 tokens per second)
llama_print_timings:       total time =     844.55 ms /   250 tokens


{"Products": ["Fridge", "Freezer", "Refrigerator", "Chest Freezer", "Fruit Basket", "Fruit Cooler", "Fridge Basket", "Portable Fridge", "Insulated Fridge", "Fruit Preservation Appliance", "Fruit Storage Box", "Fruit Humidifier", "Fruit Freshness Kit", "Fruit Preservation System", "Fridge for Fruits", "Fruit Preservation Container"]}


In [14]:
# Example usage
query = "I want to buy a gift for a baby"
filtered_collection = identify_keywords(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 132 prefix-match hit, remaining 44 prompt tokens to eval

llama_print_timings:        load time =      84.94 ms
llama_print_timings:      sample time =       6.87 ms /   107 runs   (    0.06 ms per token, 15568.17 tokens per second)
llama_print_timings: prompt eval time =      45.51 ms /    44 tokens (    1.03 ms per token,   966.80 tokens per second)
llama_print_timings:        eval time =     844.86 ms /   106 runs   (    7.97 ms per token,   125.46 tokens per second)
llama_print_timings:       total time =     934.76 ms /   150 tokens


Here is the JSON response with relevant product suggestions for a baby gift:

{"Products": ["Baby onesie", "Baby blanket", "Baby bib", "Baby rattle", "Baby mobile", "Baby bath tub", "Baby carrier", "Baby stroller", "Baby monitor", "Baby toy", "Baby book", "Baby shoes", "Baby socks", "Baby hat", "Baby mittens", "Baby swaddle", "Baby crib", "Baby changing table", "Baby safety gate", "Baby nursery set"]}


In [16]:
# Example usage
query = "I want to buy something to eat with tea"
filtered_collection = identify_keywords(query=query)

# Output the filtered collections
print(filtered_collection)

Llama.generate: 167 prefix-match hit, remaining 9 prompt tokens to eval

llama_print_timings:        load time =      84.94 ms
llama_print_timings:      sample time =       9.24 ms /   144 runs   (    0.06 ms per token, 15591.17 tokens per second)
llama_print_timings: prompt eval time =      40.09 ms /     9 tokens (    4.45 ms per token,   224.50 tokens per second)
llama_print_timings:        eval time =    1141.77 ms /   143 runs   (    7.98 ms per token,   125.24 tokens per second)
llama_print_timings:       total time =    1244.71 ms /   152 tokens


Based on the user's query, it seems they are looking for food items that pair well with tea. Here is a list of relevant products that could be suggested:

{"Products": ["Tea biscuits", "Scones", "Shortbread cookies", "Fruit pastries", "Cheese straws", "Chocolate truffles", "Caramel corn", "Mixed nuts", "Assorted chocolates", "Tea sandwiches", "Assorted crackers", "Cheese and fruit platter", "Assorted tea cakes", "Tea loaf", "Tea cookies", "Tea breads", "Tea biscuits", "Tea cakes", "Tea sandwiches", "Assorted pastries"]}


In [10]:
# def generate_response(name, main_category, sub_category):
#     messages = [
#         {"role": "system", "content": "You are an ecommerce assistant. Generate a product description based on the given details. The description of product should include all the keywords relevant to the product."},
#         {"role": "user", "content": f"Name: {name}\nMain Category: {main_category}\nSub Category: {sub_category}"}
#     ]
    
#     response = model.create_chat_completion(
#         messages=messages,
#         max_tokens=150,
#         temperature=0.8,
#         top_p=0.8,
#         top_k=20
#     )
    
#     return response["choices"][0]["message"]["content"].strip()

# Upload Descriptions

In [16]:
def generate_response(name, main_category, sub_category):
    """Generate a product description with the module-level chat ``model``.

    This definition replaces the earlier ``generate_response(messages, model, ...)``
    from the same notebook once its cell has run.

    Args:
        name: Product name inserted into the prompt.
        main_category: Main category inserted into the prompt.
        sub_category: Sub category inserted into the prompt.

    Returns:
        The generated description with leading and trailing whitespace removed.
    """
    system_prompt = "You are an ecommerce assistant. Generate a product description based on the given details. Include product's name, all relevant keywords, features and use cases of product."
    product_details = f"Name: {name}\nMain Category: {main_category}\nSub Category: {sub_category}"

    completion = model.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": product_details},
        ],
        max_tokens=500,
        temperature=0.8,
        top_p=0.8,
        top_k=20,
    )

    description = completion["choices"][0]["message"]["content"]
    return description.strip()

In [17]:
def update_products_with_descriptions(collection_name='International Toy Store'):
    """Generate a description for every product in a collection and store it.

    Uses the module-level ``db`` handle and ``generate_response``. Each document's
    ``description`` field is overwritten, one ``update_one`` per document.

    Args:
        collection_name: Name of the MongoDB collection to process. Defaults to
            the collection this notebook was originally written for.
    """
    products_collection = db[collection_name]
    # Fetch only the fields the prompt needs (``_id`` is returned by default).
    products = products_collection.find(
        {},
        {'name': 1, 'main_category': 1, 'sub_category': 1},
    )

    for product in products:
        name = product.get('name')
        main_category = product.get('main_category')
        sub_category = product.get('sub_category')

        description = generate_response(name, main_category, sub_category)

        # Update the product with the new description
        products_collection.update_one(
            {'_id': product['_id']},
            {'$set': {'description': description}}
        )
        print(f"Updated product {product['_id']} with new description.")

# Execute the update function
# NOTE: this runs one LLM generation per document and overwrites each document's
# `description` field in the database, so re-running the cell regenerates everything.
update_products_with_descriptions()

Llama.generate: 44 prefix-match hit, remaining 36 prompt tokens to eval

llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      18.41 ms /   305 runs   (    0.06 ms per token, 16567.98 tokens per second)
llama_print_timings: prompt eval time =      33.19 ms /    36 tokens (    0.92 ms per token,  1084.73 tokens per second)
llama_print_timings:        eval time =    2414.05 ms /   304 runs   (    7.94 ms per token,   125.93 tokens per second)
llama_print_timings:       total time =    2605.40 ms /   340 tokens
Llama.generate: 44 prefix-match hit, remaining 25 prompt tokens to eval


Updated product 670f9cb974b51486c6408944 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      17.35 ms /   288 runs   (    0.06 ms per token, 16601.34 tokens per second)
llama_print_timings: prompt eval time =      14.29 ms /    25 tokens (    0.57 ms per token,  1749.72 tokens per second)
llama_print_timings:        eval time =    2271.51 ms /   287 runs   (    7.91 ms per token,   126.35 tokens per second)
llama_print_timings:       total time =    2432.41 ms /   312 tokens
Llama.generate: 42 prefix-match hit, remaining 29 prompt tokens to eval


Updated product 670f9cb974b51486c6408945 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      14.02 ms /   234 runs   (    0.06 ms per token, 16685.68 tokens per second)
llama_print_timings: prompt eval time =      14.35 ms /    29 tokens (    0.49 ms per token,  2020.48 tokens per second)
llama_print_timings:        eval time =    1838.00 ms /   233 runs   (    7.89 ms per token,   126.77 tokens per second)
llama_print_timings:       total time =    1964.87 ms /   262 tokens
Llama.generate: 42 prefix-match hit, remaining 29 prompt tokens to eval


Updated product 670f9cb974b51486c6408946 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      19.72 ms /   327 runs   (    0.06 ms per token, 16583.83 tokens per second)
llama_print_timings: prompt eval time =      14.49 ms /    29 tokens (    0.50 ms per token,  2000.83 tokens per second)
llama_print_timings:        eval time =    2587.66 ms /   326 runs   (    7.94 ms per token,   125.98 tokens per second)
llama_print_timings:       total time =    2777.69 ms /   355 tokens
Llama.generate: 42 prefix-match hit, remaining 25 prompt tokens to eval


Updated product 670f9cb974b51486c6408947 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      17.40 ms /   284 runs   (    0.06 ms per token, 16321.84 tokens per second)
llama_print_timings: prompt eval time =      14.23 ms /    25 tokens (    0.57 ms per token,  1757.22 tokens per second)
llama_print_timings:        eval time =    2245.12 ms /   283 runs   (    7.93 ms per token,   126.05 tokens per second)
llama_print_timings:       total time =    2407.13 ms /   308 tokens
Llama.generate: 42 prefix-match hit, remaining 53 prompt tokens to eval


Updated product 670f9cb974b51486c6408948 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      25.91 ms /   402 runs   (    0.06 ms per token, 15517.64 tokens per second)
llama_print_timings: prompt eval time =      18.79 ms /    53 tokens (    0.35 ms per token,  2820.95 tokens per second)
llama_print_timings:        eval time =    3225.58 ms /   401 runs   (    8.04 ms per token,   124.32 tokens per second)
llama_print_timings:       total time =    3487.61 ms /   454 tokens
Llama.generate: 42 prefix-match hit, remaining 32 prompt tokens to eval


Updated product 670f9cb974b51486c6408949 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      17.77 ms /   269 runs   (    0.07 ms per token, 15138.72 tokens per second)
llama_print_timings: prompt eval time =      14.47 ms /    32 tokens (    0.45 ms per token,  2211.47 tokens per second)
llama_print_timings:        eval time =    2138.25 ms /   268 runs   (    7.98 ms per token,   125.34 tokens per second)
llama_print_timings:       total time =    2297.90 ms /   300 tokens
Llama.generate: 42 prefix-match hit, remaining 30 prompt tokens to eval


Updated product 670f9cb974b51486c640894a with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      19.24 ms /   303 runs   (    0.06 ms per token, 15746.80 tokens per second)
llama_print_timings: prompt eval time =      14.77 ms /    30 tokens (    0.49 ms per token,  2030.73 tokens per second)
llama_print_timings:        eval time =    2417.02 ms /   302 runs   (    8.00 ms per token,   124.95 tokens per second)
llama_print_timings:       total time =    2597.43 ms /   332 tokens
Llama.generate: 42 prefix-match hit, remaining 36 prompt tokens to eval


Updated product 670f9cb974b51486c640894b with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      18.85 ms /   310 runs   (    0.06 ms per token, 16446.50 tokens per second)
llama_print_timings: prompt eval time =      15.75 ms /    36 tokens (    0.44 ms per token,  2285.86 tokens per second)
llama_print_timings:        eval time =    2460.47 ms /   309 runs   (    7.96 ms per token,   125.59 tokens per second)
llama_print_timings:       total time =    2639.48 ms /   345 tokens
Llama.generate: 42 prefix-match hit, remaining 42 prompt tokens to eval


Updated product 670f9cb974b51486c640894c with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      18.96 ms /   313 runs   (    0.06 ms per token, 16505.83 tokens per second)
llama_print_timings: prompt eval time =      16.53 ms /    42 tokens (    0.39 ms per token,  2541.14 tokens per second)
llama_print_timings:        eval time =    2489.72 ms /   312 runs   (    7.98 ms per token,   125.32 tokens per second)
llama_print_timings:       total time =    2671.74 ms /   354 tokens
Llama.generate: 42 prefix-match hit, remaining 32 prompt tokens to eval


Updated product 670f9cb974b51486c640894d with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      16.66 ms /   275 runs   (    0.06 ms per token, 16507.59 tokens per second)
llama_print_timings: prompt eval time =      14.60 ms /    32 tokens (    0.46 ms per token,  2192.38 tokens per second)
llama_print_timings:        eval time =    2178.78 ms /   274 runs   (    7.95 ms per token,   125.76 tokens per second)
llama_print_timings:       total time =    2331.17 ms /   306 tokens
Llama.generate: 42 prefix-match hit, remaining 38 prompt tokens to eval


Updated product 670f9cb974b51486c640894e with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      17.33 ms /   286 runs   (    0.06 ms per token, 16504.13 tokens per second)
llama_print_timings: prompt eval time =      15.75 ms /    38 tokens (    0.41 ms per token,  2412.85 tokens per second)
llama_print_timings:        eval time =    2267.89 ms /   285 runs   (    7.96 ms per token,   125.67 tokens per second)
llama_print_timings:       total time =    2428.59 ms /   323 tokens
Llama.generate: 42 prefix-match hit, remaining 43 prompt tokens to eval


Updated product 670f9cb974b51486c640894f with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      14.52 ms /   242 runs   (    0.06 ms per token, 16662.08 tokens per second)
llama_print_timings: prompt eval time =      16.54 ms /    43 tokens (    0.38 ms per token,  2599.29 tokens per second)
llama_print_timings:        eval time =    1911.10 ms /   241 runs   (    7.93 ms per token,   126.11 tokens per second)
llama_print_timings:       total time =    2045.49 ms /   284 tokens
Llama.generate: 42 prefix-match hit, remaining 34 prompt tokens to eval


Updated product 670f9cb974b51486c6408950 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      20.75 ms /   343 runs   (    0.06 ms per token, 16528.53 tokens per second)
llama_print_timings: prompt eval time =      15.73 ms /    34 tokens (    0.46 ms per token,  2162.02 tokens per second)
llama_print_timings:        eval time =    2733.21 ms /   342 runs   (    7.99 ms per token,   125.13 tokens per second)
llama_print_timings:       total time =    2937.06 ms /   376 tokens
Llama.generate: 42 prefix-match hit, remaining 42 prompt tokens to eval


Updated product 670f9cb974b51486c6408951 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      18.02 ms /   300 runs   (    0.06 ms per token, 16652.79 tokens per second)
llama_print_timings: prompt eval time =      16.54 ms /    42 tokens (    0.39 ms per token,  2539.91 tokens per second)
llama_print_timings:        eval time =    2381.79 ms /   299 runs   (    7.97 ms per token,   125.54 tokens per second)
llama_print_timings:       total time =    2551.72 ms /   341 tokens
Llama.generate: 42 prefix-match hit, remaining 28 prompt tokens to eval


Updated product 670f9cb974b51486c6408952 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      17.45 ms /   289 runs   (    0.06 ms per token, 16560.66 tokens per second)
llama_print_timings: prompt eval time =      14.38 ms /    28 tokens (    0.51 ms per token,  1947.56 tokens per second)
llama_print_timings:        eval time =    2295.69 ms /   288 runs   (    7.97 ms per token,   125.45 tokens per second)
llama_print_timings:       total time =    2458.45 ms /   316 tokens
Llama.generate: 42 prefix-match hit, remaining 35 prompt tokens to eval


Updated product 670f9cb974b51486c6408953 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      20.90 ms /   342 runs   (    0.06 ms per token, 16362.85 tokens per second)
llama_print_timings: prompt eval time =      15.86 ms /    35 tokens (    0.45 ms per token,  2206.39 tokens per second)
llama_print_timings:        eval time =    2737.20 ms /   341 runs   (    8.03 ms per token,   124.58 tokens per second)
llama_print_timings:       total time =    2940.73 ms /   376 tokens
Llama.generate: 42 prefix-match hit, remaining 34 prompt tokens to eval


Updated product 670f9cb974b51486c6408954 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      25.36 ms /   419 runs   (    0.06 ms per token, 16525.34 tokens per second)
llama_print_timings: prompt eval time =      15.78 ms /    34 tokens (    0.46 ms per token,  2155.31 tokens per second)
llama_print_timings:        eval time =    3364.95 ms /   418 runs   (    8.05 ms per token,   124.22 tokens per second)
llama_print_timings:       total time =    3627.40 ms /   452 tokens
Llama.generate: 42 prefix-match hit, remaining 33 prompt tokens to eval


Updated product 670f9cb974b51486c6408955 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      19.74 ms /   328 runs   (    0.06 ms per token, 16616.85 tokens per second)
llama_print_timings: prompt eval time =      15.74 ms /    33 tokens (    0.48 ms per token,  2096.84 tokens per second)
llama_print_timings:        eval time =    2612.66 ms /   327 runs   (    7.99 ms per token,   125.16 tokens per second)
llama_print_timings:       total time =    2801.82 ms /   360 tokens
Llama.generate: 42 prefix-match hit, remaining 35 prompt tokens to eval


Updated product 670f9cb974b51486c6408956 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      20.26 ms /   336 runs   (    0.06 ms per token, 16582.77 tokens per second)
llama_print_timings: prompt eval time =      15.79 ms /    35 tokens (    0.45 ms per token,  2217.15 tokens per second)
llama_print_timings:        eval time =    2671.98 ms /   335 runs   (    7.98 ms per token,   125.38 tokens per second)
llama_print_timings:       total time =    2865.70 ms /   370 tokens
Llama.generate: 42 prefix-match hit, remaining 29 prompt tokens to eval


Updated product 670f9cb974b51486c6408957 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      26.43 ms /   438 runs   (    0.06 ms per token, 16574.59 tokens per second)
llama_print_timings: prompt eval time =      14.65 ms /    29 tokens (    0.51 ms per token,  1979.39 tokens per second)
llama_print_timings:        eval time =    3500.55 ms /   437 runs   (    8.01 ms per token,   124.84 tokens per second)
llama_print_timings:       total time =    3771.41 ms /   466 tokens
Llama.generate: 42 prefix-match hit, remaining 28 prompt tokens to eval


Updated product 670f9cb974b51486c6408958 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      16.71 ms /   277 runs   (    0.06 ms per token, 16580.87 tokens per second)
llama_print_timings: prompt eval time =      14.40 ms /    28 tokens (    0.51 ms per token,  1944.04 tokens per second)
llama_print_timings:        eval time =    2193.67 ms /   276 runs   (    7.95 ms per token,   125.82 tokens per second)
llama_print_timings:       total time =    2347.38 ms /   304 tokens
Llama.generate: 42 prefix-match hit, remaining 31 prompt tokens to eval


Updated product 670f9cb974b51486c6408959 with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      16.93 ms /   281 runs   (    0.06 ms per token, 16592.86 tokens per second)
llama_print_timings: prompt eval time =      14.58 ms /    31 tokens (    0.47 ms per token,  2126.93 tokens per second)
llama_print_timings:        eval time =    2227.55 ms /   280 runs   (    7.96 ms per token,   125.70 tokens per second)
llama_print_timings:       total time =    2384.81 ms /   311 tokens
Llama.generate: 42 prefix-match hit, remaining 39 prompt tokens to eval


Updated product 670f9cb974b51486c640895a with new description.



llama_print_timings:        load time =     222.02 ms
llama_print_timings:      sample time =      18.79 ms /   313 runs   (    0.06 ms per token, 16655.14 tokens per second)
llama_print_timings: prompt eval time =      15.95 ms /    39 tokens (    0.41 ms per token,  2445.45 tokens per second)
llama_print_timings:        eval time =    2485.26 ms /   312 runs   (    7.97 ms per token,   125.54 tokens per second)
llama_print_timings:       total time =    2664.41 ms /   351 tokens


Updated product 670f9cb974b51486c640895b with new description.


In [13]:
# !pip install torch==2.4.1

In [11]:
# !pip install torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [12]:
# !pip show torch

In [10]:
# !pip install --upgrade transformers


# Vector Search

In [12]:
import transformers

In [9]:
from transformers import AutoTokenizer, AutoModel

In [6]:
# Handle to the MongoDB collection that the embedding and search cells below read from and write to.
collection = db['International Toy Store'] 

In [7]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Hugging Face identifier of the embedding model used for vector search.
modelID = 'BAAI/bge-large-en-v1.5'
print(f"Loading model {modelID} on {device}")

tokenizer = AutoTokenizer.from_pretrained(modelID, use_fast=True)
# NOTE: this rebinds `model`, replacing the Llama instance that was assigned
# to the same name earlier in the notebook.
model = AutoModel.from_pretrained(modelID)
model = model.to(device)

Loading model BAAI/bge-large-en-v1.5 on cuda


## Generate Vector Embeddings using BGE Large Model

In [8]:
class VectorEmbeddingGenerator:
    """Turn texts into dense vectors by mean-pooling a model's last hidden state.

    Args:
        model: Encoder whose forward output exposes ``last_hidden_state``.
        tokenizer: Callable that tokenizes a text and returns a mapping that
            contains ``attention_mask`` and supports ``.to(device)``.
        device: Optional torch device (string or ``torch.device``). When
            omitted, CUDA is used if available, otherwise the CPU.
    """

    def __init__(self, model, tokenizer, device=None):
        if device is None:
            # Resolve the device here rather than reading a notebook-level
            # global, so the class does not depend on cell execution order.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device
        self.tokenizer = tokenizer
        self.model = model.to(self.device)

    def generate_embeddings(self, descriptions):
        """Embed every text in ``descriptions``, one forward pass per text.

        Args:
            descriptions: Iterable of strings.

        Returns:
            np.ndarray: The per-text embeddings stacked row-wise, in input
            order. An empty input yields an array with zero rows instead of
            the ValueError that ``np.vstack`` raises on an empty list.
        """
        embeddings = [self._get_query_embedding(desc) for desc in descriptions]
        if not embeddings:
            config = getattr(self.model, 'config', None)
            hidden_size = getattr(config, 'hidden_size', 0) or 0
            return np.empty((0, hidden_size), dtype=np.float32)
        return np.vstack(embeddings)

    def _get_query_embedding(self, query):
        """Return the masked mean of the last hidden state for ``query`` as a NumPy array."""
        inputs = self.tokenizer(query, padding=True, truncation=True, return_tensors='pt', max_length=256,
                                add_special_tokens=True, return_attention_mask=True, return_token_type_ids=False)
        # Rebind to the moved encoding so the mask read below is on the same
        # device as the model output, whether or not ``.to`` works in place.
        inputs = inputs.to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)

        attention_mask = inputs['attention_mask']

        # Zero out padded positions so they do not contribute to the sum.
        last_hidden = outputs.last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
        # Clamp the divisor so a row with no attended tokens cannot divide by zero.
        token_counts = attention_mask.sum(dim=1).clamp(min=1)[..., None]
        embeddings = last_hidden.sum(dim=1) / token_counts
        return embeddings.cpu().numpy()

In [None]:
# Fetch each document's _id together with its description in a single query,
# so every embedding is written back to the exact document it was computed
# from (two independent find() cursors are not guaranteed to return documents
# in the same order). Documents without a string description are skipped.
docs_to_embed = list(collection.find({'description': {'$type': 'string'}}, {'description': 1}))
doc_ids = [doc['_id'] for doc in docs_to_embed]
descriptions = [doc['description'] for doc in docs_to_embed]

# Generate embeddings for the descriptions
embedding_generator = VectorEmbeddingGenerator(model, tokenizer)

if descriptions:
    description_embeddings = embedding_generator.generate_embeddings(descriptions)

    # Store each embedding on the document it belongs to, matched by _id.
    for doc_id, embedding in zip(doc_ids, description_embeddings):
        collection.update_one({'_id': doc_id}, {'$set': {'description_embedding': embedding.tolist()}})

    print("Embeddings generated and stored in MongoDB.")
else:
    # Nothing to embed; keep the name defined for any later cell that reads it.
    description_embeddings = np.empty((0, 0))
    print("No documents with a description found; nothing was embedded.")

Embeddings generated and stored in MongoDB.


In [4]:
def vector_search(collection, index='bgeVectors', path='description_embedding', num_candidates=100, limit=10, query=""):
    """Embed ``query`` and run a MongoDB ``$vectorSearch`` aggregation.

    Args:
        collection: MongoDB collection to run the aggregation on.
        index: Name of the vector search index.
        path: Document field that holds the stored embedding.
        num_candidates: Value for ``numCandidates``; raised to ``limit``
            when smaller, because the stage rejects ``limit > numCandidates``.
        limit: Maximum number of documents to return.
        query: Free-text query to embed.

    Returns:
        list: The projected result documents, each with an added ``score``
        field taken from the ``vectorSearchScore`` metadata.
    """
    logger.info(f"Generating embedding for query: '{query}'")

    # Get query embedding. get_query_embedding returns one row per input, so
    # row 0 is the vector for this query; taking it here keeps query_vector a
    # flat list and makes the "[:5]" preview below show five numbers rather
    # than the whole vector.
    t = time()
    query_vector = get_query_embedding(query=query)[0].tolist()
    logger.info(f'Query embedding time: {time() - t:.3f} sec')
    logger.info(f"Generated query vector: {query_vector[:5]}... (showing first 5 elements)")

    # $vectorSearch requires numCandidates >= limit.
    effective_candidates = max(num_candidates, limit)

    # Construct MongoDB pipeline for vector search
    vector_stage = {
        "index": index,
        "path": path,
        "queryVector": query_vector,
        "numCandidates": effective_candidates,
        "limit": limit
    }
    project_stage = {
        '_id': 0,
        'name': 1,
        'main_category': 1,
        'sub_category': 1,
        'image': 1,
        'link': 1,
        'ratings': 1,
        'no_of_ratings': 1,
        'discount_price': 1,
        'actual_price': 1,
        'description': 1,
        'score': {"$meta": "vectorSearchScore"}
    }
    pipeline = [
        {"$vectorSearch": vector_stage},
        {'$project': project_stage}
    ]

    # Log the pipeline for debugging, with the vector replaced by its length
    # so the log is not flooded with every component of the embedding.
    vector_summary = f"<{len(query_vector)} floats>"
    loggable_pipeline = [
        {"$vectorSearch": dict(vector_stage, queryVector=vector_summary)},
        {'$project': project_stage}
    ]
    logger.info(f"Pipeline for MongoDB vector search: {loggable_pipeline}")

    # Execute the pipeline
    t = time()
    results = collection.aggregate(pipeline)
    logger.info(f'MongoDB vector search execution time: {time() - t:.3f} sec')

    # Collect results and log number of results found
    results_list = list(results)
    logger.info(f"Number of results found: {len(results_list)}")

    if results_list:
        for res in results_list:
            logger.info(f"Result: {res}")
    else:
        logger.warning("No results found.")

    return results_list

# Demo: run one vector search against the toy-store collection.
query = "3D puzzles for kids"
results = vector_search(query=query, collection=db['International Toy Store'])

# Print each returned document on its own line.
for result in results:
    print(result)


In [11]:
def get_query_embedding(query):
    """Embed ``query`` with the notebook-level ``tokenizer`` and ``model``.

    The text is tokenized (truncated to 256 tokens), run through the model
    without gradient tracking, and the last hidden state is averaged over the
    positions marked by the attention mask.

    Returns:
        The pooled embedding moved to the CPU and converted to a NumPy array.
    """
    encoded = tokenizer(
        query,
        max_length=256,
        truncation=True,
        padding=True,
        add_special_tokens=True,
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors='pt',
    )
    with torch.no_grad():
        model_output = model(**encoded.to(device))

    mask = encoded['attention_mask']
    # Zero the hidden states of masked-out positions before summing.
    is_padding = ~mask.unsqueeze(-1).bool()
    token_states = model_output.last_hidden_state.masked_fill(is_padding, 0.0)
    token_counts = mask.sum(dim=1).unsqueeze(-1)
    pooled = token_states.sum(dim=1) / token_counts
    return pooled.cpu().numpy()


In [14]:
from sklearn.metrics.pairwise import cosine_similarity

def semantic_search(query, top_n=5):
    """Rank stored documents by cosine similarity to ``query``.

    Embeds the query with ``get_query_embedding``, loads every stored
    ``description_embedding`` from the notebook-level ``collection`` and
    compares them on the client.

    Args:
        query: Free-text query to embed.
        top_n: Number of best matches to return.

    Returns:
        list: ``(_id, similarity)`` tuples ordered from most to least
        similar. Empty when no document has a stored embedding.
    """
    # Get the query embedding
    query_embedding = get_query_embedding(query)

    # Fetch IDs and embeddings, restricted to documents that actually have an
    # embedding; a document without the field would otherwise raise KeyError.
    documents = collection.find(
        {'description_embedding': {'$exists': True}},
        {'_id': 1, 'description_embedding': 1},
    )
    ids = []
    stored_embeddings = []
    for doc in documents:
        ids.append(doc['_id'])
        stored_embeddings.append(doc['description_embedding'])

    # cosine_similarity rejects an empty matrix, so report "no matches" directly.
    if not ids:
        return []

    # Convert stored embeddings to numpy array
    stored_embeddings = np.array(stored_embeddings)

    # Compute cosine similarities; row 0 belongs to the single query.
    similarities = cosine_similarity(query_embedding, stored_embeddings)[0]

    # Get the top_n results, highest similarity first
    top_indices = np.argsort(similarities)[::-1][:top_n]
    return [(ids[i], similarities[i]) for i in top_indices]


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [10]:
# Fetch one document that already has an embedding, to verify that the
# collection returns data and that the embedding field was written.
sample = collection.find_one({'description_embedding': {'$exists': True}})
if sample is None:
    print("Sample document:", sample)
else:
    # Print every field except the raw vector, and report only the vector's
    # length, so the cell output is not flooded with the embedding values.
    sample_preview = {key: value for key, value in sample.items() if key != 'description_embedding'}
    print("Sample document:", sample_preview)
    print("description_embedding length:", len(sample['description_embedding']))


Sample document: {'_id': ObjectId('670f9cb974b51486c6408944'), 'name': 'Ravensburger 3D Puzzles Big Ben Night Edition, Multi Color (216 Pieces)', 'main_category': 'toys & baby products', 'sub_category': 'International Toy Store', 'image': 'https://m.media-amazon.com/images/W/IMAGERENDERING_521856-T1/images/I/61r8g1fjWTL._AC_UL320_.jpg', 'link': 'https://www.amazon.in/Ravensburger-Puzzles-Night-Multi-Pieces/dp/B00QM4W042/ref=sr_1_289?qid=1679219918&s=toys&sr=1-289', 'ratings': 4.6, 'no_of_ratings': '5,017', 'discount_price': nan, 'actual_price': '₹6,752', 'description': "Introducing the Ravensburger 3D Puzzles Big Ben Night Edition, a spectacular addition to your collection of multi-colored, intricate and fascinating puzzles. With 216 high-quality pieces, this puzzle is perfect for both adults and children who love a challenge and a stunning end result.\n\nKey Features:\n- Brand: Ravensburger, a renowned leader in jigsaw puzzles and quality toys\n- Theme: Big Ben at night, featuring the