In [1]:
import pandas as pd
import numpy as np
import torch

device = torch.device("xpu" if torch.xpu.is_available() else "cpu")
torch.xpu.is_available()

False

In [2]:
!pip freeze --local > requirements.txt

In [3]:
# Load dataset
data_path = 'Dataset/data.csv'
df = pd.read_csv(data_path, encoding="ISO-8859-1")

# add column for original description   
df['Original Description'] = df['Description']

# Data Cleaning
df.dropna(subset=['Description'], inplace=True)  # Remove rows with missing descriptions
df['Description'] = df['Description'].str.lower()  # Convert to lowercase
df['Description'] = df['Description'].str.replace(r'[^a-zA-Z0-9 ]', '', regex=True)  # Remove special characters
# elimination of NaN values
df.dropna(inplace=True)
# elimination of duplicate rows
df.drop_duplicates(inplace=True)
# elimination of cancelled orders
df = df[~df['InvoiceNo'].str.startswith('C')]

In [4]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Original Description
0,536365,85123A,white hanging heart tlight holder,6,12/1/2010 8:26,2.55,17850.0,United Kingdom,WHITE HANGING HEART T-LIGHT HOLDER
1,536365,71053,white metal lantern,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,WHITE METAL LANTERN
2,536365,84406B,cream cupid hearts coat hanger,8,12/1/2010 8:26,2.75,17850.0,United Kingdom,CREAM CUPID HEARTS COAT HANGER
3,536365,84029G,knitted union flag hot water bottle,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,KNITTED UNION FLAG HOT WATER BOTTLE
4,536365,84029E,red woolly hottie white heart,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,RED WOOLLY HOTTIE WHITE HEART.


In [9]:
from sentence_transformers import SentenceTransformer

# # Load the minilmv6-l2 model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Save the model locally
model_save_path = r'C:\Users\Intel7\Desktop\MAS\all-MiniLM-L6-v2'
model.save(model_save_path)

# Load the model from the saved path
loaded_model = SentenceTransformer(model_save_path)

In [12]:
import faiss

# Encode the product descriptions
descriptions = df['Description'].tolist()
encoded_descriptions = loaded_model.encode(descriptions)

# Create a FAISS index
dimension = encoded_descriptions.shape[1]
index = faiss.IndexFlatL2(dimension)

# Add the encoded descriptions to the index
index.add(encoded_descriptions)

# Store the mapping of row indices
row_mapping = {i: idx for i, idx in enumerate(df.index)}

print("FAISS index created and descriptions added.")

FAISS index created and descriptions added.


In [24]:
print(f"FAISS database size: {index.ntotal}")

FAISS database size: 392732


In [25]:
import pickle

# Save the FAISS index
faiss.write_index(index, 'faiss_index.bin')

# Save the row mapping
with open('row_mapping.pkl', 'wb') as f:
    pickle.dump(row_mapping, f)

print("FAISS index and row mapping saved.")

FAISS index and row mapping saved.


In [30]:
# Define the search query
search_query = "big mug"

# Encode the search query using the loaded model
encoded_query = loaded_model.encode([search_query])

# Perform the search
k = 5  # Number of nearest neighbors to retrieve
distances, indices = index.search(encoded_query, k)

# Retrieve the corresponding descriptions from the dataframe
results = [df.iloc[row_mapping[idx]]['Description'] for idx in indices[0]]

print("Search results:")
for result in results:
    print(result)

Search results:
jumbo bag doiley patterns
home building block word
red retrospot charlotte bag
ivory diner wall clock
bathroom metal sign


In [31]:
from agno.agent import Agent

class SearchAgent(Agent):
    def __init__(self, index, row_mapping, model):
        super().__init__()
        self.index = index
        self.row_mapping = row_mapping
        self.model = model

    def search(self, query, k=5):
        encoded_query = self.model.encode([query])
        distances, indices = self.index.search(encoded_query, k)
        results = [df.iloc[self.row_mapping[idx]]['Description'] for idx in indices[0]]
        return results

# Initialize the search agent
search_agent = SearchAgent(index=index, row_mapping=row_mapping, model=loaded_model)

# Define a function to use the agent for searching
def search_with_agent(query, k=5):
    return search_agent.search(query, k)

# Example usage
query = "CREAM CUPID HEARTS COAT HANGER"
results = search_with_agent(query)
print("Search results:")
for result in results:
    print(result)

Search results:
red retrospot oven glove double
retrospot small tube matches
childs garden trowel blue 
ivory giant garden thermometer
wood s3 cabinet ant white finish


In [35]:
from agno.agent import Agent, RunResponse 
from agno.models.ollama import Ollama

agent = Agent(model=Ollama(id="llama3.2"), markdown=True)

# Get the response in a variable
run: RunResponse = agent.run("Share a 2 sentence horror story")
print(run.content)

As I lay in bed, I couldn't shake the feeling that someone was watching me. It wasn't until I heard my own voice whisper "goodnight" back to me that I realized I wasn't alone.


In [7]:
import pandas as pd
from agno.agent import Agent, RunResponse
from agno.embedder.ollama import OllamaEmbedder
from agno.models.ollama import Ollama
from agno.vectordb.pgvector import PgVector
from agno.knowledge.text import TextKnowledgeBase

# Load product descriptions
csv_file = "Dataset/data.csv"  # Update with your actual CSV file
df = pd.read_csv(csv_file, encoding="ISO-8859-1")
# Data Cleaning
df.dropna(subset=['Description'], inplace=True)  # Remove rows with missing descriptions
df['Description'] = df['Description'].str.lower()  # Convert to lowercase
df['Description'] = df['Description'].str.replace(r'[^a-zA-Z0-9 ]', '', regex=True)# Remove special characters
# elimination of NaN values
df.dropna(inplace=True)
# elimination of duplicate rows
df.drop_duplicates(inplace=True)
# elimination of cancelled orders
df = df[~df['InvoiceNo'].str.startswith('C')]
products = list(set(df['Description'].tolist()))

# Database connection
db_url = "postgresql+psycopg://postgres:pranav2004$@localhost:5433/ProductDescription"  # Update with your actual DB URL

# Initialize Vector Database
vector_db = PgVector(
    table_name="product_descriptions",
    db_url=db_url,
    embedder=OllamaEmbedder(id="llama3.2", dimensions=3072),
)


In [8]:
from agno.knowledge.text import Document
from tqdm import tqdm
# Ensure vector_db has an embedder
embedder = vector_db.embedder  

documents = []
for description in tqdm(products):
    
    # Generate embedding manually
    embedding = embedder.get_embedding(description)  # Convert text to vector

    # Create document with content and embedding
    doc = Document(content=description)
    doc.embedding = embedding  # Assign embedding manually

    documents.append(doc)

100%|██████████| 3867/3867 [10:31<00:00,  6.13it/s]


In [9]:
vector_db.insert(documents)

OperationalError: (psycopg.errors.ConnectionTimeout) connection timeout expired
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [None]:
query = "LOVE HEART NAPKIN BOX"
# Convert query text into an embedding
query_embedding = vector_db.embedder.get_embedding(query)  # Pass the query as a list


In [None]:
results = vector_db.search(query, limit=5)  # Retrieve top 5 similar descriptions

print(results)


[Document(content='embossed heart trinket box', id='72c18181960b87cbc4ad317eeebf349b', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, timeout=None, options=None, client_kwargs=None, ollama_client=None), embedding=array([-0.19260289, -0.5875256 ,  4.287101  , ..., -0.22511965,
        0.5104131 ,  0.34725544], dtype=float32), usage=None, reranking_score=None), Document(content='strawberry ceramic trinket box', id='3c7b4fe2bb925e97cc115f65624b5181', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, timeout=None, options=None, client_kwargs=None, ollama_client=None), embedding=array([ 0.13428666,  0.04675705,  4.0765805 , ...,  0.37095943,
       -0.0974075 ,  0.63025624], dtype=float32), usage=None, reranking_score=None), Document(content='red gingham rose jewellery box', id='d5bb33a814efa5144c2851a5d209adbf', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, t

In [None]:
# Load product descriptions into the vector DB
knowledge_base = TextKnowledgeBase(
    texts=products,  # Ensure "Description" column exists
    vector_db=vector_db,
    path="product_descriptions"
)

# Store embeddings (only run once or when updating)
knowledge_base.load(recreate=False)


In [None]:
knowledge_base

TextKnowledgeBase(reader=TextReader(chunk=True, chunk_size=3000, separators=['\n', '\n\n', '\r', '\r\n', '\n\r', '\t', ' ', '  '], chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x0000019D2089E550>), vector_db=<agno.vectordb.pgvector.pgvector.PgVector object at 0x0000019B6C0141D0>, num_documents=5, optimize_on=1000, chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x0000019D2089E550>, path='product_descriptions', formats=['.txt'])

In [None]:
# Initialize the Agent with Vector DB Knowledge
agent = Agent(
    model=Ollama(id="llama3.2"),
    knowledge=knowledge_base,
    show_tool_calls=True,  # Enable to see tool calls
)

# Function to interact with the agent
def ask_agent(query):
    
    print("\nAgent's Response:")
    run: RunResponse = agent.run(query)
    print(run.content)

In [None]:
# Example query: Ask the agent for recommendations
user_query = "return a recommendation similar to LOVE HEART NAPKIN BOX, use the knowledge base only to provide the answer"
ask_agent(user_query)


Agent's Response:
 - Running: search_knowledge_base(query=recommendations for love heart napkin box style gift set)

Based on the popularity of the LOVE HEART NAPKIN BOX, I recommend the SPACEBOY BABY GIFT SET. This gift set is also a popular choice among parents and features a unique and modern design that is perfect for little ones. The Spaceboy theme is trendy and versatile, making it suitable for both boys and girls.


In [None]:
ask_agent("return a recommendation similar to LOVE HEART NAPKIN BOX, use the knowledge base only to provide the answer")


Agent's Response:
 - Running: search_knowledge_base(query=Recommendations similar to LOVE HEART NAPKIN BOX)

Based on the tool call response, I recommend the "Lilac Diamante Pen in Gift Box" as a similar product to the LOVE HEART NAPKIN BOX. The Lilac Diamante Pen in Gift Box is a beautifully presented gift box containing a lovely lilac-colored pen with diamante details. It's a thoughtful and elegant gift idea that can be used for everyday writing or as a special treat.


In [None]:
ask_agent("return a recommendation similar to GUMBALL COAT RACK, use the knowledge base only to provide the answer")


Agent's Response:
 - Running: search_knowledge_base(query=furniture recommendations similar to gumball coat rack)

Based on the tool call response, I recommend the "Trellis Coat Rack" as a similar product to the Gumball Coat Rack. The Trellis Coat Rack has a unique and stylish design that can add a touch of elegance to any entryway or living room. It features multiple hooks for hanging coats, hats, and bags, making it a practical and functional addition to your home decor.
