In [6]:
import pandas as pd
import numpy as np
import torch

In [7]:
# Load dataset
data_path = 'data.csv'
df = pd.read_csv(data_path, encoding="ISO-8859-1")

# Data Cleaning
df.dropna(subset=['Description'], inplace=True)  # Remove rows with missing descriptions
df['Description'] = df['Description'].str.lower()  # Convert to lowercase
df['Description'] = df['Description'].str.replace(r'[^a-zA-Z0-9 ]', '', regex=True)  # Remove special characters
# elimination of NaN values
df.dropna(inplace=True)
# elimination of duplicate rows
df.drop_duplicates(inplace=True)
# elimination of cancelled orders
df = df[~df['InvoiceNo'].str.startswith('C')]

In [8]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,white hanging heart tlight holder,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,white metal lantern,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,cream cupid hearts coat hanger,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,knitted union flag hot water bottle,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,red woolly hottie white heart,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


<h1> Basic RAG </h1>


In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import os
import pickle

# # Load the minilmv6-l2 model
# model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Save the model locally
model_save_path = r'C:\Users\prana\OneDrive\Desktop\IITHyderabd\PR-RAG\MiniLM-L6-v2'
# model.save(model_save_path)

# Load the model from the saved path
loaded_model = SentenceTransformer(model_save_path)

In [None]:
# Encode the product descriptions
descriptions = df['Description'].tolist()
encoded_descriptions = loaded_model.encode(descriptions)

# Create a FAISS index
dimension = encoded_descriptions.shape[1]
index = faiss.IndexFlatL2(dimension)

# Add the encoded descriptions to the index
index.add(encoded_descriptions)

# Store the mapping of row indices
row_mapping = {i: idx for i, idx in enumerate(df.index)}

print("FAISS index created and descriptions added.")

FAISS index created and descriptions added.


In [11]:
print(f"FAISS database size: {index.ntotal}")

FAISS database size: 392732


In [12]:
import pickle

# Save the FAISS index
faiss.write_index(index, 'faiss_index.bin')

# Save the row mapping
with open('row_mapping.pkl', 'wb') as f:
    pickle.dump(row_mapping, f)

print("FAISS index and row mapping saved.")

FAISS index and row mapping saved.


In [13]:
# Define the search query
search_query = "vintage mug"

# Encode the search query using the loaded model
encoded_query = loaded_model.encode([search_query])

# Perform the search
k = 5  # Number of nearest neighbors to retrieve
distances, indices = index.search(encoded_query, k)

# Retrieve the corresponding descriptions from the dataframe
results = [df.iloc[row_mapping[idx]]['Description'] for idx in indices[0]]

print("Search results:")
for result in results:
    print(result)

Search results:
french blue metal door sign 2
ladies  gentlemen metal sign
set3 book box green gingham flower 
french kitchen sign blue metal
hanging fairy cake decoration


In [14]:
from agno.agent import Agent

class SearchAgent(Agent):
    def __init__(self, index, row_mapping, model):
        super().__init__()
        self.index = index
        self.row_mapping = row_mapping
        self.model = model

    def search(self, query, k=5):
        encoded_query = self.model.encode([query])
        distances, indices = self.index.search(encoded_query, k)
        results = [df.iloc[self.row_mapping[idx]]['Description'] for idx in indices[0]]
        return results

# Initialize the search agent
search_agent = SearchAgent(index=index, row_mapping=row_mapping, model=loaded_model)

# Define a function to use the agent for searching
def search_with_agent(query, k=5):
    return search_agent.search(query, k)

# Example usage
query = "CREAM CUPID HEARTS COAT HANGER"
results = search_with_agent(query)
print("Search results:")
for result in results:
    print(result)

Search results:
cream cupid hearts coat hanger
wooden picture frame white finish
lunch box i love london
love heart pocket warmer
small popcorn holder


In [None]:
from agno.agent import Agent, RunResponse  # noqa
from agno.models.ollama import Ollama

agent = Agent(model=Ollama(id="llama3.2"), markdown=True)

# Get the response in a variable
run: RunResponse = agent.run("Share a 2 sentence horror story")
print(run.content)



As I lay in bed, I couldn't shake the feeling that something was watching me from the shadows. It wasn't until I heard my own voice whisper "goodnight" back to me that I realized I wasn't alone.


<h1> Agentic RAG </h1>

In [None]:
import pandas as pd
from agno.agent import Agent, RunResponse
from agno.embedder.ollama import OllamaEmbedder
from agno.models.ollama import Ollama
from agno.vectordb.pgvector import PgVector
from agno.knowledge.text import TextKnowledgeBase

# Load product descriptions
csv_file = "data.csv"  # Update with your actual CSV file
df = pd.read_csv(csv_file, encoding="ISO-8859-1")
# Data Cleaning
df.dropna(subset=['Description'], inplace=True)  # Remove rows with missing descriptions
df['Description'] = df['Description'].str.lower()  # Convert to lowercase
df['Description'] = df['Description'].str.replace(r'[^a-zA-Z0-9 ]', '', regex=True)# Remove special characters

# elimination of NaN values
df.dropna(inplace=True)
# elimination of duplicate rows
df.drop_duplicates(inplace=True)
# elimination of cancelled orders
df = df[~df['InvoiceNo'].str.startswith('C')]
products = list(set(df['Description'].tolist()))


In [None]:
# Database connection
db_url = "postgresql+psycopg://postgres:pranav2004$@localhost:5433/ProductDescription"  # Update with your actual DB URL

# Initialize Vector Database
vector_db = PgVector(
    table_name="product_descriptions",
    db_url=db_url,
    embedder=OllamaEmbedder(id="llama3.2", dimensions=3072),
)

In [2]:
from agno.knowledge.text import Document
from tqdm import tqdm
# Ensure vector_db has an embedder
embedder = vector_db.embedder  

documents = []
for description in tqdm(products):
    
    # Generate embedding manually
    embedding = embedder.get_embedding(description)  # Convert text to vector

    # Create document with content and embedding
    doc = Document(content=description)
    doc.embedding = embedding  # Assign embedding manually

    documents.append(doc)

100%|██████████| 3867/3867 [02:25<00:00, 26.56it/s]


In [None]:
vector_db.insert(documents)

In [4]:
query = "LOVE HEART NAPKIN BOX"
# Convert query text into an embedding
query_embedding = vector_db.embedder.get_embedding(query)  # Pass the query as a list


In [5]:
results = vector_db.search(query, limit=5)  # Retrieve top 5 similar descriptions

print(results)


[Document(content='embossed heart trinket box', id='72c18181960b87cbc4ad317eeebf349b', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, timeout=None, options=None, client_kwargs=None, ollama_client=None), embedding=array([-0.19260289, -0.5875256 ,  4.287101  , ..., -0.22511965,
        0.5104131 ,  0.34725544], dtype=float32), usage=None, reranking_score=None), Document(content='strawberry ceramic trinket box', id='3c7b4fe2bb925e97cc115f65624b5181', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, timeout=None, options=None, client_kwargs=None, ollama_client=None), embedding=array([ 0.13428666,  0.04675705,  4.0765805 , ...,  0.37095943,
       -0.0974075 ,  0.63025624], dtype=float32), usage=None, reranking_score=None), Document(content='red gingham rose jewellery box', id='d5bb33a814efa5144c2851a5d209adbf', name=None, meta_data={}, embedder=OllamaEmbedder(dimensions=3072, id='llama3.2', host=None, t

In [6]:
# Load product descriptions into the vector DB
knowledge_base = TextKnowledgeBase(
    texts=products,  # Ensure "Description" column exists
    vector_db=vector_db,
    path="product_descriptions"
)

# Store embeddings (only run once or when updating)
knowledge_base.load(recreate=False)


In [7]:
knowledge_base

TextKnowledgeBase(reader=TextReader(chunk=True, chunk_size=3000, separators=['\n', '\n\n', '\r', '\r\n', '\n\r', '\t', ' ', '  '], chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x0000025EEAD3C790>), vector_db=<agno.vectordb.pgvector.pgvector.PgVector object at 0x0000025EE0A095D0>, num_documents=5, optimize_on=1000, chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x0000025EEAD3C790>, path='product_descriptions', formats=['.txt'])

In [8]:
# Initialize the Agent with Vector DB Knowledge
agent = Agent(
    model=Ollama(id="llama3.2"),
    knowledge=knowledge_base,
    show_tool_calls=True,  # Enable to see tool calls
)

# Function to interact with the agent
def ask_agent(query):
    
    print("\nAgent's Response:")
    run: RunResponse = agent.run(query)
    print(run.content)

In [12]:
ask_agent("return a recommendation similar to LOVE HEART NAPKIN BOX")


Agent's Response:
 - Running: search_knowledge_base(query=Recommendations for products similar to LOVE HEART NAPKIN BOX)

Based on the LOVE HEART NAPKIN BOX, here are some similar tool recommendations:

1. **LOVE HEART TEA SET IN GIFT BOX**: A beautifully crafted tea set with heart-shaped designs, perfect for a lovely afternoon tea.
2. **DIAMANTE PEN SET IN GIFT BOX**: A set of elegant pens with diamante details, ideal for writing love letters or signing special occasions.
3. **HEART-SHAPED TEA TIME TRAY IN GIFT BOX**: A charming tea time tray with a heart-shaped design, perfect for serving delicate finger foods and teas.

These recommendations offer a mix of elegance, romance, and whimsy, similar to the LOVE HEART NAPKIN BOX.


In [11]:
ask_agent("return a recommendation similar to LOVE HEART NAPKIN BOX, use the knowledge base only to provide the answer")


Agent's Response:
 - Running: search_knowledge_base(query=RECOMMENDATION FOR LOVE HEART NAPKIN BOX SIMILAR PRODUCTS)

Based on the tool call response, I recommend the "Pig Mug in Two Colour Designs" as a similar product to the LOVE HEART NAPKIN BOX. This mug is a popular item that can add a touch of personality and whimsy to any room.


In [10]:
ask_agent("return a recommendation similar to GUMBALL COAT RACK, use the knowledge base only to provide the answer")


Agent's Response:
 - Running: search_knowledge_base(query=GUMBALL COAT RACK)

Based on the tool call response, I recommend the "TRELLIS COAT RACK" as an alternative to the GUMBALL COAT RACK. The Trellis Coat Rack has a similar aesthetic and functionality to the Gumball Coat Rack, with a unique and stylish design that can add a touch of personality to any entryway or hallway.
