# Price Discovery Project Using RAG

## Import Libraries

In [11]:
import os
import torch
import shutil
import tqdm
import open_clip
import qdrant_client
import pandas as pd
from sklearn.model_selection import train_test_split
from langchain_community.document_loaders import DirectoryLoader, CSVLoader
from langchain_community.document_loaders.image import UnstructuredImageLoader
from langchain_community.vectorstores import Chroma, Qdrant
from langchain_experimental.open_clip import OpenCLIPEmbeddings


In [6]:
# Specify variables
model_name = "ViT-B-16"
checkpoint = "openai"
embedding_model= OpenCLIPEmbeddings(model_name=model_name, checkpoint=checkpoint)
db_dir = "db"


## Load Data

In [7]:
# Read the text data

# Instatiate loader 
text_loader = CSVLoader("train.csv",
                        encoding="utf-8",
                        csv_args={
                            "delimiter": ",",
                            "quotechar": '"',
                        })

# Load the data
text_data = text_loader.load()

## Create Vector DB to store data

In [9]:
# Instantiate DB
text_db = Chroma(persist_directory=db_dir, embedding_function=embedding_model, collection_name="text-collection")
images_db = Chroma(persist_directory=db_dir, embedding_function=embedding_model, collection_name="images-collection")

In [57]:
text_db.add_documents(text_data)

['8187329a-d59a-11ee-9bee-c0b6f9d0c838',
 '81876dba-d59a-11ee-a5ff-c0b6f9d0c838',
 '81876dbb-d59a-11ee-bea2-c0b6f9d0c838',
 '81876dbc-d59a-11ee-9f02-c0b6f9d0c838',
 '81876dbd-d59a-11ee-91bd-c0b6f9d0c838',
 '81876dbe-d59a-11ee-ac1c-c0b6f9d0c838',
 '81876dbf-d59a-11ee-a066-c0b6f9d0c838',
 '81876dc0-d59a-11ee-84ec-c0b6f9d0c838',
 '81876dc1-d59a-11ee-b111-c0b6f9d0c838',
 '81876dc2-d59a-11ee-8a20-c0b6f9d0c838',
 '81876dc3-d59a-11ee-8b8b-c0b6f9d0c838',
 '81876dc4-d59a-11ee-b5fe-c0b6f9d0c838',
 '81876dc5-d59a-11ee-9d4b-c0b6f9d0c838',
 '81876dc6-d59a-11ee-9161-c0b6f9d0c838',
 '81876dc7-d59a-11ee-bcf0-c0b6f9d0c838',
 '81876dc8-d59a-11ee-8140-c0b6f9d0c838',
 '81876dc9-d59a-11ee-bfab-c0b6f9d0c838',
 '81876dca-d59a-11ee-9416-c0b6f9d0c838',
 '81876dcb-d59a-11ee-9a19-c0b6f9d0c838',
 '81876dcc-d59a-11ee-a69b-c0b6f9d0c838',
 '81876dcd-d59a-11ee-a8c2-c0b6f9d0c838',
 '81876dce-d59a-11ee-b228-c0b6f9d0c838',
 '81876dcf-d59a-11ee-9035-c0b6f9d0c838',
 '81876dd0-d59a-11ee-a4f4-c0b6f9d0c838',
 '81876dd1-d59a-

## Create agent for retrieval

In [186]:
from langchain.tools.retriever import create_retriever_tool
from langchain.agents.load_tools import load_tools
from langchain.agents import AgentExecutor, Tool, create_structured_chat_agent, create_react_agent, create_self_ask_with_search_agent
from langchain_community.utilities import SearchApiAPIWrapper, SerpAPIWrapper
from langchain import hub
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.tools.google_lens import GoogleLensQueryRun
from langchain_community.utilities.google_lens import GoogleLensAPIWrapper
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain.agents.agent import AgentAction
from langchain_core.callbacks.base import BaseCallbackHandler
from typing import List, Dict, Any, Union

In [60]:
# Instantiate retrievers from db

text_retriever = text_db.as_retriever()

In [230]:
# Instantiate langsmith for visualizing metrivs
langchain_api_key = ""
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["LANGCHAIN_PROJECT"] = "price-discovery"
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [232]:
# Setup google api
GOOGLE_API_KEY = ""  
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY, temperature=0.3)

In [62]:
# setup serp api
serp_api_key = ""
os.environ["SERPAPI_API_KEY"] = serp_api_key

In [233]:
# Create prompts
test_prompt = """
    
As an experienced product analyst adept at accurately determining product price ranges,utilize the available tools {tools} to answer the question asked. 
Return ONLY a reasonable, accurate and concise price range for my product after analysing its current pricing
and that of a similar products. 

    Use the following format:

    Question: the input question you must answer
    Thought: you should always think about what to do
    Action: the action to take, should be one of [{tool_names}]
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Thought/Action/Action Input/Observation can repeat N times). If no result is found use your own knowledge
    Thought: I now know the final answer
    Final Answer: the final answer to the original input question

    Begin!

    Question: {input}
    Thought: {agent_scratchpad}
"""

test_prompt_one = """    
As an experienced product analyst adept at accurately determining product price ranges, utilize ALL the available tools {tools} to answer the question asked accurately. 
Check whether the product is in the database, then search the internet and compare the prices.
If a tool encounters an error, use another tool.
Return ONLY a reasonable and accurate  PRICE RANGE for the product.
Use the following format:

Question: the input question you must answer.
Thought: you should always think about what to do. 
Action: the action to take, should be ONE of or ALL of [{tool_names}]. 
Action Input: the input to the action. It SHOULD be a string.
Action: choose another tool if a tool fails or handle its error.
Observation: the result of the action, including insights gained or findings.
... (this Thought/Action/Action Input/Observation can repeat N times). If no result is found, use your own knowledge.
Thought: I now know the final answer that is based on the analysis of your findings and observations.
Final Answer: the final answer to the original input question, which must be as accurate as possible. NEVER say you can't answer.

Begin!

Question: {input}
Thought: {agent_scratchpad}

"""

In [234]:
# Create prompt template from prompts
test_prompt_template = PromptTemplate.from_template(test_prompt_one)

In [203]:
# Instantiate error handler
class RetryCallbackHandler(BaseCallbackHandler):
    def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> Any:
        # Log the error or perform any necessary actions
        
        # Retry logic
        max_retries = 3
        current_retry = kwargs.get('retry_count', 0)
        if current_retry < max_retries:
            print(f"Retrying tool execution (Attempt {current_retry + 1})")
            # Increment retry count
            kwargs['retry_count'] = current_retry + 1
            # Re-run the tool
            return AgentAction.RERUN
        else:
            print("Maximum retries reached. Switching to another tool.")
            return AgentAction.CHANGE_TOOL

In [None]:
# The error handler
error_handler = RetryCallbackHandler()

In [235]:
# Create tools
search = SerpAPIWrapper()
tool_search_item = Tool(
    name="search internet",
    description="Searches the internet for the price of a product by using its description.",
    func=search.run,
    handle_tool_error=True
)

# Tool for searching product price using image URL
tool_search_image = Tool(
    name="search image",
    description="Searches for the price of a product using its image URL.",
    func=search.run,
    handle_tool_error=True
)

# Tool for retrieving product information from the text database based on the product description
tool_retrieve_from_text_db = create_retriever_tool(
    text_retriever,
    'search text db',
    'Query a retriever for product price using its product description.')

# List of all tools
toolz = [tool_search_item, tool_search_image, tool_retrieve_from_text_db]

In [236]:
# Instantiate the agent with error handling
def _handle_error(error) -> str:
    return (
        "The following errors occurred during tool execution:"
        + error.args[0]
        + "Please try another tool."
    )

serp_agent = create_react_agent(llm, toolz, test_prompt_template)
serp_agent_executor = AgentExecutor(agent=serp_agent,
                                    tools=toolz,
                                    verbose=True,
                                    return_intermediate_steps=True,
                                    handle_parsing_errors=True,
                                    callbacks=[error_handler])
                                    

In [175]:
description = "iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound"
image_url = "https://m.media-amazon.com/images/I/81nq8H2IlyL._AC_UL320_.jpg"

In [237]:
# Run the agent executor
serp_agent_executor.invoke({"input": f"what is a reasonable and accurate price range of this {description} "})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: search text db
Action Input: iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound[0m[38;5;200m[1;3mtitle: iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound
price: 26.99

title: Fire Truck Toy Remote Control with Lights and Sounds Extending Rescue Ladder Fire Engine Toys for Boys and Girls Kids Toddlers Ages 3 and Up
price: 19.87

title: iHaHa Construction Toys for 1 2 3 4 5 6 Years Old Boys Toddlers, 5 in 1 Carrier Trucks Cars Toys for Toddler Boy Girl Birthday, Cars Friction Power Toys with Light Sound
price: 26.99

title: Fire Truck Car Toys Set, Friction Powered Car Carrier Trailer with Sound and Light, Play Vehicle Set for Kids Toddlers Boys Child Gift Age 3 4 5 6 7 Years Old, 2 Rescue Car, Helicopter, Plane
price: 35.99[0m[32;1m[1;3mAction

{'input': 'what is a reasonable and accurate price range of this iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound ',
 'output': '$12.09 - $26.99',
 'intermediate_steps': [(AgentAction(tool='search text db', tool_input='iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound', log='Action: search text db\nAction Input: iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound'),
   'title: iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound\nprice: 26.99\n\ntitle: Fire Truck Toy Remote Control with Lights and Sounds Extending Rescue Ladder Fire Engine Toys for Boys and Girls Kids Toddlers Ages 3 and Up\nprice: 19.87\n\ntitle: iHaHa Construction Toys for 1 2 3 

In [239]:
serp_agent_executor.invoke({"input": f"what is a reasonable and accurate PRICE RANGE of this {description} and image url {image_url}"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: search image
Action Input: https://m.media-amazon.com/images/I/81nq8H2IlyL._AC_UL320_.jpg[0m[33;1m[1;3m['You are here: Stores; Current: Gallery. Gallery. Gallery will display up to 8 images. Gallery Requirements. Tile Type, Max Text Size. Heading, 1 Line.', 'No information is available for this page.', 'https://images-na.ssl-images-amazon.com/images/I/41c-Ua-m7UL._SS300_.jpg. One person found this helpful. Helpful. Share. Report. Top of page.', 'https://m.media-amazon.com/images/I/613irHDRloL._AC_UL320_.jpg. USD$10.99. Price when purchased online. Image 1 of https://m.media-amazon.com/images. Image ...', 'New iPad 9.7 inch 2018 2017/ iPad Air 1/2 Case -... 4.0 out of 5 stars14. Currently unavailable.', 'Page 1.', 'Online shopping from a great selection at Media + Store ... Amazon Photos · Prime Video · Sell products on Amazon · Subscribe & Save · The Drop ...', 'Page 1.'][0m[32;1m[1;3mAction: search text db
Acti

{'input': 'what is a reasonable and accurate PRICE RANGE of this iHaHa Fire Truck Toys for 1 2 3 4 5 6 Years Old Boys Toddler, 5 in 1 Kids Carrier Toy Birthday, Car Friction Power Toys with Light Sound and image url https://m.media-amazon.com/images/I/81nq8H2IlyL._AC_UL320_.jpg',
 'output': '20-30',
 'intermediate_steps': [(AgentAction(tool='search image', tool_input='https://m.media-amazon.com/images/I/81nq8H2IlyL._AC_UL320_.jpg', log='Action: search image\nAction Input: https://m.media-amazon.com/images/I/81nq8H2IlyL._AC_UL320_.jpg'),
   "['You are here: Stores; Current: Gallery. Gallery. Gallery will display up to 8 images. Gallery Requirements. Tile Type, Max Text Size. Heading, 1 Line.', 'No information is available for this page.', 'https://images-na.ssl-images-amazon.com/images/I/41c-Ua-m7UL._SS300_.jpg. One person found this helpful. Helpful. Share. Report. Top of page.', 'https://m.media-amazon.com/images/I/613irHDRloL._AC_UL320_.jpg. USD$10.99. Price when purchased online. Im

# Test
To test, load your data for use in testing then run

In [94]:
test = pd.read_csv("test.csv")
train = pd.read_csv("train.csv")

In [240]:
# Get the data seen and unseen
train_sample = train.sample(5)
test_sample = test.sample(5)

validation_data = pd.concat([train_sample, test_sample], axis=0)

validation_descriptions = validation_data['title'].values
validation_prices = validation_data['price'].values

In [241]:
# Validate :
results = []
for item_description in validation_descriptions:
    result = serp_agent_executor.invoke({"input": f"what is a reasonable price range of this {item_description}"})
    results.append(result["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: search text db
Action Input: 2 Pack 15ml 0.5oz Empty Refillable Round Frosted Glass Cream Jar Cosmetic Storage Container Pot Sample Jars Bottle with Natural Bamboo Lid and Inner Liners for Cream Lotion Lip Balm[0m[38;5;200m[1;3mtitle: 2 Pack 15ml 0.5oz Empty Refillable Round Frosted Glass Cream Jar Cosmetic Storage Container Pot Sample Jars Bottle with Natural Bamboo Lid and Inner Liners for Cream Lotion Lip Balm
price: 9.39

title: Frosted Glass Cream Jars,2 Pack 100ml/3.4oz,Natural Bamboo lids Empty Refillable Cosmetic Container Bottles Glass Cosmetic Sample Jars with lids for Face Cream Make Up Eye Shadow Travel
price: 16.99

title: 2PCS 5g/5ml/0.17oz Empty Upscale Frosted Glass Cream Jar Bottle Cosmetic Dispenser Refillable Container Pot Vial Holder Sample Storage with Eco Bamboo Lid For Eye Cream Lotion Ointments Lip Balm
price: 8.97

title: 2Pcs 5ml/5g/0.17oz Mini Empty Refillable Frosted Clear Glass Cosmetic

In [242]:
for result, price in zip(results, validation_prices):
    print(f"The actual price is {price} and the predicted price range is {result}")

The actual price is 9.39 and the predicted price range is $8.97 - $16.99
The actual price is 27.85 and the predicted price range is $29.99 - $39.99
The actual price is 24.99 and the predicted price range is $20-$30
The actual price is 23.47 and the predicted price range is $20.99 - $29.99
The actual price is 129.99 and the predicted price range is $129.99 - $134.99
The actual price is 14.69 and the predicted price range is $7.99 - $22.99
The actual price is 54.97 and the predicted price range is $25.89 - $92.99
The actual price is 26.99 and the predicted price range is $15 - $36
The actual price is 59.99 and the predicted price range is $69.99 - $119.99
The actual price is 11.99 and the predicted price range is $17.99 - $25.99
