In [2]:
from typing import List

In [10]:
def get_requirements(file_path:str)->List[str]:
    '''
    Returns the requirements
    '''
    requirements = []
    with open(file_path) as file_obj:
        requirements = file_obj.readlines()
        
        for req in range(len(requirements)):
            requirements[req] = requirements[req].strip()
        
        if "-e ." in requirements:
            requirements.remove("-e .")

    return requirements

In [11]:
get_requirements("../requirements.txt")

['python==3.9',
 'selenium',
 'pandas',
 'numpy',
 'seaborn',
 'langchain-community',
 '']

In [1]:
from selenium import webdriver
from amazoncaptcha import AmazonCaptcha
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

In [7]:
def scrape_products(keyword, num_products):

    try:
        path = Service(r"F:/Data Science/Projects/Ecommerce-Chatbot-Project/chromedriver.exe")
            
        # Initializing chrome_options
        chrome_options = Options()
            
        # opening the new chrome window with maximum size
        chrome_options.add_argument("--start-maximized")
            
        # timeout value for 10 seconds
        chrome_options.add_argument('--timeout=15')
            
        # scrape without a new Chrome window every time.
        # chrome_options.add_argument('headless')

        # initializing the driver
        driver = webdriver.Chrome(service=path, options=chrome_options)
            
        #driver.set_window_size(1120, 1000)

        # url of the website
        url = "https://www.amazon.in/"

        # connecting to the url
        driver.get(url)

        time.sleep(2)

        try:
            # captcha handling
            link = driver.find_element(By.XPATH, "//div[@class = 'a-row a-text-center']//img").get_attribute("src")    # <div class=a-row a-text-center>
            
            captcha = AmazonCaptcha.fromlink(link)
            captcha_value = AmazonCaptcha.solve(captcha)

            input_field = driver.find_element(By.ID, "captchacharacters")
            input_field.send_keys(captcha_value)

            continue_shopping = driver.find_element(By.CLASS_NAME, "a-button-text")
            continue_shopping.click()

        except NoSuchElementException:
            print("No captcha found")

        time.sleep(3)

        # search product
        search_tab = driver.find_element(By.XPATH, "/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[2]/div[1]/div/input")
        search_tab.send_keys(keyword)
        search_button = driver.find_element(By.XPATH, "//input[@id='nav-search-submit-button']")
        search_button.click()
        time.sleep(3)

        data = []
        current_page = 1

        while len(data) < num_products:

            print(f"\nScraping page {current_page}")

            products = driver.find_elements(By.XPATH, "//div[@class='a-section a-spacing-base a-text-center']")
            print(f"Number of products found on page {current_page}: {len(products)}")

            # iterating through each products 
            for i in range(len(products)):
                print(f"Scraping product {i+1} on page {current_page}")
                product = products[i]   # get product element 

                try:
                    brand_name = product.find_element(By.XPATH,".//h2[@class='a-size-mini s-line-clamp-1']//span").text
                except:
                    brand_name = "na"
                
                try:   
                    product_name = product.find_element(By.XPATH, ".//h2[@class='a-size-base-plus a-spacing-none a-color-base a-text-normal']//span").text
                except:
                    product_name = "na"
                    
                try:
                    # .text doesn't work because of unknown factors like css, therefore we use 'textContent'
                    rating_element = product.find_element(By.XPATH, ".//i[@data-cy='reviews-ratings-slot']//span")
                    rating = rating_element.get_attribute('textContent')
                except:
                    rating = "na"

                try: 
                    rating_count = product.find_element(By.XPATH, ".//span[@class='a-size-base s-underline-text']").text
                except:
                    rating_count = "na"

                try: 
                    selling_price_element = product.find_element(By.XPATH, ".//span[@class='a-price']//span[@class='a-offscreen']")
                    selling_price = selling_price_element.get_attribute('textContent')
                except:
                    selling_price = "na"
                
                try: 
                    mrp = product.find_element(By.XPATH, ".//span[@class='a-price a-text-price']//span[@aria-hidden='true']").text
                except:
                    mrp = "na"

                try: 
                    offer = product.find_element(By.XPATH, ".//div[@class='a-row']//span[contains(text(), '%')]").text
                except:
                    offer = "na"
                
                # try:
                #     delivery_price = driver.find_element(By.XPATH, "/html/body/div[1]/div[1]/div[1]/div[1]/div/span[1]/div[1]/div[3]/div/div/div/div/span/div/div/div[2]/div[5]/div/div[2]/span/span[1]")
                # except:
                #     delivery_price = "na"


                data.append({"Brand Name": brand_name,
                            "Product Name": product_name,
                            "Rating": rating,
                            "Rating Count": rating_count,
                            "Selling Price": selling_price,
                            "MRP": mrp,
                            "Offer": offer})
                            #"Delivery Price: ", delivery_price})
            
                # Break out of the loop if the desired number of products is reached
                if len(data) == num_products:
                    #break
                    df = pd.DataFrame(data)
                    return df  # Immediately exits the entire function if condition is met

            # Click the "Next" button to go to the next page if the desired number of products isn't reached
            time.sleep(3)
            try:
                next_button = driver.find_element(By.XPATH, "//a[@class='s-pagination-item s-pagination-next s-pagination-button s-pagination-button-accessibility s-pagination-separator']")
                next_button.click()
                current_page += 1
                time.sleep(3)  
            except NoSuchElementException:
                print("No next page found. Ending scrape.")
                break
                
        df = pd.DataFrame(data)
        return df 
    
    finally:
        driver.quit()
    


In [9]:
scrape_products("Mens formal shirts", 50)


Scraping page 1
Number of products found on page 1: 70
Scraping product 1 on page 1
Scraping product 2 on page 1
Scraping product 3 on page 1
Scraping product 4 on page 1
Scraping product 5 on page 1
Scraping product 6 on page 1
Scraping product 7 on page 1
Scraping product 8 on page 1
Scraping product 9 on page 1
Scraping product 10 on page 1
Scraping product 11 on page 1
Scraping product 12 on page 1
Scraping product 13 on page 1
Scraping product 14 on page 1
Scraping product 15 on page 1
Scraping product 16 on page 1
Scraping product 17 on page 1
Scraping product 18 on page 1
Scraping product 19 on page 1
Scraping product 20 on page 1
Scraping product 21 on page 1
Scraping product 22 on page 1
Scraping product 23 on page 1
Scraping product 24 on page 1
Scraping product 25 on page 1
Scraping product 26 on page 1
Scraping product 27 on page 1
Scraping product 28 on page 1
Scraping product 29 on page 1
Scraping product 30 on page 1
Scraping product 31 on page 1
Scraping product 32 on 

Unnamed: 0,Brand Name,Product Name,Rating,Rating Count,Selling Price,MRP,Offer
0,Symbol Premium,Men's Wrinkle-Resistant Regular Fit Cotton For...,4.0 out of 5 stars,497,"₹1,699","₹2,599",(35% off)
1,Symbol Premium,Men's Cotton Non-Iron Formal Shirt (Regular Fi...,3.7 out of 5 stars,69,"₹1,899","₹3,999",(53% off)
2,Amazon Brand - Symbol,Men's Solid Cotton Formal Shirt | Plain | Full...,3.9 out of 5 stars,8650,₹599,"₹1,799",(67% off)
3,Amazon Brand - Symbol,Men's Solid Cotton Rich Formal Shirt | Plain |...,3.8 out of 5 stars,10351,₹549,"₹1,699",(68% off)
4,Generic,Stylish Men’s Luster Cotton Blend Shirt - Comf...,3.4 out of 5 stars,8,₹499,₹999,(50% off)
5,Peter England,Men's Everday Regular Fit Solid Shirt | Cotton...,3.9 out of 5 stars,1002,₹789,"₹1,399",(44% off)
6,Pinkmint,Mens Long Sleeve Button Down Shirt for Men Col...,3.7 out of 5 stars,732,₹379,"₹1,999",(81% off)
7,Peter England,Premium Wonder Fabric Slim Fit Full Sleeve Str...,4.1 out of 5 stars,47,"₹1,009","₹1,799",(44% off)
8,Generic,"Solid Full Sleeves Shirt, Wrinkle Free Mens Sh...",3.2 out of 5 stars,4,₹499,₹999,(50% off)
9,Arrow,Men's Regular Fit Shirt,4.0 out of 5 stars,333,"₹1,039","₹1,999",(48% off)


In [4]:
path = Service(r"F:/Data Science/Projects/Ecommerce-Chatbot-Project/chromedriver.exe")
        
# Initializing chrome_options
chrome_options = Options()
        
# opening the new chrome window with maximum size
chrome_options.add_argument("--start-maximized")
        
# timeout value for 10 seconds
chrome_options.add_argument('--timeout=15')
        
# Uncomment the line below if you'd like to scrape without a new Chrome window every time.
# chrome_options.add_argument('headless')

# initializing the driver
driver = webdriver.Chrome(service=path, options=chrome_options)
wait = WebDriverWait(driver, 10)        
#driver.set_window_size(1120, 1000)

# url of the website
url = "https://www.amazon.in/"

# connecting to the url
driver.get(url)

# wait time of 3 seconds for the website to load
time.sleep(3)

try:
    # bypass captcha
    link = driver.find_element(By.XPATH, "//div[@class = 'a-row a-text-center']//img").get_attribute("src")
    print("Captcha found bypassing it...")
    
    captcha = AmazonCaptcha.fromlink(link)
    captcha_value = AmazonCaptcha.solve(captcha)

    input_field = driver.find_element(By.ID, "captchacharacters")
    input_field.send_keys(captcha_value)

    continue_shopping = driver.find_element(By.CLASS_NAME, "a-button-text")
    continue_shopping.click()

except NoSuchElementException:
    print("No captcha found")

time.sleep(2)

# finding the search tab and entering search item 
search_tab = driver.find_element(By.XPATH, "/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[2]/div[1]/div/input")
search_tab.send_keys("Mens formal shirt")

# clicking "ENTER" key to initiate search
#search_tab.send_keys(Keys.ENTER)

# finding the search(submit) button and clicking it
#search_button = driver.find_element(By.XPATH, "/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[3]/div/span/input")
search_button = driver.find_element(By.XPATH, "//input[@id='nav-search-submit-button']")
search_button.click()

time.sleep(5)

# while True:
#     products = driver.find_elements(By.XPATH, "//div[@class='a-section a-spacing-base a-text-center'][1]")
#     print("Number of products found on page one: ", len(products))

# try:
#          #price = driver.find_element(By.CSS_SELECTOR, "a.a-link-normal span.a-offscreen").text 
#          price = driver.find_element(By.XPATH, "//span[@class='a-price']//span[@class='a-offscreen']").text      
# except NoSuchElementException:
#          price = "na"

print("hello")

#price = driver.find_element(By.XPATH, "//div[@class='a-row a-size-base a-color-base']//div[@class='a-row']//span[@class='a-price']//span[@class='a-offscreen']").text 
#price = driver.find_element(By.XPATH, "/html/body/div[1]/div[1]/div[1]/div[1]/div/span[1]/div[1]/div[3]/div/div/div/div/span/div/div/div[2]/div[4]/div[1]/div[1]/a/span/span[1]").text
#price = driver.find_element(By.XPATH, "//span[@class='a-price']//span[@class='a-offscreen']")
#mrp = driver.find_element(By.XPATH, "//span[@class='a-price a-text-price']//span[@class='a-offscreen']")
offer = driver.find_element(By.XPATH, "//div[@class='a-row']//span[contains(text(), '%')]").text
time.sleep(5) 
print(offer)
#print(mrp.get_attribute('textContent'))
print("hello")
    

time.sleep(5)
next_button = driver.find_element(By.XPATH, "//a[@class='s-pagination-item s-pagination-next s-pagination-button s-pagination-button-accessibility s-pagination-separator']")

time.sleep(5)
next_button.click()
# print(f"Number of prices found: {len(prices)}")
# if prices:
#     print("Price:", prices[0].text)
# else:
#     print("No price elements found.")




# prices = wait.until(EC.presence_of_all_elements_located(
#      (By.XPATH, "//span[@class='a-price']//span[@class='a-offscreen']")
#  ))

# try:
# #     print("Element found: ", prices[1].is_displayed())  # Check if element is visible
# #     print("Text content: ", prices[1].get_attribute('textContent'))  # Try getting text this way
# #     print("Inner HTML: ", prices[1].get_attribute('innerHTML'))  # See what's inside the element
#      print("Direct text: ", prices[1].text)  # Original attempt
# except Exception as e:
#     print(f"Error: {e}")

Captcha found bypassing it...
hello
(31% off)
hello


In [1]:
import os 
os.getcwd()

'f:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Rough'

In [2]:
os.path.dirname(os.getcwd())

'f:\\Data Science\\Projects\\Ecommerce-Chatbot-Project'

In [3]:
from src.logger import logging

logging.info("Hello")

In [4]:
from src.logger import logging

In [5]:
from langchain_community.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path="../Data/data.csv")

data = loader.load()

print(data)

[Document(metadata={'source': '../Data/data.csv', 'row': 0}, page_content="Brand Name: Symbol Premium\nProduct Name: Men's Wrinkle-Resistant Regular Fit Cotton Formal Shirt\nRating: 4.0 out of 5 stars\nRating Count: 499\nSelling Price: ₹1,699\nMRP: ₹2,599\nOffer: (35% off)"), Document(metadata={'source': '../Data/data.csv', 'row': 1}, page_content="Brand Name: Symbol Premium\nProduct Name: Men's Cotton Non-Iron Formal Shirt (Regular Fit | Solid)\nRating: 3.7 out of 5 stars\nRating Count: 70\nSelling Price: ₹1,899\nMRP: ₹3,999\nOffer: (53% off)"), Document(metadata={'source': '../Data/data.csv', 'row': 2}, page_content="Brand Name: Amazon Brand - Symbol\nProduct Name: Men's Solid Cotton Rich Formal Shirt | Plain | Full Sleeve-Regular Fit (Available in Plus Size)\nRating: 3.8 out of 5 stars\nRating Count: 10,355\nSelling Price: ₹549\nMRP: ₹1,699\nOffer: (68% off)"), Document(metadata={'source': '../Data/data.csv', 'row': 3}, page_content="Brand Name: Amazon Brand - Symbol\nProduct Name: 

In [8]:
import pandas as pd 
df = pd.read_csv("../Data/data.csv")
df.columns

Index(['Brand Name', 'Product Name', 'Rating', 'Rating Count', 'Selling Price',
       'MRP', 'Offer'],
      dtype='object')

In [9]:
df.head()

Unnamed: 0,Brand Name,Product Name,Rating,Rating Count,Selling Price,MRP,Offer
0,Symbol Premium,Men's Wrinkle-Resistant Regular Fit Cotton For...,4.0 out of 5 stars,499,"₹1,699","₹2,599",(35% off)
1,Symbol Premium,Men's Cotton Non-Iron Formal Shirt (Regular Fi...,3.7 out of 5 stars,70,"₹1,899","₹3,999",(53% off)
2,Amazon Brand - Symbol,Men's Solid Cotton Rich Formal Shirt | Plain |...,3.8 out of 5 stars,10355,₹549,"₹1,699",(68% off)
3,Amazon Brand - Symbol,Men's Solid Cotton Formal Shirt | Plain | Full...,3.9 out of 5 stars,8650,₹599,"₹1,799",(67% off)
4,Generic,Stylish Men’s Luster Cotton Blend Shirt - Comf...,3.4 out of 5 stars,8,₹499,₹999,(50% off)


In [11]:
df.isna().sum()

Brand Name       0
Product Name     0
Rating           0
Rating Count     0
Selling Price    0
MRP              0
Offer            0
dtype: int64

In [12]:
# Function to format each row into a text chunk
def format_product(row):
    return (f"Brand: {row['Brand Name']}, Product: {row['Product Name']}, "
            f"Rating: {row['Rating']} ({row['Rating Count']} reviews), "
            f"Selling Price: {row['Selling Price']} "
            f"(MRP: {row['MRP']}, Offer: {row['Offer']})")

# Apply function to all rows
df["text_chunk"] = df.apply(format_product, axis=1)

# Save processed data for FAISS indexing
df[["text_chunk"]].to_csv("processed_data.csv", index=False)

In [2]:
import pandas as pd
df_processed = pd.read_csv('processed_data.csv')



In [3]:
df_processed.head()

Unnamed: 0,text_chunk
0,"Brand: Symbol Premium, Product: Men's Wrinkle-..."
1,"Brand: Symbol Premium, Product: Men's Cotton N..."
2,"Brand: Amazon Brand - Symbol, Product: Men's S..."
3,"Brand: Amazon Brand - Symbol, Product: Men's S..."
4,"Brand: Generic, Product: Stylish Men’s Luster ..."


In [1]:
from langchain_community.document_loaders.csv_loader import CSVLoader
loader = CSVLoader(file_path='../Data/data.csv',
                   csv_args={'delimiter': ',',
                             'quotechar': '"',
                            'fieldnames': ['Brand Name', 'Product Name', 'Rating', 'Rating Count', 'Selling Price', 'MRP', 'Offer'] })
                            
docs = loader.load()

In [2]:
docs

[Document(metadata={'source': '../Data/data.csv', 'row': 0}, page_content='Brand Name: Brand Name\nProduct Name: Product Name\nRating: Rating\nRating Count: Rating Count\nSelling Price: Selling Price\nMRP: MRP\nOffer: Offer'),
 Document(metadata={'source': '../Data/data.csv', 'row': 1}, page_content="Brand Name: Symbol Premium\nProduct Name: Men's Wrinkle-Resistant Regular Fit Cotton Formal Shirt\nRating: 4.0 out of 5 stars\nRating Count: 499\nSelling Price: ₹1,699\nMRP: ₹2,599\nOffer: (35% off)"),
 Document(metadata={'source': '../Data/data.csv', 'row': 2}, page_content="Brand Name: Symbol Premium\nProduct Name: Men's Cotton Non-Iron Formal Shirt (Regular Fit | Solid)\nRating: 3.7 out of 5 stars\nRating Count: 70\nSelling Price: ₹1,899\nMRP: ₹3,999\nOffer: (53% off)"),
 Document(metadata={'source': '../Data/data.csv', 'row': 3}, page_content="Brand Name: Amazon Brand - Symbol\nProduct Name: Men's Solid Cotton Rich Formal Shirt | Plain | Full Sleeve-Regular Fit (Available in Plus Size)

In [6]:
docs?

[1;31mType:[0m        list
[1;31mString form:[0m [Document(metadata={'source': '../Data/data.csv', 'row': 0}, page_content='Brand Name: Brand Name <...> ing: 3.7 out of 5 stars\nRating Count: 709\nSelling Price: ₹599\nMRP: ₹3,199\nOffer: (81% off)")]
[1;31mLength:[0m      431
[1;31mDocstring:[0m  
Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list.
The argument must be an iterable if specified.

In [7]:
text = [d.page_content for d in docs]

In [11]:
text[1]

"Brand Name: Symbol Premium\nProduct Name: Men's Wrinkle-Resistant Regular Fit Cotton Formal Shirt\nRating: 4.0 out of 5 stars\nRating Count: 499\nSelling Price: ₹1,699\nMRP: ₹2,599\nOffer: (35% off)"

import os 
from dotenv import load_dotenv
load_dotenv()

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

client = NVIDIAEmbeddings(
  model="nvidia/nv-embedqa-mistral-7b-v2", 
  api_key=os.getenv("NVIDIA_API_KEY"),
  truncate="NONE", 
  )

embedding = client.embed_documents(text)
print(embedding)


In [1]:
from langchain_community.document_loaders.csv_loader import CSVLoader
loader = CSVLoader(file_path='../Data/data_watches.csv')
data = loader.load()

In [3]:
from langchain_community.document_loaders import DirectoryLoader, CSVLoader
loader = DirectoryLoader(path=r"F:\Data Science\Projects\Ecommerce-Chatbot-Project\Data",
                        glob="*.csv",
                        loader_cls=CSVLoader,
                        loader_kwargs={'csv_args': {'delimiter': ',', 'quotechar': '"'}})
docs = loader.load()

In [4]:
len(docs)

1281

In [13]:
docs

[Document(metadata={'source': 'F:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Data\\data_sarees.csv', 'row': 0}, page_content="Brand Name: SGF11\nProduct Name: Women's Kanjivaram Pure Soft Silk Handloom Saree Pure Golden Zari With Blouse Piece\nRating: 4.2 out of 5 stars\nRating Count: 769\nSelling Price: ₹1,499\nMRP: ₹5,999\nOffer: (75% off)"),
 Document(metadata={'source': 'F:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Data\\data_sarees.csv', 'row': 1}, page_content="Brand Name: SGF11\nProduct Name: Women's Kanjivaram Woven Soft Silk Saree With Blouse Piece\nRating: 4.0 out of 5 stars\nRating Count: 1,948\nSelling Price: ₹1,699\nMRP: ₹8,999\nOffer: (81% off)"),
 Document(metadata={'source': 'F:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Data\\data_sarees.csv', 'row': 2}, page_content='Brand Name: Generic\nProduct Name: Traditional Art cotton Saree with Elephant and Peacock Print zari Border, Green and Gold, with Matching Blouse Piece with tyedye daying\nRati

In [3]:
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

  from tqdm.autonotebook import tqdm


In [22]:
pc = Pinecone(api_key=)
index_name = "fun"
pc.create_index(name=index_name,
                 dimension = 4096,
                 metric="cosine",
                 spec=ServerlessSpec(cloud="aws",region="us-east-1"))




In [19]:
index 

<pinecone.data.index.Index at 0x2028ed33af0>

In [5]:
import os 
from dotenv import load_dotenv
load_dotenv()

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

embed = NVIDIAEmbeddings(
  model="nvidia/nv-embedqa-mistral-7b-v2", 
  api_key=os.getenv("NVIDIA_API_KEY"), 
  truncate="NONE", 
  )


In [26]:
vector_store = PineconeVectorStore.from_documents(documents=docs,
                                                              index_name=index_name, 
                                                              embedding = embed)

In [6]:
docsearch = PineconeVectorStore.from_existing_index(
    index_name="ecommerce-chatbot-project",
    embedding=embed)

In [10]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [9]:
llm = ChatGroq(temperature=0.6,
                           model_name="llama-3.3-70b-versatile",
                           groq_api_key=,
                           max_tokens=4096)


In [7]:
retriever = docsearch.as_retriever()

In [18]:
system_prompt = """You are a knowledgeable and friendly fashion consultant for a high-end e-commerce store. 
            Your store specializes in:
            - Men's clothing
            - Women's clothing
            - Watches for men 

            Guidelines for interaction:
            1. Be warm and professional in your responses
            2. Provide specific product recommendations based on the context
            3. Include relevant details about materials, styles, and pricing when available
            4. If asked about products we don't carry, politely explain our product range
            6. Always prioritize customer satisfaction while being honest about product availability

            Current context about our products and inventory:
            {context}
            
            Remember: Only provide information based on the context above. If certain details aren't available, 
            be honest and offer to help the customer find relevant alternatives or suggest contacting customer service for more specific information."""
        
prompt = ChatPromptTemplate.from_messages([("system", system_prompt),
                                            #essagesPlaceholder(variable_name="chat_history"),  # For maintaining conversation history
                                            ("human", "{input}")]) 

In [19]:
doc_chain = create_stuff_documents_chain(llm=llm, 
                                        prompt=prompt,
                                        output_parser=StrOutputParser(),
                                        document_variable_name="context")
            

retrieval_chain = create_retrieval_chain(retriever=retriever, 
                                                     combine_docs_chain=doc_chain)

In [20]:
retrieval_chain.invoke({"input": "what do you do?"})

{'input': 'what do you do?',
 'context': [Document(id='d7321768-3623-4c05-a4df-30d8fd7bc4c4', metadata={'row': 392.0, 'source': 'F:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Data\\data_watches.csv'}, page_content="Brand Name: SKMEI\nProduct Name: Men's Watch New Wheels Rolling Creative Fashion Che Youhui League Fans Butterfly Double Snap Gift Wristwatch - 1787\nRating: 4.2 out of 5 stars\nRating Count: 1,055\nSelling Price: ₹1,799\nMRP: ₹6,999\nOffer: (74% off)"),
  Document(id='ec4d4f30-155b-4dea-b4a4-820ef2ff4eab', metadata={'row': 215.0, 'source': 'F:\\Data Science\\Projects\\Ecommerce-Chatbot-Project\\Data\\data_watches.csv'}, page_content="Brand Name: SKMEI\nProduct Name: Men's Watch New Wheels Rolling Creative Fashion Che Youhui League Fans Butterfly Double Snap Gift Wristwatch - 1787\nRating: 4.2 out of 5 stars\nRating Count: 1,055\nSelling Price: ₹1,799\nMRP: ₹6,999\nOffer: (74% off)"),
  Document(id='66f62082-5160-41b6-b5fc-26443f54be3f', metadata={'row': 139.0, 'sou