### Import Dependencies

In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_astradb import AstraDBVectorStore
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_groq import ChatGroq
from langchain_openai.embeddings import OpenAIEmbeddings

### Load Dataset

In [2]:
# Load the review dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/flipkart_reviews.csv')

In [3]:
df.head()

Unnamed: 0,product_id,product_title,rating,summary,review
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,1-more flexible2-bass is very high3-sound clar...
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Super sound and good looking I like that prize
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,Very much satisfied with the device at this pr...
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Super!,"Nice headphone, bass was very good and sound i..."
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version ...,5,Terrific purchase,Sound quality super battery backup super quali...


### Feature Selection

In [4]:
# Keep only the two columns used downstream: the product title and the review text.
data = df.loc[:, ['product_title', 'review']]

In [5]:
data.head()

Unnamed: 0,product_title,review
0,BoAt Rockerz 235v2 with ASAP charging Version ...,1-more flexible2-bass is very high3-sound clar...
1,BoAt Rockerz 235v2 with ASAP charging Version ...,Super sound and good looking I like that prize
2,BoAt Rockerz 235v2 with ASAP charging Version ...,Very much satisfied with the device at this pr...
3,BoAt Rockerz 235v2 with ASAP charging Version ...,"Nice headphone, bass was very good and sound i..."
4,BoAt Rockerz 235v2 with ASAP charging Version ...,Sound quality super battery backup super quali...


### Convert Data to Documents

In [6]:
# Build one plain dict per review row. The two columns are paired up directly
# instead of going through `iterrows()`, and no notebook-level variable
# shadows the built-in `object` any more.
product_list = [
    {'product_name': title, 'review': review}
    for title, review in zip(data['product_title'], data['review'])
]

In [7]:
product_list[0]

{'product_name': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset',
 'review': "1-more flexible2-bass is very high3-sound clarity is good 4-battery back up to 6 to 8 hour's 5-main thing is fastest charging system is available in that. Only 20 min charge and get long up to 4 hours back up 6-killing look awesome 7-for gaming that product does not support 100% if you want for gaming then I'll recommend you please don't buy but you want for only music then this product is very well for you.. 8-no more wireless headphones are comparing with that headphones at this pric..."}

In [8]:
len(product_list)

450

In [9]:
# Wrap every record in a Document: the review text becomes the page content
# and the product name travels along as metadata.
docs = [
    Document(
        page_content=entry['review'],
        metadata={'product': entry['product_name']},
    )
    for entry in product_list
]

In [10]:
docs[0]

Document(metadata={'product': 'BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset'}, page_content="1-more flexible2-bass is very high3-sound clarity is good 4-battery back up to 6 to 8 hour's 5-main thing is fastest charging system is available in that. Only 20 min charge and get long up to 4 hours back up 6-killing look awesome 7-for gaming that product does not support 100% if you want for gaming then I'll recommend you please don't buy but you want for only music then this product is very well for you.. 8-no more wireless headphones are comparing with that headphones at this pric...")

In [11]:
len(docs)

450

### Load API Keys

In [12]:
# Populate the process environment from a .env file, then read the
# credentials the following cells need. python-dotenv exposes the loader as
# `load_dotenv` (there is no `load_env`).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ASTRA_DB_API_ENDPOINT = os.getenv("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_KEYSPACE = os.getenv("ASTRA_DB_KEYSPACE")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

### Store Documents in the Vector Database

In [13]:
# Embedding model handed to the vector store below.
embedds = OpenAIEmbeddings(
    model="text-embedding-3-large",
    api_key=OPENAI_API_KEY,
)

In [14]:
# Vector store handle for the "flipkart" collection; endpoint, token and
# keyspace come from the environment variables read earlier.
vector_store = AstraDBVectorStore(
    embedding=embedds,
    collection_name="flipkart",
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    namespace=ASTRA_DB_KEYSPACE,
)

In [15]:
# Add every review document to the vector store and keep the call's result.
# NOTE(review): re-running this cell calls add_documents again with the same
# docs — presumably that inserts duplicates into the collection; confirm, or
# pass stable ids so a re-run overwrites instead.
insert_ids = vector_store.add_documents(docs)

### Set Up the LLM, Prompts, Retrievers and RAG Chain

In [16]:
# Chat model served through Groq; it is used both by the history-aware
# retriever and by the answering chain.
# NOTE(review): confirm "llama-3.1-70b-versatile" is still offered by Groq —
# hosted model IDs get retired over time.
model = ChatGroq(groq_api_key=GROQ_API_KEY, model="llama-3.1-70b-versatile", temperature=0.5)

In [17]:
# Retriever view over the vector store; search_kwargs sets k=3
# (presumably the number of documents fetched per query — confirm).
retriever = vector_store.as_retriever(search_kwargs={'k': 3})

In [18]:
# System instruction for the question-rewriting step. The pieces are joined by
# implicit string concatenation, so every piece but the last ends with a space
# to keep the sentences from running together.
retriever_prompt = (
    "Given a chat history and latest user question which might reference context in the chat history, "
    "formulate a standalone question which can be understood without the chat history. "
    "Do Not answer the question, just reformulate it if needed and otherwise return it as is."
)

In [19]:
# Prompt for the rewriting step: the system instruction, then the prior
# conversation, then the newest user message.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [20]:
# Combine the chat model, the base retriever and the rewriting prompt into a
# single history-aware retriever.
history_aware_retriever = create_history_aware_retriever(
    llm=model,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [21]:
# System prompt for the answering step. {context} and {input} are template
# variables that get filled in when the prompt is formatted.
PRODUCT_BOT_TEMPLATE = """
    You are an Ecommerce bot, Expert in product recommendation and customer queries.
    You will analyze product titles and reviews to provide accurate and helpful responses.
    Ensure your answers are relevant to the product context and refrain from straying off-topic.
    Your responses should be concise and informative.

    CONTEXT: {context}

    QUESTION: {input}

    YOUR ANSWER:
"""

In [22]:
# Prompt for the answering step: the product-bot system prompt, then the prior
# conversation, then the newest user message.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", PRODUCT_BOT_TEMPLATE),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [23]:
# Answering chain built from the chat model and the QA prompt.
question_answer_chain = create_stuff_documents_chain(
    llm=model,
    prompt=qa_prompt,
)

In [24]:
# Full pipeline: history-aware retrieval feeding the answering chain.
chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [25]:
# NOTE(review): chat_history is not referenced by any visible cell — history is
# kept per session in `store` instead; confirm it can be removed.
chat_history = []

# Maps a session id to that session's chat history object.
store = {}

In [26]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, keyed by session id, so
    repeated calls with the same id return the same history object.
    """
    history = store.get(session_id)
    if history is None:
        # First time this session is seen: start an empty history and remember it.
        history = store[session_id] = ChatMessageHistory()
    return history

In [27]:
# Wrap the RAG chain so that each call's conversation history is looked up
# through get_session_history.
chat_with_memory = RunnableWithMessageHistory(
    chain,
    get_session_history,
    # Same name as the "{input}" slot used by both prompts.
    input_messages_key="input",
    # Same name as the MessagesPlaceholder variable in both prompts.
    history_messages_key="chat_history",
    # Same key the invoke cell reads from the result.
    output_messages_key="answer"
)

In [28]:
# Ask one question under the "sharan" session id and show only the "answer"
# entry of the result.
chat_with_memory.invoke(
    {'input': "Can you tell me the best bluetooth buds?"},
    config={"configurable": {"session_id": "sharan"}}
)["answer"]

'Based on the reviews, I would recommend the "Realme Buds" as the best Bluetooth buds. They have excellent sound and bass quality, a long-lasting battery, and a good build quality. Additionally, they have features like magnetic standby, Google assistant, and voice cancellation while speaking. Many reviewers have compared them to other popular brands like Boat and Portronics and found them to be superior in terms of sound quality and build.'