# Ecommerce Chatbot

In [1]:
# Install requirements
!pip3 install -r requirements.txt



## Pre-processing of synthetic product dataset

In [2]:
import pandas as pd

# Upper bound on the number of characters kept from each free-text column
max_str_len = 1024


def auto_truncate(val):
    """Return ``val`` shortened to at most ``max_str_len`` characters."""
    return val[:max_str_len]


# Load the product data, truncating the three free-text columns while parsing
df = pd.read_csv(
    "product_data.csv",
    converters=dict.fromkeys(
        ('bullet_point', 'item_keywords', 'item_name'),
        auto_truncate,
    ),
)

In [3]:
import pandas as pd

# Combine 'item_id' and 'domain_name' to form a unique identifier 'primary_key'
df['primary_key'] = df['item_id'].str.cat(df['domain_name'], sep='-')

# Handle missing data: keep only rows whose 'item_keywords' is present and non-empty.
# A boolean filter is used instead of `df[col].replace('', pd.NaT, inplace=True)`:
# the in-place call acts on the selected column object and is not guaranteed to
# update `df` itself, and pd.NaT is the datetime null, not a text-column marker.
has_keywords = df['item_keywords'].notna() & df['item_keywords'].ne('')

# Drop the rows without keywords and re-index to keep the index continuous
df = df[has_keywords].reset_index(drop=True)

# Display the first few rows of the dataframe
df.head()


Unnamed: 0,item_id,marketplace,country,main_image_id,domain_name,bullet_point,item_keywords,material,brand,color,item_name,model_name,model_number,product_type,primary_key
0,B07T6RZ2CM,samsclub,IN,71dZhpsferL,samsclub.in,3D Printed Hard Back Case Mobile Cover for Len...,mobile cover back cover mobile case phone case...,,samsclub Brand - Solimo,Others,samsclub Brand - Solimo Designer Couples Sitti...,Lenovo K4 Note,gz8115-SL40423,CELLULAR_PHONE_CASE,B07T6RZ2CM-samsclub.in
1,B07T2JY31Y,samsclub,IN,71vX7qIEAIL,samsclub.in,3D Printed Hard Back Case Mobile Cover for Son...,mobile cover back cover mobile case phone case...,Wood,samsclub Brand - Solimo,others,samsclub Brand - Solimo Designer Leaf on Wood ...,Sony Xperia Z1 L39H,gz8056-SL40528,CELLULAR_PHONE_CASE,B07T2JY31Y-samsclub.in
2,B0849YGSCZ,samsclub,AE,A1EZF-2mB5L,samsclub.ae,,small de fur rooms navidad woven girls shag pa...,,Stone & Beam,,Stone & Beam Contemporary Doily Wool Farmhouse...,,I59I8044IVYGRYC00-Parent,HOME_FURNITURE_AND_DECOR,B0849YGSCZ-samsclub.ae
3,B081K6TCML,samsclub,IN,81o9EyZ-fAL,samsclub.in,Solimo Plastic Multipurpose Modular Drawer; sm...,drawer modular drawer 3 rack modular drawer ki...,Plastic,samsclub Brand - Solimo,Multicolor,samsclub Brand - Solimo Plastic Multipurpose M...,,sol_cujo_13,HOME,B081K6TCML-samsclub.in
4,B0854774X5,samsclub,IN,81xaJCVnl3L,samsclub.in,"Snug fit for Nokia 8.1, with perfect cut-outs ...",Back Cover Designer Case Designer Take It Easy...,Silicon,samsclub Brand - Solimo,Multicolor,samsclub Brand - Solimo Designer Take It Easy ...,Nokia 8.1,UV10714-SL40617,CELLULAR_PHONE_CASE,B0854774X5-samsclub.in


In [4]:
# Size of the product subset that gets indexed
num_products = 5000

# Map each row label of the first `num_products` rows to a {column: value} dict
product_metadata = df.iloc[:num_products].to_dict(orient='index')

In [5]:
# Check one of the products: show the metadata dict stored under row label 0
# (product_metadata is keyed by the dataframe index, see to_dict(orient='index'))
product_metadata[0]

{'item_id': 'B07T6RZ2CM',
 'marketplace': 'samsclub',
 'country': 'IN',
 'main_image_id': '71dZhpsferL',
 'domain_name': 'samsclub.in',
 'bullet_point': '3D Printed Hard Back Case Mobile Cover for Lenovo K4 Note Easy to put & take off with perfect cutouts for volume buttons, audio & charging ports. Stylish design and appearance, express your unique personality. Extreme precision design allows easy access to all buttons and ports while featuring raised bezel to life screen and camera off flat surface. Slim Hard Back Cover No Warranty None',
 'item_keywords': 'mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Lenovo back case hard case 3D printed mobile cover mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Lenovo back case hard case 3D printed mobile cover mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Len

## Create a vector db to store the information of products using Redis

In [6]:
# Import necessary modules
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.redis import Redis as RedisVectorStore

# Prepare metadatas which will be stored alongside the vectors
metadatas = list(product_metadata.values())

# Extract item_names to be used for generating embeddings.
# Iterate over the metadata dicts themselves: `product_metadata.items()` yields
# (row_label, dict) tuples, and indexing a tuple with 'item_name' raises TypeError.
texts = [metadata['item_name'] for metadata in metadatas]

# Use OpenAIEmbeddings as the embeddings provider.
# NOTE(review): no API key is passed here, so credentials presumably come from the
# environment (e.g. OPENAI_API_KEY) — confirm it is set before running this cell.
embeddings_provider = OpenAIEmbeddings()

# Define the name for the Redis search index
redis_index_name = "products"

# Define the Redis URL, assuming a Redis server running on a Docker compose network
redis_server_url = "redis://redis:6379"

# Initialize a RedisVectorStore and load documents (embeddings and metadatas) into Redis
vectorstore = RedisVectorStore.from_texts(
    texts=texts,
    metadatas=metadatas,
    embedding=embeddings_provider,
    index_name=redis_index_name,
    redis_url=redis_server_url
)


AuthenticationError: <empty message>

## ChatBot with ConversationalRetrievalChain

In [7]:
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import (
    ConversationalRetrievalChain,
    LLMChain
)
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.prompts.prompt import PromptTemplate

# Prompt that asks the model to rewrite a follow-up message, together with the
# chat history, as a single standalone question. The {chat_history} and {question}
# placeholders are filled in when the prompt is formatted.
# Note: the name `template` is reassigned further down for the answering prompt.
template = """Given the following chat history and a follow up question, rephrase the follow up input question to be a standalone question.
Or end the conversation if it seems like it's done.

Chat History:\"""
{chat_history}
\"""

Follow Up Input: \"""
{question}
\"""

Standalone question:"""

# Build the prompt object used by the question-condensing chain
condense_question_prompt = PromptTemplate.from_template(template)

# Prompt for the answering step: sets the assistant persona, injects the retrieved
# product context and the shopper's question.
# Fix: the "Question" section previously had malformed quoting and no {question}
# placeholder, so the question never reached the model; it now mirrors the
# delimiter style of the "Context" section. The truncated final sentence of the
# persona paragraph is also completed.
template = """You are a friendly, conversational retail shopping assistant designed to be able to assist with a wide range of tasks ranging from answering simple questions to providing in-depth explanations and discussions on a wide range of topics related to retail shopping.

As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. 

Overall, Assistant is a powerful tool that can help with a wide range of tasks related to shopping and provide valuable insights and information on a wide range of products. Whether you need help with a specific question or just want to have a conversation about shopping plans, trade offs between different prodcuts, Assistant is here to assist.

Use the following context including product names, descriptions, and keywords to show the shopper whats available, help find what they want

Context:\"""
{context}
\"""

Question:\"""
{question}
\"""

Helpful Answer:"""

# Build the prompt object used by the question-answering chain
qa_prompt= PromptTemplate.from_template(template)


# Two OpenAI models are configured.
# 1) A temperature-0 model, used for rewriting follow-up questions.
llm = OpenAI(temperature=0)

# 2) A streaming model whose tokens are written to stdout as they arrive,
#    with answers capped at 150 tokens.
streaming_llm = OpenAI(
    temperature=0.2,
    max_tokens=150,
    streaming=True,
    verbose=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

# Question-creation chain: the non-streaming LLM plus the condense-question prompt
question_generator = LLMChain(prompt=condense_question_prompt, llm=llm)

# Question-answering chain: the streaming LLM with the "stuff" chain type and the QA prompt
doc_chain = load_qa_chain(
    prompt=qa_prompt,
    chain_type="stuff",
    llm=streaming_llm,
)

# Conversational chain wired to the vector store's default retriever
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=vectorstore.as_retriever(),
)

In [8]:
import json

from langchain.schema import BaseRetriever
from langchain.vectorstores import VectorStore
from langchain.schema import Document
from pydantic import BaseModel

class RedisProductRetriever(BaseRetriever, BaseModel):
    """Retriever that returns product documents whose text is built from metadata.

    Each hit from the vector store's similarity search is re-wrapped in a
    ``Document`` whose ``page_content`` combines the item name, description
    and keywords found in the hit's metadata.
    """

    # Vector store that is queried for similar products
    vectorstore: VectorStore

    class Config:
        # Let the model accept arbitrary field types
        # (presumably required because VectorStore is not a pydantic type)
        arbitrary_types_allowed = True

    def combine_metadata(self, doc) -> str:
        """Build one descriptive string from the name, description and keywords in ``doc.metadata``."""
        fields = doc.metadata
        text = "Item Name: " + fields["item_name"] + ". "
        text += "Item Description: " + fields["bullet_point"] + ". "
        text += "Item Keywords: " + fields["item_keywords"] + "."
        return text

    def get_relevant_documents(self, query):
        """Return the similarity-search hits for ``query`` as documents with combined-metadata content."""
        return [
            Document(
                page_content=self.combine_metadata(hit),
                metadata=hit.metadata,
            )
            for hit in self.vectorstore.similarity_search(query)
        ]


### Setup ChatBot with new retriever

In [9]:
# Wrap the vector store in the custom retriever defined above
redis_product_retriever = RedisProductRetriever(vectorstore=vectorstore)

# Rebuild the chatbot so that it retrieves through the custom retriever
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=redis_product_retriever,
)

### Retry the conversation with the new retriever

In [None]:
# create a chat history buffer
chat_history = []

# gather user input for the first question to kick off the bot
question = input("Hello! What are you looking for today?")

# keep the bot running in a loop to simulate a conversation.
# The loop ends on an empty message or on "exit"/"quit" (case-insensitive), so the
# cell can finish without interrupting the kernel.
while question.strip().lower() not in ("", "exit", "quit"):
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n")
    # remember the exchange so follow-up questions can be condensed against it
    chat_history.append((result["question"], result["answer"]))
    try:
        question = input()
    except (EOFError, KeyboardInterrupt):
        # input stream closed or user interrupted: stop the conversation cleanly
        break

Hello! What are you looking for today? i am looking for eggs




No problem! We have a great selection of eggs for you to choose from. We have Everyday Value, Cage-Free Non-GMO Large Brown Grade A Eggs, 12 ct., Everyday Value Organic Brown Large Grade A Eggs, 12 CT., Everyday Value, Organic Large Brown Grade A Eggs, 12 ct., and Everyday Value Brown Medium Grade A Eggs, 18 CT. All of these eggs are raised naturally without antibiotics and vegetarian feed with no animal by-products. They also meet non-GMO project standards. Let me know if you have any questions about any of these products and I'd be happy to help!



 i want bread




No problem! We have a wide selection of products to choose from at sams club. For example, we have the Bun Brioche Hamburger Wheat, 10.6 Ounce which are made-in-France, amazingly rich and tender, seriously-special hamburger buns. We also have the Limited Edition Bread & Muffin Mix, Spiced Pumpkin (Made With Real Pumpkin), 18.3 Ounce which is vegan, vegetarian, certified kosher, and non-GMO project verified. We also have the Pretzel Birthday Cake, 5 Ounce which is a candy snack made with crunchy pretzels enrobed in white chocolate and rainbow sprinkles. Lastly, we have the Organic Honey Graham Cr



 create a shopping list for a banana bread recipe




Thanks for your question! We have a great selection of products related to retail shopping that can help you find what you're looking for. We have the SamsClub Kitchen Tofu Banh Mi Sandwich, 8.9 oz., which is a vegetarian sandwich with pickled carrots and daikon, jalapeños, cilantro, and lemongrass aioli on a baguette. We also have the Everyday Value Organic Instant Pudding, Strawberry Guava, 3.7 oz., which is a limited edition Pacific Rim Fruit Collection with a naturally flavored, certified organic, certified kosher, certified vegan, and certified vegetarian pudding. And lastly, we have the Everyday Value Bread Crumbs, Whole Wheat,

