In [1]:
import os
import qdrant_client
from dotenv import load_dotenv
from langchain.vectorstores import Qdrant 
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.memory import ConversationSummaryBufferMemory

In [2]:
# load enviorment variables

load_dotenv()

QDRANT_URL = os.getenv('QDRANT_URL')
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')
COLLECTION_NAME = os.getenv('COLLECTION_NAME')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [3]:
print(COLLECTION_NAME)

big-basket-products-all


In [4]:
# create a qdrant client

client = qdrant_client.QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY
)


In [5]:
# create vector store

embeddings = OpenAIEmbeddings()

vector_store = Qdrant(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embeddings,
)

In [7]:
# load documents

file_path = os.getenv('FILE_PATH')

loader = CSVLoader(file_path=file_path, source_column=('rating'), csv_args={'delimiter': ','}, autodetect_encoding=True)
documents = loader.load()
# documents = documents[:100]
# docs = get_chunks(documents)

print(f"Extracted {file_path} with {len(documents)} pages...")
print(documents[0].page_content)
print(documents[0].metadata)
print(documents[0].metadata['row'])

docs = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]

Extracted data/bigBasketProducts.csv with 27555 pages...
index: 1
product: Garlic Oil - Vegetarian Capsule 500 mg
category: Beauty & Hygiene
sub_category: Hair Care
brand: Sri Sri Ayurveda
sale_price: 220
market_price: 220
type: Hair Oil & Serum
rating: 4.1
description: This Product contains Garlic Oil that is known to help proper digestion, maintain proper cholesterol levels, support cardiovascular and also build immunity.  For Beauty tips, tricks & more visit https://bigbasket.blog/
{'source': '4.1', 'row': 0}
0


In [12]:
import time
import tqdm
retries_dict = {}

for i in range(0, len(docs), 32):
    try:
        vector_store.add_texts(
            texts=docs[i:i+32],
            metadatas=metadatas[i:i+32],
            ids=tqdm.tqdm(range(i, i+32))
        )
    except Exception as e:
        print(i, e)
        i = i - 32
        retries_dict[i] = retries_dict.get(i, 0) + 1
        if retries_dict[i] > 5:
            print(f"Failed to add documents at index {i} after 3 retries. Skipping...")
            i += 32
            continue
        # Wait for 1 second
        time.sleep(1)

100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 111199.44it/s]
100%|██████████| 32/32 [00:00<00:00, 32071.14it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 64035.18it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 2016.16it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it

In [104]:
# add documents to vector store

vector_store.add_texts(
    texts=docs,
    metadatas=metadatas,
)

['932983619c93414ba6a8014db9be3374',
 'a25b60a3bd774697b2362fe13f8dee0d',
 '611f4a57b9654682abde571b6261d47b',
 '4bfa496be0984c18b4bf4cf5913f0ee0',
 '1c6ca885a453436e807ee7ba8630a87c',
 '17e08483a0f84234adf4d7cab4dee68f',
 '053117af6a5c4d28ba9effa98fbdca7f',
 'a58636ed9d354a16bb3e4acbcaded887',
 'b251cff6c936445dbdc278dd03cde3f5',
 'cc9bf223dee9469dae9eb8d9e51a82d4',
 '549b0a34460e486d9d33772ad7fd4141',
 '9d937e5d1cbc411aa849f61cd3655f79',
 '4be0b3166286460ba215daf7b61527a9',
 '3e629f130fe8458ead9f4e784ffabc86',
 '8694dbbff9aa4a0bbd2e7d3521cedd98',
 '212a0d92a8154cdcb1cffee2862916f0',
 '46d973e4d04f477e991733016c33d474',
 'e7db011a1dd34c09a8d4f5048af5ba0a',
 '58c04c2ea10542b181c6321dd2271e5f',
 'a3084f51da604004af028975b64780e4',
 '506845d8753f4c70b93339ad89dcd7a3',
 '6ce9fc8935394325976d8aeadedcaf7a',
 '1cd79d73be084ef6925092e7bc5d7759',
 '53d1f00bf86b4e48b35c48042ceeb408',
 'a30bfae9c8024e13a04d358fa123826a',
 '9e74fb36f5644edda788debc22468f7a',
 '21e56ebfa3ee4f3096715ca9d690bcf1',
 

In [13]:
# plug vector store into retrieval chain

llm = OpenAI(openai_api_key=OPENAI_API_KEY)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [82]:
question = 'What is the best product for hair growth?'
print('>', question)
print(qa.run(question), end="\n\n")

> What is the best product for hair growth?
 It depends on the person and their hair type. Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner by StBotanica and Argan-Liquid Gold Hair Spa by Aroma Treasures are both good options for strengthening and nourishing hair.



In [13]:
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={'k':4}),
    return_source_documents=True,
    reduce_k_below_max_tokens=True
)

In [30]:
question = 'Tell me four products that can kill germs'
res = qa({"question": question})

In [34]:
print(res)

{'question': 'Tell me four products that can kill germs', 'answer': ' Antibacterial Germ Kill Spray (No Gas) by Lifebuoy, Germ Kill Spray by Nature Protect, Disinfectant Spray, and All Purpose Cleaners.\n', 'sources': '4.1, 4.2', 'source_documents': [Document(page_content='index: 1133\nproduct: Antibacterial Germ Kill Spray (No Gas) – Safe On Skin, Safe On Surfaces\ncategory: Beauty & Hygiene\nsub_category: Bath & Hand Wash\nbrand: Lifebuoy\nsale_price: 199\nmarket_price: 199\ntype: Hand Wash & Sanitizers\nrating: 4.1\ndescription: While we always remember to sanitize our hands, we often forget about germs on surfaces that are frequently touched. Mobile phones, tablets, doorknobs or toys that kids play with may also carry harmful bacteria & viruses. The World’s Number 1 Germ Protection Brand, Lifebuoy, brings to you its first-ever Germ Kill Spray. It instantly kills 99.9% of bacteria and viruses. Can be used as often as required and works without any water, a must-have at home or on th

In [42]:
import pandas as pd

srcs = [row.page_content for row in res['source_documents']]
print(srcs[0])
dicts = []
# for src in srcs:
key_value = srcs[0].split("\n")
print(key_value)
dict = {}
for v in key_value:
    aux = v.split(": ")
    try:
        dict[aux[0]] = aux[1]
    except:
        print(aux)
# dicts.append(dict)

# df = pd.DataFrame(dicts)
# # df.set_index('product', inplace=True)

# df1 = df[['product','brand', 'sale_price', 'rating', 'description']]

# # Remove duplicates
# df1 = df1.drop_duplicates()

index: 1133
product: Antibacterial Germ Kill Spray (No Gas) – Safe On Skin, Safe On Surfaces
category: Beauty & Hygiene
sub_category: Bath & Hand Wash
brand: Lifebuoy
sale_price: 199
market_price: 199
type: Hand Wash & Sanitizers
rating: 4.1
description: While we always remember to sanitize our hands, we often forget about germs on surfaces that are frequently touched. Mobile phones, tablets, doorknobs or toys that kids play with may also carry harmful bacteria & viruses. The World’s Number 1 Germ Protection Brand, Lifebuoy, brings to you its first-ever Germ Kill Spray. It instantly kills 99.9% of bacteria and viruses. Can be used as often as required and works without any water, a must-have at home or on the go. What’s even better – it is safe on the skin!
This antibacterial, no gas formula comes in a spill-proof pack, making it easy to carry and protect yourself at all times. Best to use it on frequently touched surfaces to kill the unseen bacteria and viruses. Just hold it upright, 

In [21]:
pretty_printed_dict = ""

for key, value in res.items():
    pretty_printed_dict += key + ": " + str(value) + "\n"

print(pretty_printed_dict)

question: What is the best product for hair growth?
answer:  USTRAA's Hair Growth Vitalizer is the best product for hair growth. 

sources: 4
source_documents: [Document(page_content="index: 19192\nproduct: Hair Growth Vitalizer\ncategory: Beauty & Hygiene\nsub_category: Men's Grooming\nbrand: USTRAA\nsale_price: 475\nmarket_price: 699\ntype: Hair Care & Styling\nrating: 4\ndescription: Hair Growth Vitalizer brings together some of the best quality ingredients that would support your hair growth, help stop hair fall and provide vital nutrition. This product is completely free of mineral oil, artificial growth enhancers or harmful chemicals. It takes at least 90 days of regular use to see a visible difference. Results might vary from person to person.", metadata={'row': 19191, 'source': '4'}), Document(page_content='index: 22138\nproduct: Hair Growth Vitalizer\ncategory: Beauty & Hygiene\nsub_category: Hair Care\nbrand: USTRAA\nsale_price: 475\nmarket_price: 699\ntype: Hair Care & Styli

In [29]:
src = res['source_documents'][1].page_content
print(src)

index: 22138
product: Hair Growth Vitalizer
category: Beauty & Hygiene
sub_category: Hair Care
brand: USTRAA
sale_price: 475
market_price: 699
type: Hair Care & Styling
rating: 4
description: Hair Growth Vitalizer brings together some of the best quality ingredients that would support your hair growth, help stop hair fall and provide vital nutrition. This product is completely free of mineral oil, artificial growth enhancers or harmful chemicals. It takes at least 90 days of regular use to see a visible difference. Results might vary from person to person.


In [122]:
key_value = src.split("\n")

dict = {}

for v in key_value:
    aux = v.split(": ")
    dict[aux[0]] = aux[1]

print(dict)

{'index': '9', 'product': 'Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner', 'category': 'Beauty & Hygiene', 'sub_category': 'Hair Care', 'brand': 'StBotanica', 'sale_price': '1098', 'market_price': '1098', 'type': 'Shampoo & Conditioner', 'rating': '3.5', 'description': 'An exclusive blend with Vitamin B7 Biotin, Hydrolyzed collagen, Oat Extract along with premium & organic cold-pressed ingredients helps to infuse nutrients into every strand and creates the appearance of thicker, fuller healthier looking hair. This powerful formula helps volumize even the skinniest strands into fuller and more abundant looking locks. It is safe for color-treated hair and safe for all hair types. St Botanica Biotin & Collagen Hair Conditioner has been specially formulated to repair dry & damaged hair for full, thick, voluminous, shiny & healthy looking hair! The amazing hair conditioner ingredients include Biotin, Hydrolyzed Collagen, Pro-Vitamin B5, Vitamin E, & Hydrolyz

In [123]:
srcs = [row.page_content for row in res['source_documents']]

dicts = []
for src in srcs:
    key_value = src.split("\n")
    dict = {}
    for v in key_value:
        aux = v.split(": ")
        dict[aux[0]] = aux[1]
    dicts.append(dict)

In [133]:
import pandas as pd

df = pd.DataFrame(dicts)
# df.set_index('product', inplace=True)


df1 = df[['product','brand', 'sale_price', 'rating', 'description']]
display(df1)

Unnamed: 0,product,brand,sale_price,rating,description
0,Biotin & Collagen Volumizing Hair Shampoo + Bi...,StBotanica,1098.0,3.5,"An exclusive blend with Vitamin B7 Biotin, Hyd..."
1,Argan-Liquid Gold Hair Spa,Aroma Treasures,199.5,4.4,Our beautifully crafted Hair Spa Collection pr...


In [8]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

class NeuralSearcher:
    def __init__(self, collection_name):
        self.collection_name = collection_name
        # Initialize encoder model
        self.model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
        # initialize Qdrant client
        self.qdrant_client = QdrantClient(
                url=QDRANT_URL,
                api_key=QDRANT_API_KEY
            ) 

    def search(self, text: str):
            # Convert text query into vector
            vector = self.model.encode(text).tolist()

            # Use `vector` for search for closest vectors in the collection
            search_result = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=vector,
                query_filter=None,  # If you don't want any filters for now
                limit=5  # 5 the most closest results is enough
            )
            # `search_result` contains found vector ids with similarity scores along with the stored payload
            # In this function you are interested in payload only
            payloads = [hit.payload for hit in search_result]
            return payloads

neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)

In [11]:
res = neural_searcher.search("Any product?")
print(res)

[]


In [7]:
vectors_config = qdrant_client.http.models.VectorParams(
    size=1536,
    distance=qdrant_client.http.models.Distance.COSINE,
)

client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=vectors_config,
    shard_number=4,
)

True

In [6]:
client.delete_collection(collection_name=COLLECTION_NAME)

True

In [9]:
import pandas

file_path = os.getenv('FILE_PATH')
df = pandas.read_csv(file_path)
metadatas = [{'source':int(df.loc[i][0]), 'row':i} for i in range(len(df))]
df = df.apply(lambda x: x.to_json(), axis=1)

print(df[0])

docs = [row for row in df]
# docs = docs[:60]

{"index":1,"product":"Garlic Oil - Vegetarian Capsule 500 mg","category":"Beauty & Hygiene","sub_category":"Hair Care","brand":"Sri Sri Ayurveda ","sale_price":220.0,"market_price":220.0,"type":"Hair Oil & Serum","rating":4.1,"description":"This Product contains Garlic Oil that is known to help proper digestion, maintain proper cholesterol levels, support cardiovascular and also build immunity.  For Beauty tips, tricks & more visit https:\/\/bigbasket.blog\/"}


In [10]:
import time
import tqdm
retries_dict = {}

for i in range(0, len(docs), 32):
    try:
        vector_store.add_texts(
            texts=docs[i:i+32],
            metadatas=metadatas[i:i+32],
            ids=tqdm.tqdm(range(i, i+32))
        )
    except Exception as e:
        print(i, e)
        i = i - 32
        retries_dict[i] = retries_dict.get(i, 0) + 1
        if retries_dict[i] > 5:
            print(f"Failed to add documents at index {i} after 3 retries. Skipping...")
            i += 32
            continue
        # Wait for 1 second
        time.sleep(1)

100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 32109.50it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 31812.69it/s]
100%|██████████| 32/32 [00:00<00:00, 31315.38it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 30812.15it/s]
100%|██████████| 32/32 [00:00<00:00, 33329.46it/s]
100%|██████████| 32/32 [00:00<00:00, 25073.37it/s]
100%|██████████| 32/32 [00:00<00:00, 291144.75it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
10

18848 The read operation timed out


100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 22028.18it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<00:00, 31926.20it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 32/32 [00:00<?, ?it/s]
100%|██████████| 3

In [40]:
vector_store.add_texts(
    texts=docs,
    metadatas=metadatas,
)

['63f3bc4ad7614689834d9ef45a264e72',
 'faf4b2d64d77412582c727affefe6834',
 'c7e17ac867aa45cca1aa08ec102abaca',
 'fa70dc6145b84d94b5da728f35fdcfd0',
 'a7b7281bc0484c349461e86fffc424ba',
 '13a1706929264123843066d2f07f69be',
 'a234de73636d45fc8a15979ecca0e1d9',
 'e1be9bae5f1b4c1294e3cb0d2940ebcc',
 '9a8d97fbdff14f7eb547f212108cd39b',
 '7f94ac00873e4e1db9c9bc6f2494585b',
 'b26b212ae83a4bb8959702b82949811c',
 '6344badf01974208a7a5a81006489eda',
 '91c95a84724f478891330d3e498e5772',
 'e1325a72c920402ab97288fd2d73f3d8',
 'bb1b8e3d53b54e5dbbb7f0b4422dd126',
 '6a086baf738543a0bc2abf303810c6a0',
 'a29876b3c31b47c58e06da62e627abc1',
 '5bec40b546ef42c3984d94ed3717c1ee',
 '34012440b2674291a0fceba8eeac7b51',
 'b2bfb499706b496c955cd96f1cb41f8e',
 '6bb55681897c4f58967f8b822bf5791d',
 '65d8bff72f1f42aa976178979dc8c827',
 '2226fa9f1a954e4a845f9164bdbb5d84',
 '706a3630eaf945ed98b3f8ee213fcf71',
 'd039c50ff93a4e74bdebe90e5189d58d',
 '25428008bf1943ad9bd950244ff76521',
 '7065ab60a70e4b3a81ca47c929aed416',
 

In [36]:
question = 'Suggest me some hair care products'
res = qa({"question": question})

In [48]:
import json
srcs = [json.loads(row.page_content) for row in res['source_documents']]

In [64]:
import pandas as pd
# Convert json to pandas dataframe
df = pandas.DataFrame(srcs)
df = df.fillna('null')
display(df)

Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
0,7407,Daily Use Hair Conditioner - For Men,Beauty & Hygiene,Men's Grooming,USTRAA,153.23,199.0,Bath & Shower,3.8,Using conditioner after shampoo keeps your hai...
1,16192,Hairfall Control Shampoo,Beauty & Hygiene,Hair Care,Aroma Magic,182.75,215.0,Shampoo & Conditioner,4.1,This Hairfall control shampoo is 100% free of ...
2,1413,Herbal Amla & Reetha Hair Cleanser,Beauty & Hygiene,Hair Care,Khadi Natural,160.0,160.0,Shampoo & Conditioner,4.5,This amla & reetha shampoo contains essential ...
3,23742,"Hair Care Kit - Oil, Shampoo, Conditioner & Serum",Beauty & Hygiene,Hair Care,Dr Batra's,651.0,1085.0,Shampoo & Conditioner,,Dr Batra’s Hair Care Kit gives you a daily dos...


In [6]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain, ConversationalRetrievalChain

In [39]:
# llm = OpenAI(openai_api_key=OPENAI_API_KEY)
llm = ChatOpenAI(temperature=0.0, model='gpt-3.5-turbo', api_key=OPENAI_API_KEY)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    return_messages=True,
)

In [27]:
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={'k':2}),
    reduce_k_below_max_tokens=True,
    memory=memory,
    verbose=True
)

In [28]:
question = 'Hi my name is Abhishek'
res = qa({"question": question})
print(res['answer'])



[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m
There are two products mentioned: Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam is primarily made of 'Haritaki' and is known for its ability to remove diseases and make the body shine. Abhayarishta by Dabur is an ayurvedic formulation that helps treat stomach ailments such as constipation, indigestion, flatulence, and is also indicated for bleeding and non-bleeding piles.



In [30]:
question = 'what is my name?'
res = qa({"question": question})
print(res['answer'])



[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m
I don't know your name.



In [34]:
memory.load_memory_variables({})

{'history': []}

In [35]:
conversation = ConversationalRetrievalChain(
    llm=llm, 
    memory = memory,
    verbose=True
)

In [36]:
conversation.predict(input="Hi, my name is Andrew. There are two products mentioned: Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam is primarily made of 'Haritaki' and is known for its ability to remove diseases and make the body shine. Abhayarishta by Dabur is an ayurvedic formulation that helps treat stomach ailments such as constipation, indigestion, flatulence, and is also indicated for bleeding and non-bleeding piles.\n")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[]
Human: Hi, my name is Andrew. There are two products mentioned: Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam is primarily made of 'Haritaki' and is known for its ability to remove diseases and make the body shine. Abhayarishta by Dabur is an ayurvedic formulation that helps treat stomach ailments such as constipation, indigestion, flatulence, and is also indicated for bleeding and non-bleeding piles.

AI:[0m

[1m> Finished chain.[0m


"Hello Andrew! It's nice to meet you. Thank you for providing information about the two products, Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam seems to be focused on the benefits of 'Haritaki' and its ability to remove diseases and enhance the body's radiance. On the other hand, Abhayarishta by Dabur is an ayurvedic formulation specifically designed to address stomach ailments like constipation, indigestion, flatulence, as well as bleeding and non-bleeding piles. If you have any more questions or if there's anything else I can assist you with, feel free to let me know!"

In [13]:
conversation.predict(input="What is 1+1?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content='Hi, my name is Andrew'), AIMessage(content="Hello Andrew! It's nice to meet you. How can I assist you today?")]
Human: What is 1+1?
AI:[0m

[1m> Finished chain.[0m


'1+1 is equal to 2.'

In [37]:
conversation.predict(input="What is my name?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content="Hi, my name is Andrew. There are two products mentioned: Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam is primarily made of 'Haritaki' and is known for its ability to remove diseases and make the body shine. Abhayarishta by Dabur is an ayurvedic formulation that helps treat stomach ailments such as constipation, indigestion, flatulence, and is also indicated for bleeding and non-bleeding piles.\n"), AIMessage(content="Hello Andrew! It's nice to meet you. Thank you for providing information about the two products, Abhayarishtam by Kerala Ayurveda and Abhayarishta by Dabur. Abhayarishtam seems to be 

'Your name is Andrew.'

In [14]:
from langchain.chains import (
    StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI

# combine_docs_chain = StuffDocumentsChain()
retriever = vector_store.as_retriever(search_kwargs={'k':2})

# This controls how the standalone question is generated.
# Should take `chat_history` and `question` as input variables.
# template = (
#     "Combine the chat history and follow up question into "
#     "a standalone question. Chat History: {chat_history}"
#     "Follow up question: {question}"
# )
# prompt = PromptTemplate.from_template(template)
# llm = OpenAI()
# question_generator_chain = LLMChain(llm=llm, prompt=prompt)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, input_key="question", output_key="answer")
# chain = ConversationalRetrievalChain(
#     combine_docs_chain=combine_docs_chain,
#     retriever=retriever,
#     question_generator=question_generator_chain,
# )
qa = ConversationalRetrievalChain.from_llm(ChatOpenAI(max_tokens=512, model="gpt-3.5-turbo"), retriever=retriever, return_source_documents=True, memory=memory)

In [13]:
question = 'Hi my name is Abhishek'
res = qa({"question": question})
print(res['answer'])

ValueError: Missing some input keys: {'chat_history'}

In [46]:
question = 'Suggest me some hair care products'
res = qa({"question": question})
print(res['answer'])

Yes, I can recommend two hair care products for you:

1. Daily Use Hair Conditioner - For Men by USTRAA: This conditioner contains Vitamin E, Water Hyssop (Brahmi), wheat gram, and Evening Primrose. It helps to increase blood flow to the scalp, strengthen hair follicles, check split ends, and promote healthy hair growth. It is suitable for daily use.

2. Silky Strength Aloevera-Wheatgerm Moisturising Shampoo by Just Herbs: This shampoo is specially formulated to moisturize and nourish the scalp while effectively cleansing the hair without stripping off its natural oils. It contains Aloe Vera, wheat germ oil, lecithin, hibiscus, and vetiver. It strengthens the roots, fights free radicals, repairs damaged hair, and promotes growth of shiny black hair.

Please note that these recommendations are based on the provided descriptions and ratings of the products. It's always a good idea to read reviews and consider your specific hair care needs before making a purchase.


In [19]:
memory.load_memory_variables({})

{'chat_history': [HumanMessage(content='Hey! My name is Abhishek'),
  AIMessage(content='Hello Abhishek! How can I assist you today?'),
  HumanMessage(content='What is my name?'),
  AIMessage(content="I'm sorry, but I don't have access to your personal information, so I don't know your name."),
  HumanMessage(content='I have 3 apples'),
  AIMessage(content='Based on the given context, it is not mentioned how many apples you have.'),
  HumanMessage(content='I have 3 apples'),
  AIMessage(content='Based on the given context, it is not mentioned how many apples you have.')]}

In [18]:
question = 'I have 3 apples'
res = qa({"question": question})
print(res['answer'])

Based on the given context, it is not mentioned how many apples you have.


In [20]:
question = 'How many apples do I have?'
res = qa({"question": question})
print(res['answer'])

Based on the given context, it is not possible to determine the number of apples you have. The context only provides information about the importance and versatility of apples, but does not mention the quantity.


In [33]:
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

llm = ChatOpenAI(model_name='gpt-3.5-turbo')

memory = ConversationSummaryMemory(
    llm = llm,
    memory_key='chat_history',
    return_messages=True,
    output_key='answer'
)

chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    get_chat_history=lambda h :h,
)

In [34]:
chain.verbose = True
chain.combine_docs_chain.verbose = True
chain.combine_docs_chain.llm_chain.verbose = True
chain.combine_docs_chain.llm_chain.llm.verbose = True
chain.question_generator.verbose = True
chain.question_generator.llm.verbose = True

In [39]:
memory.chat_memory.add_user_message("my name is David, what is your name?")
memory.chat_memory.add_ai_message("Hello, David! My name is Roboto. How can I help you today?")

In [42]:
memory.load_memory_variables({})

{'chat_history': [SystemMessage(content='The human introduces themselves as Abhishek. The AI responds that it does not have a personal name as it is an AI language model.')]}

In [41]:
res = chain({'question':'Hi my name is Abhishek'})



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
[SystemMessage(content='')]
Follow Up Input: Hi my name is Abhishek
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{"index":17598,"product":"Single Line Soft Bound Brown Note Book 18x24 Cm","category":"Cleaning & Household","sub_category":"Stationery","brand":"Navneet Youva","sale_price":48.0,"market_price":50.0,"type":"Notebooks, Files, Folders","rating":4.0,"descrip

In [43]:
res = chain({'question':'What is my name?'})



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
[SystemMessage(content='The human introduces themselves as Abhishek. The AI responds that it does not have a personal name as it is an AI language model.')]
Follow Up Input: What is my name?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
{"index":9625,"product":"My Life For Him - Eau De Parfum Natural Spray","category":"Beauty & Hygiene","sub_category":"Fragrances & D

In [None]:
from langchain import PromptTemplate

# note that the input variables ('question', etc) are defaults, and can be changed

condense_prompt = PromptTemplate.from_template(
    ('Do X with user input ({question}), and do Y with chat history ({chat_history}).')
)

combine_docs_custom_prompt = PromptTemplate.from_template(
    ('Write a haiku about a dolphin.\n\n'
     'Completely ignore any context, such as {context}, or the question ({question}).')
)

In [None]:
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

chain = ConversationalRetrievalChain.from_llm(
    OpenAI(temperature=0), 
    vector_store.as_retriever(), # see below for vectorstore definition
    memory=memory,
    condense_question_prompt=condense_prompt,
    combine_docs_chain_kwargs=dict(prompt=combine_docs_custom_prompt)
)

In [6]:
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain
from pprint import PrettyPrinter as pp

prompt_template = '''
You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query.
Answer my questions based on your knowledge and our older conversation. Do not make up answers.
If you do not know the answer to a question, just say "I don't know" in a polite manner.

{context}

Given the following conversation and a follow up question, answer the question.

{chat_history}

question: {question}
'''

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "chat_history", "question"]
)

llm = ChatOpenAI(model_name='gpt-3.5-turbo')

memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
    output_key='answer'
)
# this time it was required to specify output_key in memory.

retriever = vector_store.as_retriever(search_kwargs={'k':2})

chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    verbose=True
)

# testing the responses
# pp.pprint(chain(PROMPT.format(question=q1, chat_history=memory.chat_memory.messages, context='')))

# pp.pprint(chain(PROMPT.format(question=q2, chat_history=memory.chat_memory.messages, context='')))

In [8]:
q1 = 'Hey! My name is Abhishek. Suggesst me some hair care products'
res = chain(PROMPT.format(question=q1, chat_history=memory.chat_memory.messages, context=''))



[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m


In [7]:
memory.load_memory_variables({})

{'chat_history': [SystemMessage(content='')]}

In [11]:
q2 = "What is the name of the product you mentioned earlier?"
res = chain(PROMPT.format(question=q2, chat_history=memory.chat_memory.messages, context=''))



[1m> Entering new RetrievalQAWithSourcesChain chain...[0m

[1m> Finished chain.[0m


In [13]:
print(memory.load_memory_variables({})['chat_history'])

[SystemMessage(content='The AI is a Product Recommendation Agent that provides suggestions for hair care products. The AI is asked to recommend hair care products and provides a specific recommendation for Amla Hair Oil - Sarson by Dabur Amla. When asked about the name of the product mentioned earlier, the AI correctly identifies it as "Amla Hair Oil - Sarson by Dabur Amla".')]


In [9]:
print(res['answer'])

Based on your query for hair care products, I recommend the following options:
1. Amla Hair Oil - Sarson by Dabur Amla. It has a rating of 4.5 and is described as non-sticky. The sale price is $135.0 (market price $150.0). [


In [10]:
print(res['source_documents'])

[Document(page_content='{"index":15395,"product":"Amla Hair Oil - Sarson","category":"Beauty & Hygiene","sub_category":"Hair Care","brand":"Dabur Amla","sale_price":135.0,"market_price":150.0,"type":"Hair Oil & Serum","rating":4.5,"description":"Don\'t we all love those days when we\'re at home, with no thoughts buzzing in our wired brains, enjoying a nice oil massage by a good old Tai teleporting you back to such beloved moments? Dabur Sarson Amla Hair Oil comes in with its non-sticky charm."}', metadata={'row': 15394, 'source': 15395}), Document(page_content='{"index":9030,"product":"Sarson Amla Hair Oil","category":"Beauty & Hygiene","sub_category":"Hair Care","brand":"Dabur Amla","sale_price":50.0,"market_price":50.0,"type":"Hair Oil & Serum","rating":4.0,"description":"Don\'t we all love those days when we are at home, with no thoughts buzzing in our wired brains, enjoying a nice oil massage by your good old Tai teleporting you back to such beloved moments? Dabur Sarson Amla Hair 

In [12]:
print(res['answer'])

The name of the product mentioned earlier is "Amla Hair Oil - Sarson by Dabur Amla".

