In [105]:
import os
import qdrant_client
from dotenv import load_dotenv
from langchain.vectorstores import Qdrant 
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain

In [106]:
# load enviorment variables

load_dotenv()

QDRANT_URL = os.getenv('QDRANT_URL')
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')
COLLECTION_NAME = os.getenv('COLLECTION_NAME')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [107]:
print(COLLECTION_NAME)

big-basket-products


In [108]:
# create a qdrant client

client = qdrant_client.QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY
)


In [109]:
# create vector store

embeddings = OpenAIEmbeddings()

vector_store = Qdrant(
    client=client,
    collection_name=COLLECTION_NAME,
    embeddings=embeddings,
)

In [103]:
# load documents

file_path = os.getenv('FILE_PATH')

loader = CSVLoader(file_path=file_path, source_column=('rating'), csv_args={'delimiter': ','}, autodetect_encoding=True)
documents = loader.load()
documents = documents[:100]
# docs = get_chunks(documents)

print(f"Extracted {file_path} with {len(documents)} pages...")
print(documents[0].page_content)
print(documents[0].metadata)
print(documents[0].metadata['row'])

docs = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]

Extracted data/bigBasketProducts.csv with 100 pages...
index: 1
product: Garlic Oil - Vegetarian Capsule 500 mg
category: Beauty & Hygiene
sub_category: Hair Care
brand: Sri Sri Ayurveda
sale_price: 220
market_price: 220
type: Hair Oil & Serum
rating: 4.1
description: This Product contains Garlic Oil that is known to help proper digestion, maintain proper cholesterol levels, support cardiovascular and also build immunity.  For Beauty tips, tricks & more visit https://bigbasket.blog/
{'source': '4.1', 'row': 0}
0


In [104]:
# add documents to vector store

vector_store.add_texts(
    texts=docs,
    metadatas=metadatas,
)

['932983619c93414ba6a8014db9be3374',
 'a25b60a3bd774697b2362fe13f8dee0d',
 '611f4a57b9654682abde571b6261d47b',
 '4bfa496be0984c18b4bf4cf5913f0ee0',
 '1c6ca885a453436e807ee7ba8630a87c',
 '17e08483a0f84234adf4d7cab4dee68f',
 '053117af6a5c4d28ba9effa98fbdca7f',
 'a58636ed9d354a16bb3e4acbcaded887',
 'b251cff6c936445dbdc278dd03cde3f5',
 'cc9bf223dee9469dae9eb8d9e51a82d4',
 '549b0a34460e486d9d33772ad7fd4141',
 '9d937e5d1cbc411aa849f61cd3655f79',
 '4be0b3166286460ba215daf7b61527a9',
 '3e629f130fe8458ead9f4e784ffabc86',
 '8694dbbff9aa4a0bbd2e7d3521cedd98',
 '212a0d92a8154cdcb1cffee2862916f0',
 '46d973e4d04f477e991733016c33d474',
 'e7db011a1dd34c09a8d4f5048af5ba0a',
 '58c04c2ea10542b181c6321dd2271e5f',
 'a3084f51da604004af028975b64780e4',
 '506845d8753f4c70b93339ad89dcd7a3',
 '6ce9fc8935394325976d8aeadedcaf7a',
 '1cd79d73be084ef6925092e7bc5d7759',
 '53d1f00bf86b4e48b35c48042ceeb408',
 'a30bfae9c8024e13a04d358fa123826a',
 '9e74fb36f5644edda788debc22468f7a',
 '21e56ebfa3ee4f3096715ca9d690bcf1',
 

In [110]:
# plug vector store into retrieval chain

llm = OpenAI(openai_api_key=OPENAI_API_KEY)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [82]:
question = 'What is the best product for hair growth?'
print('>', question)
print(qa.run(question), end="\n\n")

> What is the best product for hair growth?
 It depends on the person and their hair type. Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner by StBotanica and Argan-Liquid Gold Hair Spa by Aroma Treasures are both good options for strengthening and nourishing hair.



In [111]:
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True
)

In [114]:
res = qa({"question": question})

In [118]:
pretty_printed_dict = ""

for key, value in res.items():
    pretty_printed_dict += key + ": " + str(value) + "\n"

print(pretty_printed_dict)

question: What is the best product for hair growth?
answer:  The best product for hair growth appears to be the "Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner" from StBotanica, which has a rating of 3.5.

sources: 3.5, 4.4
source_documents: [Document(page_content='index: 9\nproduct: Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner\ncategory: Beauty & Hygiene\nsub_category: Hair Care\nbrand: StBotanica\nsale_price: 1098\nmarket_price: 1098\ntype: Shampoo & Conditioner\nrating: 3.5\ndescription: An exclusive blend with Vitamin B7 Biotin, Hydrolyzed collagen, Oat Extract along with premium & organic cold-pressed ingredients helps to infuse nutrients into every strand and creates the appearance of thicker, fuller healthier looking hair. This powerful formula helps volumize even the skinniest strands into fuller and more abundant looking locks. It is safe for color-treated hair and safe for all hair types. St Botanica Biotin & C

In [134]:
src = res['source_documents'][1].page_content
print(src)

index: 58
product: Argan-Liquid Gold Hair Spa
category: Beauty & Hygiene
sub_category: Hair Care
brand: Aroma Treasures
sale_price: 199.5
market_price: 210
type: Hair & Scalp Treatment
rating: 4.4
description: Our beautifully crafted Hair Spa Collection promises to be the perfect solution for nourishing and rejuvenating your tresses. Made with Argan Oil it revives moisture in the dull dry hair, reduces frizz, smooths unmanageable locks of curly hair, deeply nurtures hair shafts & follicles to prevent hair fall, repair split ends and strengthens the roots!Revitalising Hair Oil - The luxurious fusion of Moroccan Argan Oil, Sesame Oil, Olive Oil, Lavender and Geranium provides intense nutrition to revitalize dry and damaged hair.Follikare Wash - Precious blend of Moroccan Argan Oil, Hibiscus, and Ylang Ylang nudges away sebum to strengthen the hair follicles and improve hair growth.Follikare Deep Conditioning Mask - Perfect balance of Moroccan Argan Oil, Patchouli and Shea Butter restores

In [122]:
key_value = src.split("\n")

dict = {}

for v in key_value:
    aux = v.split(": ")
    dict[aux[0]] = aux[1]

print(dict)

{'index': '9', 'product': 'Biotin & Collagen Volumizing Hair Shampoo + Biotin & Collagen Hair Conditioner', 'category': 'Beauty & Hygiene', 'sub_category': 'Hair Care', 'brand': 'StBotanica', 'sale_price': '1098', 'market_price': '1098', 'type': 'Shampoo & Conditioner', 'rating': '3.5', 'description': 'An exclusive blend with Vitamin B7 Biotin, Hydrolyzed collagen, Oat Extract along with premium & organic cold-pressed ingredients helps to infuse nutrients into every strand and creates the appearance of thicker, fuller healthier looking hair. This powerful formula helps volumize even the skinniest strands into fuller and more abundant looking locks. It is safe for color-treated hair and safe for all hair types. St Botanica Biotin & Collagen Hair Conditioner has been specially formulated to repair dry & damaged hair for full, thick, voluminous, shiny & healthy looking hair! The amazing hair conditioner ingredients include Biotin, Hydrolyzed Collagen, Pro-Vitamin B5, Vitamin E, & Hydrolyz

In [123]:
srcs = [row.page_content for row in res['source_documents']]

dicts = []
for src in srcs:
    key_value = src.split("\n")
    dict = {}
    for v in key_value:
        aux = v.split(": ")
        dict[aux[0]] = aux[1]
    dicts.append(dict)

In [133]:
import pandas as pd

df = pd.DataFrame(dicts)
# df.set_index('product', inplace=True)


df1 = df[['product','brand', 'sale_price', 'rating', 'description']]
display(df1)

Unnamed: 0,product,brand,sale_price,rating,description
0,Biotin & Collagen Volumizing Hair Shampoo + Bi...,StBotanica,1098.0,3.5,"An exclusive blend with Vitamin B7 Biotin, Hyd..."
1,Argan-Liquid Gold Hair Spa,Aroma Treasures,199.5,4.4,Our beautifully crafted Hair Spa Collection pr...


In [8]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

class NeuralSearcher:
    def __init__(self, collection_name):
        self.collection_name = collection_name
        # Initialize encoder model
        self.model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
        # initialize Qdrant client
        self.qdrant_client = QdrantClient(
                url=QDRANT_URL,
                api_key=QDRANT_API_KEY
            ) 

    def search(self, text: str):
            # Convert text query into vector
            vector = self.model.encode(text).tolist()

            # Use `vector` for search for closest vectors in the collection
            search_result = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=vector,
                query_filter=None,  # If you don't want any filters for now
                limit=5  # 5 the most closest results is enough
            )
            # `search_result` contains found vector ids with similarity scores along with the stored payload
            # In this function you are interested in payload only
            payloads = [hit.payload for hit in search_result]
            return payloads

neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)

In [11]:
res = neural_searcher.search("Any product?")
print(res)

[]


In [None]:
from fastapi import FastAPI

In [102]:
vectors_config = qdrant_client.http.models.VectorParams(
    size=1536,
    distance=qdrant_client.http.models.Distance.COSINE,
)

client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=vectors_config,
)

True

In [101]:
client.delete_collection(collection_name=COLLECTION_NAME)

True

In [None]:
    df = pd.DataFrame(
        {
            "product": ["Biotin & Collagen Volumizing Hair Shampoo", "Argan-Liquid Gold Hair Spa"],
            "brand": ["StBotanica", "Aroma Treasures"],
            "sale_price": [1098, 199.5],
            "rating": [3.5, 4.4],
            "description": [
                "An exclusive blend with Vitamin B7 Biotin, Hydrolyzed collagen, Oat Extract along with premium & organic cold-pressed ingredients helps to infuse nutrients into every strand and creates the appearance of thicker, fuller healthier looking hair. This powerful formula helps volumize even the skinniest strands into fuller and more abundant looking locks. It is safe for color-treated hair and safe for all hair types. St Botanica Biotin & Collagen Hair Conditioner has been specially formulated to repair dry & damaged hair for full, thick, voluminous, shiny & healthy looking hair! The amazing hair conditioner ingredients include Biotin, Hydrolyzed Collagen, Pro-Vitamin B5, Vitamin E, & Hydrolyzed Silk Proteins for glistening looking hair. Biotin and Collagen, infused with most efficacious natural extracts not only promotes healthy hair growth but also prevents hair dryness, increases the elasticity of the hair cortex, thereby strengthening hair, minimizing hair breakage and helping hair grow longer, healthier and thicker. PLUMP IT UP: the nutrient-rich, plump-it-up power of this haircare infused with ProVitamin B7 biotin and collagen helps give each strand of hair a beautiful boost, this dynamic duo will leave your hair feeling thicker, fuller, and looking oh, so healthy.",
                "Our beautifully crafted Hair Spa Collection promises to be the perfect solution for nourishing and rejuvenating your tresses. Made with Argan Oil it revives moisture in the dull dry hair, reduces frizz, smooths unmanageable locks of curly hair, deeply nurtures hair shafts & follicles to prevent hair fall, repair split ends and strengthens the roots!Revitalising Hair Oil - The luxurious fusion of Moroccan Argan Oil, Sesame Oil, Olive Oil, Lavender and Geranium provides intense nutrition to revitalize dry and damaged hair.Follikare Wash - Precious blend of Moroccan Argan Oil, Hibiscus, and Ylang Ylang nudges away sebum to strengthen the hair follicles and improve hair growth.Follikare Deep Conditioning Mask - Perfect balance of Moroccan Argan Oil, Patchouli and Shea Butter restores hairs natural luster and provide deep nourishment to brittle, weak and frizzy locks.Restorative Nourishing Tonic - Exotic elixir of Moroccan Argan Oil, Chamomile, Watercress and Sage Extracts strengthens the root to prevent hair fall, enhance the volume and silken the texture of hair.Satin Feel Leave-On - Potent serum of Moroccan Argan Oil and Vitamin E repairs damaged hair strands, while moisturises and smoothens unmanageable dry frizzy hair.Argan - Liquid Gold - Aroma Treasures brings to you an exclusive Hair Spa Collection enriched with Argan Oil extracted from the kernels of Moroccos Argan tree."
            ]
        }
    )
    st.dataframe(
        df,
        column_config={
            "product": "Product Name",
            "brand": "Brand",
            "sale_price": st.column_config.NumberColumn(
                "Sale Price (in INR)",
                help="The price of the product in USD",
                min_value=0,
                max_value=1000,
                format="₹%f",
            ),
            "rating": st.column_config.NumberColumn(
                "Rating",
                help="Rating of the product",
                format="%f ⭐",
            ),
            "description": "Description",
        },
        hide_index=True,
    )

In [24]:
from tqdm import tqdm

def add_texts(texts, batch_size=100):
    for i in tqdm(range(0, len(texts), batch_size)):
        try:
            vector_store.add_texts(
                texts=texts[i:i+batch_size],
            )
        except Exception as e:
            print(i, e)
            continue

add_texts(docs, batch_size=100)

  1%|          | 2/276 [00:26<1:00:38, 13.28s/it]

100 The read operation timed out


  1%|▏         | 4/276 [00:45<47:42, 10.52s/it]  

300 The write operation timed out


  2%|▏         | 6/276 [01:11<53:59, 12.00s/it]


KeyboardInterrupt: 