In [2]:
import os
from dotenv import load_dotenv
load_dotenv()  #load all the environment variables

True

In [3]:
os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")

In [22]:
import json
from langchain.schema import Document

# Load JSON file
with open("pharmakon_products.json", "r", encoding="utf-8") as f:
    products = json.load(f)

#  Convert each product description into a single Document
# the embedding model will be for description, not for the whole product details , but we have metadata for each product
docs = []
for product in products:
    docs.append(Document(
        page_content=product["product_description"],  # full description in one chunk
        metadata={
            "name": product["product_name"],
            "link": product["product_link"],
            "price": product["product_price"]
        }
    ))

#  Check first document
print(docs[3])


page_content='Composition
Menthol, arginine, Allantoin, caffeine, Carnitine, Glycerin, Carbomer, Triethanolamine, CMC , PVP , Monopropylene glycol , Polysorbate 20 , aloe vera, Chlorhexidine , eugenol, methyl paraben, propyl paraben, sugar, Parfum, Aqua.
properties
Skout Gel works with absorption effect technology. It is quick to be absorbed from the penis skin , so the desensitizing effect will not transfer to your partner. Skout Gelenables you to keep pleasurable sexual action , while delaying your semen from coming out. Skout Gel allows you to last more and more time enjoying sex. Skout Gel creates a warming sensation as vessels dilate and capillaries expand. Skout Gel provides the vigor and vitality of men. Skout Gel provides unmatched sexual desire. Skout Gel increases sexual ability for men. Skout Gel provides nourishment to improve the person’s energy level and mood , thus giving an extra pleasure. Skout Gel improves the circulation in the penile tissue as it increases blood flo

In [25]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

#  Initialize the embedding model
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

#  Create a Chroma vector store from the docs
if not os.path.exists("./chroma_db"):
    vectordb = Chroma.from_documents(
        documents=docs,       # your list of Document objects
        embedding=embedding_model,
        persist_directory="./chroma_db"  # folder to save vector database
    )

    #  Persist the database to disk
    vectordb.persist()

print("Vector database created and persisted successfully!")


Vector database created and persisted successfully!


In [41]:
# def print_results(results):
#     for i, doc in enumerate(results, 1):
#         print(f"Result {i}:")
#         print("Product Name:", doc.metadata["name"])
#         print("Link:", doc.metadata["link"])
#         print("Price:", doc.metadata["price"])
#         print("Description:", doc.page_content[:300])  # preview first 300 chars
#         print("----------------------------")
def print_results(results):
    for i, (doc, score) in enumerate(results, 1):  # unpack tuple
        print(f"Result {i} (Confidence: {score:.2f}):")
        print("Product Name:", doc.metadata["name"])
        print("Link:", doc.metadata["link"])
        print("Price:", doc.metadata["price"])
        print("Description:", doc.page_content[:300], "...")
        print("----------------------------")

        

In [44]:
def query_vector_db(query, k=1):
    """
    Query the vector database and return top k results.
    
    Args:
        query (str): The search query.
        k (int): Number of top results to return.
    
    Returns:
        list: List of Document objects with metadata.
    """
    results_with_score = vectordb.similarity_search_with_score(query, k=k)
    threshold = 0.9
    filtered_results = [(doc, score) for doc, score in results_with_score if score >= threshold]
    if not filtered_results:
        print("No results found above the threshold.")
        filtered_results = []  # Return empty list if no results meet the threshold
        return 

    return filtered_results

In [39]:
# #  Load the persisted Chroma database
# embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
# vectordb = Chroma(
#     persist_directory="./chroma_db",
#     embedding_function=embedding_model
# )

In [48]:
query = "headach"  # Define your query
results = query_vector_db(query,k=2) 

#
print_results(results)

Result 1 (Confidence: 1.46):
Product Name: Lady Sept ( Vaginal Tightening)
Link: https://pharmakonegypt.org/product/lady-sept-vaginal-tightening/
Price: EGP0
Description: no describtion available ...
----------------------------
Result 2 (Confidence: 1.46):
Product Name: Lucofun R Spray (Lightening And Exfoliating For Face)
Link: https://pharmakonegypt.org/product/lucofun-r-spray-lightening-and-exfoliating-for-face/
Price: EGP0
Description: no describtion available ...
----------------------------


In [49]:
import streamlit as st
st.title("Pharmakon Product Search")
query = st.text_input("Enter your search query:")
if query:
    results = query_vector_db(query, k=2)
    if results:
        st.write(results)
    else:
        print("No results found.")

2025-08-21 17:16:18.722 
  command:

    streamlit run C:\Users\mohmed\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-08-21 17:16:18.730 Session state does not function when running a script without `streamlit run`
