# Table Setup: Run only if the table has not been set up already

In [2]:
from datetime import datetime
import logging

from openai import RateLimitError
import pandas as pd
from database.vector_store import VectorStore
from timescale_vector.client import uuid_from_time

# Initialize VectorStore (used below for embeddings, table/index creation and upserts)
vec = VectorStore()

# Read the CSV file. It is semicolon-delimited, and the path is relative to the
# notebook's working directory. prepare_record below reads the "question",
# "answer" and "category" columns from each row.
df = pd.read_csv("../data/faq_dataset.csv", sep=";")


# Prepare data for insertion
def prepare_record(row):
    """Build one vector-store record from a single FAQ row.

    The question and answer are merged into one text, that text is embedded
    with ``vec.get_embedding``, and the result is packaged together with a
    UUID version 1 identifier, which encodes a timestamp.

    The timestamp is captured exactly once and used for both the UUID and the
    ``created_at`` metadata entry, so the two always describe the same instant.

    Args:
        row: Mapping-like row (for example a pandas Series produced by
            ``DataFrame.apply(..., axis=1)``) providing the ``question``,
            ``answer`` and ``category`` entries.

    Returns:
        pd.Series with the entries ``id`` (UUID as string), ``metadata``
        (dict with ``category`` and ``created_at``), ``contents`` (the merged
        question/answer text) and ``embedding``.

    Note:
        - By default the current time is used.
        - To use a specific time (useful when your content already has an
          associated datetime), assign that datetime to ``record_time``
          instead of ``datetime.now()``.

        Example:
            from datetime import datetime
            record_time = datetime(2023, 1, 1, 12, 0, 0)
            id = str(uuid_from_time(record_time))
    """
    content = f"Question: {row['question']}\nAnswer: {row['answer']}"
    embedding = vec.get_embedding(content)
    # Take the timestamp once so the UUID and created_at cannot drift apart.
    record_time = datetime.now()
    return pd.Series(
        {
            "id": str(uuid_from_time(record_time)),
            "metadata": {
                "category": row["category"],
                "created_at": record_time.isoformat(),
            },
            "contents": content,
            "embedding": embedding,
        }
    )

logger = logging.getLogger('pgvectorscale')
try:
    # prepare_record calls vec.get_embedding once per row of df.
    records_df = df.apply(prepare_record, axis=1)
except RateLimitError:
    # Log with traceback, then re-raise. Swallowing the error would leave
    # records_df undefined and make later cells fail with an unrelated NameError.
    logger.exception("OpenAI Rate Limit Exceeded")
    raise

In [3]:
# Preview the prepared records: one row per FAQ entry with id, metadata, contents and embedding.
records_df

Unnamed: 0,id,metadata,contents,embedding
0,298cabf4-51fc-11f0-9ed1-49363e65f4bb,"{'category': 'Shipping', 'created_at': '2025-0...",Question: What are your shipping options?\nAns...,"[-0.009472182020545006, 0.01369680929929018, 0..."
1,29e3c70e-51fc-11f0-8b17-9c05e25073fd,"{'category': 'Order Management', 'created_at':...",Question: How can I track my order?\nAnswer: Y...,"[0.037382688373327255, 0.005943585652858019, 0..."
2,2a27f014-51fc-11f0-8245-0040cc43bbcc,"{'category': 'Returns', 'created_at': '2025-06...",Question: What is your return policy?\nAnswer:...,"[0.0170719213783741, 0.05264050513505936, 0.02..."
3,2a9687ae-51fc-11f0-98df-e79783b3ff28,"{'category': 'Shipping', 'created_at': '2025-0...",Question: Do you offer international shipping?...,"[0.015880830585956573, 0.020843589678406715, 0..."
4,2ac2f2d0-51fc-11f0-b206-43ff96f7fd6f,"{'category': 'Order Management', 'created_at':...",Question: How do I cancel an order?\nAnswer: Y...,"[0.019665976986289024, 0.03133445605635643, -0..."
5,2bdd6308-51fc-11f0-a51c-d14dd6f8364d,"{'category': 'Payment', 'created_at': '2025-06...",Question: What payment methods do you accept?\...,"[0.027361895889043808, -0.02153364010155201, 0..."
6,2c076446-51fc-11f0-8d6a-2cee70e47299,"{'category': 'Product Information', 'created_a...",Question: Are your products authentic?\nAnswer...,"[0.006587592884898186, 0.00847778283059597, -0..."
7,2c21d29a-51fc-11f0-b9fd-43ac45700f0e,"{'category': 'Customer Support', 'created_at':...",Question: How do I contact customer service?\n...,"[0.06086518615484238, -0.005692227743566036, 0..."
8,2c556240-51fc-11f0-9078-135cb00a9608,"{'category': 'Services', 'created_at': '2025-0...",Question: Do you offer gift wrapping?\nAnswer:...,"[0.007302050944417715, 0.012296430766582489, -..."
9,2c6c31fa-51fc-11f0-b3b4-19ad4bdbf0dc,"{'category': 'Pricing', 'created_at': '2025-06...",Question: What is your price match policy?\nAn...,"[-0.037919409573078156, -0.007301652804017067,..."


In [4]:
# First-time setup only (see the section heading): create the vector store's table(s).
vec.create_tables()

In [None]:
# First-time setup only. In the recorded run this cell failed with
# DuplicateTable because the index "embeddings_embedding_idx" already existed;
# skip it when the index has been created before.
vec.create_index()  # DiskAnnIndex

DuplicateTable: relation "embeddings_embedding_idx" already exists


In [None]:
# Write the prepared records into the vector store.
# NOTE(review): presumably insert-or-update keyed on id — confirm in VectorStore.upsert.
vec.upsert(records_df)

# Similarity search: run from here if the table is already set up

In [6]:
from datetime import datetime
from database.vector_store import VectorStore
from services.synthesizer import Synthesizer
from timescale_vector import client

# Initialize VectorStore again so this section can be run on its own,
# without executing the table-setup section above.
vec = VectorStore()

In [7]:
# A question that appears verbatim in the FAQ data (it is the top hit in the search output below).
relevant_question = "What are your shipping options?"

In [8]:
# Search the vector store for the question, capped at 3 results.
results = vec.search(relevant_question, limit=3)

In [9]:
# Display the hits. In the recorded run the verbatim FAQ entry ranks first with the smallest distance.
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-20T18:37:27.834603
2,86d022c4-4dd7-11f0-a63f-20f318bea63c,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540579,Services,2025-06-20T18:37:32.358010


In [10]:
# Ask the Synthesizer to answer the question, passing the retrieved rows as context.
response = Synthesizer.generate_response(question=relevant_question, context=results)

In [11]:
# Show the raw response object with its thought_process, answer and enough_context fields.
response

SynthesizedResponse(thought_process=['The user is asking about the shipping options available.', 'The retrieved context provides information on the shipping options offered by the company.', 'The context mentions both standard and express shipping options, including their respective delivery times.', "There is enough information to answer the user's question based on the provided context."], answer='We offer two shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days.', enough_context=True)

In [12]:
# Assemble the report first (answer, reasoning steps, enough_context flag),
# then emit it with one print call per entry.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines += [f"- {step}" for step in response.thought_process]
report_lines.append(f"\nContext: {response.enough_context}")
for report_line in report_lines:
    print(report_line)


We offer two shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days.

Thought process:
- The user is asking about the shipping options available.
- The retrieved context provides information on the shipping options offered by the company.
- The context mentions both standard and express shipping options, including their respective delivery times.
- There is enough information to answer the user's question based on the provided context.

Context: True


In [13]:
# A question unrelated to the FAQ data, used to see how retrieval and
# synthesis behave when the store holds no matching content.
irrelevant_question = "What is the weather in Tokyo?"

# Same search as before, capped at 3 results.
results = vec.search(irrelevant_question, limit=3)

In [14]:
# Rows are still returned, but with much larger distances
# (about 0.92 in the recorded run, versus about 0.32 for the relevant question).
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.915522,Shipping,2025-06-20T18:37:27.834603
1,89660918-4dd7-11f0-a434-51a36850fb71,Question: What is your warranty policy?\nAnswe...,"[0.014443719, 0.014022687, 0.017999096, 0.0259...",0.921201,Product Information,2025-06-20T18:37:36.695836
2,83aae598-4dd7-11f0-b6b2-340f562a1aa7,Question: What is your return policy?\nAnswer:...,"[0.017055543, 0.052628532, 0.024427535, 0.0350...",0.923967,Returns,2025-06-20T18:37:27.080796


In [15]:
# Answer the unrelated question using the weak matches retrieved above as context.
response = Synthesizer.generate_response(
    question=irrelevant_question,
    context=results,
)

# Assemble the report (answer, reasoning steps, enough_context flag), then print it entry by entry.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines += [f"- {step}" for step in response.thought_process]
report_lines.append(f"\nContext: {response.enough_context}")
for report_line in report_lines:
    print(report_line)


I'm sorry, but I cannot provide the current weather information for Tokyo based on the available context. Please check a reliable weather service or app for the most up-to-date information.

Thought process:
- The user's question is about the current weather in Tokyo, which is unrelated to the e-commerce context provided.
- The retrieved context does not contain any information about weather or related services.
- I need to inform the user that I cannot provide the current weather information based on the available context.

Context: False


In [17]:
# Try out your own questions here!
# Surrounding whitespace is stripped so that an accidental blank entry is detected.
custom_question = input('Enter your question: ').strip()

if not custom_question:
    # A blank question has nothing to embed or search for, so skip the
    # search and synthesis calls instead of sending an empty string to them.
    print("No question entered; skipping search.")
else:
    print(f'Custom question: {custom_question}')

    # Retrieve up to 3 rows for the question and synthesize an answer from them.
    results = vec.search(custom_question, limit=3)
    response = Synthesizer.generate_response(question=custom_question, context=results)

    print(f"\n{response.answer}")
    print("\nThought process:")
    for thought in response.thought_process:
        print(f"- {thought}")
    print(f"\nContext: {response.enough_context}")

Custom question: I bought a product from your website but it appears that I am getting promotional emails that I no longer want to receive. How can I stop receiving these emails?

To stop receiving promotional emails, you can click the "Unsubscribe" link located at the bottom of any of our email newsletters. This will remove you from our mailing list.

Thought process:
- The user wants to stop receiving promotional emails.
- The context provides information on how to unsubscribe from email newsletters.
- The relevant information is that the user can click the 'Unsubscribe' link at the bottom of any email newsletter to stop receiving them.

Context: True


In [18]:
# Restrict the search to records whose metadata has category "Shipping".
metadata_filter = {"category": "Shipping"}

# Same question and limit as before, now with the metadata filter applied.
results = vec.search(relevant_question, limit=3, metadata_filter=metadata_filter)

In [19]:
# Only the two Shipping rows come back in the recorded run, even though limit=3.
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-20T18:37:27.834603


In [20]:
# Answer the shipping question again, this time from the category-filtered rows.
response = Synthesizer.generate_response(
    question=relevant_question,
    context=results,
)

# Assemble the report (answer, reasoning steps, enough_context flag), then print it entry by entry.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines += [f"- {step}" for step in response.thought_process]
report_lines.append(f"\nContext: {response.enough_context}")
for report_line in report_lines:
    print(report_line)


We offer two main shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days. Additionally, we provide international shipping to most countries worldwide, with costs and delivery times varying by location.

Thought process:
- The context provides information about the shipping options available, which include standard and express shipping.
- It also mentions international shipping, indicating that the company ships to most countries worldwide.
- The context does not specify any additional shipping options or details about costs, but it provides enough information to answer the user's question about the basic shipping options.

Context: True


In [21]:
# The same category restriction, expressed as a timescale_vector Predicates
# object (field, operator, value) instead of a metadata dict.
predicates = client.Predicates("category", "==", "Shipping")
results = vec.search(relevant_question, limit=3, predicates=predicates)


In [22]:
# The recorded output matches the metadata_filter search: the same two Shipping rows.
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-20T18:37:27.834603


In [23]:
# `|` combines two predicates with OR: category is "Shipping" or "Services".
predicates = client.Predicates("category", "==", "Shipping") | client.Predicates(
    "category", "==", "Services"
)
results = vec.search(relevant_question, limit=3, predicates=predicates)


In [24]:
# In the recorded run the third hit is now a Services row alongside the two Shipping rows.
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-20T18:37:27.834603
2,86d022c4-4dd7-11f0-a63f-20f318bea63c,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540579,Services,2025-06-20T18:37:32.358010


In [25]:
# `&` combines two predicates with AND: category is "Shipping" and
# created_at is later than 2024-09-01.
# NOTE(review): created_at shows up in the results as an ISO-8601 string, so the
# ">" comparison is presumably made against that stored value — confirm how the
# backend compares it.
predicates = client.Predicates("category", "==", "Shipping") & client.Predicates(
    "created_at", ">", "2024-09-01"
)
results = vec.search(relevant_question, limit=3, predicates=predicates)

In [26]:
# Both Shipping rows pass the date condition in the recorded run (created_at is 2025-06-20).
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318344,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468076,Shipping,2025-06-20T18:37:27.834603


In [27]:
# September 2024. The end bound is the first instant of October so that all of
# 30 September is covered; datetime(2024, 9, 30) is midnight at the START of
# that day and would leave it out.
# NOTE(review): assumes VectorStore.search treats the end bound as exclusive — confirm.
time_range = (datetime(2024, 9, 1), datetime(2024, 10, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [28]:
# Empty in the recorded run: no stored record has a timestamp in this range.
# Note that the empty frame has a single `metadata` column rather than the
# expanded `category` / `created_at` columns seen in non-empty results.
results

Unnamed: 0,id,content,embedding,distance,metadata


In [29]:
# August 2024. August has 31 days, so the end bound is the first instant of
# September; datetime(2024, 8, 30) would leave out both 30 and 31 August.
# NOTE(review): assumes VectorStore.search treats the end bound as exclusive — confirm.
time_range = (datetime(2024, 8, 1), datetime(2024, 9, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [30]:
# Empty in the recorded run: no stored record has a timestamp in this range.
results

Unnamed: 0,id,content,embedding,distance,metadata


In [31]:
# June — Returning results
# The end bound is the first instant of July so that all of 30 June is covered;
# datetime(2025, 6, 30) is midnight at the START of that day and would leave it out.
# NOTE(review): assumes VectorStore.search treats the end bound as exclusive — confirm.
time_range = (datetime(2025, 6, 1), datetime(2025, 7, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [32]:
# Three hits in the recorded run; all of them have created_at on 2025-06-20.
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,814fe3ca-4dd7-11f0-b77b-e78e4fb36ab9,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318317,Shipping,2025-06-20T18:37:23.128929
1,841deb2e-4dd7-11f0-883e-57d287dba527,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468092,Shipping,2025-06-20T18:37:27.834603
2,86d022c4-4dd7-11f0-a63f-20f318bea63c,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540548,Services,2025-06-20T18:37:32.358010


In [33]:
# May — Not returning any results
# The end bound is the first instant of June so that all of 31 May is covered;
# datetime(2025, 5, 31) is midnight at the START of that day and would leave it out.
# NOTE(review): assumes VectorStore.search treats the end bound as exclusive — confirm.
time_range = (datetime(2025, 5, 1), datetime(2025, 6, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [34]:
# Empty in the recorded run: no stored record has a timestamp in this range.
results

Unnamed: 0,id,content,embedding,distance,metadata
