In [None]:
from datetime import datetime
import logging

from openai import RateLimitError
import pandas as pd
from database.vector_store import VectorStore
from timescale_vector.client import uuid_from_time

# Initialize VectorStore
# VectorStore is the project-local class imported from database.vector_store;
# this `vec` instance is what prepare_record and the ingestion cells below use.
vec = VectorStore()

# Read the CSV file
# The file is semicolon-delimited. prepare_record (below) reads the
# "question", "answer" and "category" columns of each row.
df = pd.read_csv("../data/faq_dataset.csv", sep=";")


# Prepare data for insertion
def prepare_record(row: pd.Series) -> pd.Series:
    """Prepare a record for insertion into the vector store.

    The row's question and answer are combined into a single text, which is
    embedded with ``vec.get_embedding``. The record ID is a UUID version 1
    built by ``uuid_from_time``, which captures a timestamp.

    A single timestamp is taken per record and used for both the UUID and the
    ``created_at`` metadata field, so the time encoded in the ID and the time
    stored in the metadata always agree.

    Args:
        row: A row of the FAQ DataFrame providing "question", "answer" and
            "category" entries.

    Returns:
        A pandas Series with the keys "id", "metadata", "contents" and
        "embedding".

    Note:
        - By default, this function uses the current time for the UUID.
        - To use a specific time:
          1. Create a datetime object for your desired time.
          2. Assign it to ``created_at`` below instead of ``datetime.now()``.

        Example:
            specific_time = datetime(2023, 1, 1, 12, 0, 0)
            id = str(uuid_from_time(specific_time))

        This is useful when your content already has an associated datetime.
    """
    content = f"Question: {row['question']}\nAnswer: {row['answer']}"
    embedding = vec.get_embedding(content)
    # Take the timestamp once so the UUID and the created_at metadata refer to
    # exactly the same instant.
    created_at = datetime.now()
    return pd.Series(
        {
            "id": str(uuid_from_time(created_at)),
            "metadata": {
                "category": row["category"],
                "created_at": created_at.isoformat(),
            },
            "contents": content,
            "embedding": embedding,
        }
    )

logger = logging.getLogger('pgvectorscale')
try:
    # prepare_record is applied row by row; each call requests an embedding.
    records_df = df.apply(prepare_record, axis=1)
except RateLimitError:
    logger.exception("OpenAI Rate Limit Exceeded")
    # Re-raise after logging: records_df was not assigned, so continuing would
    # either fail later with a NameError or silently reuse a stale records_df
    # left over from an earlier run of this cell.
    raise

In [None]:
records_df

Unnamed: 0,id,metadata,contents,embedding
0,ffad4316-4d0a-11f0-a931-564911aa21b7,"{'category': 'Shipping', 'created_at': '2025-0...",Question: What are your shipping options?\nAns...,"[-0.009443890303373337, 0.013674567453563213, ..."
1,001f875a-4d0b-11f0-ae38-a119bc528d6b,"{'category': 'Order Management', 'created_at':...",Question: How can I track my order?\nAnswer: Y...,"[0.03734125196933746, 0.005963528994470835, 0...."
2,0054aa2a-4d0b-11f0-81ab-2b863772fd9d,"{'category': 'Returns', 'created_at': '2025-06...",Question: What is your return policy?\nAnswer:...,"[0.017061205580830574, 0.052696000784635544, 0..."
3,00bd0e30-4d0b-11f0-a700-7add65416f0e,"{'category': 'Shipping', 'created_at': '2025-0...",Question: Do you offer international shipping?...,"[0.015880830585956573, 0.020843589678406715, 0..."
4,00f88938-4d0b-11f0-a997-d8015560be9a,"{'category': 'Order Management', 'created_at':...",Question: How do I cancel an order?\nAnswer: Y...,"[0.019644808024168015, 0.03133554384112358, -0..."
5,017499a6-4d0b-11f0-93af-bb0d3ffc7553,"{'category': 'Payment', 'created_at': '2025-06...",Question: What payment methods do you accept?\...,"[0.027361895889043808, -0.02153364010155201, 0..."
6,01e1814c-4d0b-11f0-a06b-fb0c02d18b8a,"{'category': 'Product Information', 'created_a...",Question: Are your products authentic?\nAnswer...,"[0.006578062195330858, 0.00845483597368002, -0..."
7,024eac72-4d0b-11f0-bea5-ab46ab40631e,"{'category': 'Customer Support', 'created_at':...",Question: How do I contact customer service?\n...,"[0.06086518615484238, -0.005692227743566036, 0..."
8,02c53888-4d0b-11f0-a789-1f173580564f,"{'category': 'Services', 'created_at': '2025-0...",Question: Do you offer gift wrapping?\nAnswer:...,"[0.007290979847311974, 0.012285413220524788, -..."
9,032c98fc-4d0b-11f0-84c4-cddcc60baca8,"{'category': 'Pricing', 'created_at': '2025-06...",Question: What is your price match policy?\nAn...,"[-0.037919409573078156, -0.007301652804017067,..."


In [None]:
# Set up the database table(s) for the vector store (inferred from the method
# name; the implementation lives in database.vector_store).
vec.create_tables()

In [None]:
# Build the index on the stored embeddings (the implementation lives in
# database.vector_store).
vec.create_index()  # DiskAnnIndex

In [None]:
# Write the prepared records (columns: id, metadata, contents, embedding) to
# the vector store.
vec.upsert(records_df)

In [None]:
from datetime import datetime
from database.vector_store import VectorStore
from services.synthesizer import Synthesizer
from timescale_vector import client

# Initialize VectorStore
# A fresh instance is bound to `vec` here; all search cells below use it.
vec = VectorStore()

In [None]:
# A question that appears verbatim in the FAQ dataset, used as the "relevant"
# query in the search cells below.
relevant_question = "What are your shipping options?"

In [None]:
# Unfiltered search: fetch up to 3 matches for the question.
results = vec.search(relevant_question, limit=3)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318256,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468046,Shipping,2025-06-19T18:13:30.085432
2,02c53888-4d0b-11f0-a789-1f173580564f,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540476,Services,2025-06-19T18:13:33.494388


In [None]:
# Generate an answer from the question plus the retrieved rows. The result
# exposes `answer`, `thought_process` and `enough_context` (used further down).
response = Synthesizer.generate_response(question=relevant_question, context=results)

In [None]:
response

SynthesizedResponse(thought_process=['The user is asking about the shipping options available.', 'The retrieved context provides information on the shipping options offered by the company.', 'The context mentions both standard and express shipping options, including their delivery times.', "There is no additional information needed to answer the user's question."], answer='We offer two shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days.', enough_context=True)

In [None]:
# Assemble the report line by line, then emit it with a single print call:
# the answer, one bullet per reasoning step, and the enough_context flag.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines.extend(f"- {step}" for step in response.thought_process)
report_lines.append(f"\nContext: {response.enough_context}")
print("\n".join(report_lines))


We offer two shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days.

Thought process:
- The user is asking about the shipping options available.
- The retrieved context provides information on the shipping options offered by the company.
- The context mentions both standard and express shipping options, including their delivery times.
- There is no additional information needed to answer the user's question.

Context: True


In [None]:
# A question unrelated to the FAQ content, to see what retrieval returns when
# nothing in the dataset actually answers it.
irrelevant_question = "What is the weather in Tokyo?"

# Same unfiltered search as before: up to 3 matches.
results = vec.search(irrelevant_question, limit=3)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.915508,Shipping,2025-06-19T18:13:30.085432
1,056cdd02-4d0b-11f0-a930-121864602eee,Question: What is your warranty policy?\nAnswe...,"[0.014443719, 0.014022687, 0.017999096, 0.0259...",0.921238,Product Information,2025-06-19T18:13:37.948493
2,0054aa2a-4d0b-11f0-81ab-2b863772fd9d,Question: What is your return policy?\nAnswer:...,"[0.017061206, 0.052696, 0.02438565, 0.03504734...",0.923942,Returns,2025-06-19T18:13:29.401297


In [None]:
# Synthesize an answer for the off-topic question from the retrieved rows.
response = Synthesizer.generate_response(
    question=irrelevant_question,
    context=results,
)

# Assemble the report line by line, then emit it with a single print call:
# the answer, one bullet per reasoning step, and the enough_context flag.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines.extend(f"- {step}" for step in response.thought_process)
report_lines.append(f"\nContext: {response.enough_context}")
print("\n".join(report_lines))


I'm sorry, but I cannot provide the current weather information for Tokyo as it is outside the scope of the provided context. Please check a reliable weather service for the most up-to-date information.

Thought process:
- The user's question is about the current weather in Tokyo, which is not related to e-commerce or the provided context.
- The retrieved context contains information about shipping, warranty, and return policies, none of which are relevant to the user's question about the weather.
- I need to inform the user that I cannot provide the current weather information as it is outside the scope of the provided context.

Context: False


In [None]:
# Restrict the search to records whose metadata has category "Shipping".
metadata_filter = {"category": "Shipping"}

# Still asks for up to 3 matches; the filter narrows the candidate records.
results = vec.search(relevant_question, limit=3, metadata_filter=metadata_filter)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-19T18:13:30.085432


In [None]:
# Synthesize an answer from the category-filtered search results.
response = Synthesizer.generate_response(
    question=relevant_question,
    context=results,
)

# Assemble the report line by line, then emit it with a single print call:
# the answer, one bullet per reasoning step, and the enough_context flag.
report_lines = [f"\n{response.answer}", "\nThought process:"]
report_lines.extend(f"- {step}" for step in response.thought_process)
report_lines.append(f"\nContext: {response.enough_context}")
print("\n".join(report_lines))


We offer two main shipping options: standard shipping, which takes 3-5 business days, and express shipping, which takes 1-2 business days. Additionally, we provide international shipping to most countries worldwide, with costs and delivery times varying by location.

Thought process:
- The user is asking about the shipping options available.
- The retrieved context provides information on standard and express shipping options.
- The context also mentions international shipping, which might be relevant if the user is considering shipping outside the country.
- I will synthesize an answer that includes both domestic and international shipping options.

Context: True


In [None]:
# The same "category is Shipping" restriction, expressed as a
# timescale_vector Predicates object instead of a metadata dict.
predicates = client.Predicates("category", "==", "Shipping")
results = vec.search(relevant_question, limit=3, predicates=predicates)


In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-19T18:13:30.085432


In [None]:
# Accept records from either category: build each condition on its own, then
# combine the two with the | operator.
shipping_only = client.Predicates("category", "==", "Shipping")
services_only = client.Predicates("category", "==", "Services")
predicates = shipping_only | services_only
results = vec.search(relevant_question, limit=3, predicates=predicates)


In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-19T18:13:30.085432
2,02c53888-4d0b-11f0-a789-1f173580564f,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540579,Services,2025-06-19T18:13:33.494388


In [None]:
# Require both conditions at once: category == "Shipping" and
# created_at > "2024-09-01". The two predicates are combined with &.
in_shipping = client.Predicates("category", "==", "Shipping")
created_after_cutoff = client.Predicates("created_at", ">", "2024-09-01")
predicates = in_shipping & created_after_cutoff
results = vec.search(relevant_question, limit=3, predicates=predicates)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-19T18:13:30.085432


In [None]:
# September 2024. datetime(2024, 9, 30) is midnight at the START of the 30th,
# which would leave the last day out of the window, so the first day of the
# following month is used as the end bound instead.
time_range = (datetime(2024, 9, 1), datetime(2024, 10, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,metadata


In [None]:
# August 2024. August has 31 days and a bare date is midnight at the start of
# that day, so an end bound of Aug 30 would drop Aug 30-31. The first day of
# the following month is used as the end bound instead.
time_range = (datetime(2024, 8, 1), datetime(2024, 9, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,metadata


In [None]:
# June — Returning results
# datetime(2025, 6, 30) is midnight at the START of June 30, which would leave
# that day out of the window; the first day of July is used as the end bound
# so the range covers the whole month.
time_range = (datetime(2025, 6, 1), datetime(2025, 7, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,category,created_at
0,ffad4316-4d0a-11f0-a931-564911aa21b7,Question: What are your shipping options?\nAns...,"[-0.00944389, 0.013674567, 0.037937388, 0.0095...",0.318338,Shipping,2025-06-19T18:13:28.304207
1,00bd0e30-4d0b-11f0-a700-7add65416f0e,Question: Do you offer international shipping?...,"[0.01588083, 0.02084359, 0.068945, 0.040534537...",0.468099,Shipping,2025-06-19T18:13:30.085432
2,02c53888-4d0b-11f0-a789-1f173580564f,Question: Do you offer gift wrapping?\nAnswer:...,"[0.00729098, 0.012285413, -0.0017600359, -0.00...",0.540579,Services,2025-06-19T18:13:33.494388


In [None]:
# May — Not returning any results
# datetime(2025, 5, 31) is midnight at the START of May 31, which would leave
# that day out of the window; the first day of June is used as the end bound
# so the range covers the whole month.
time_range = (datetime(2025, 5, 1), datetime(2025, 6, 1))
results = vec.search(relevant_question, limit=3, time_range=time_range)

In [None]:
results

Unnamed: 0,id,content,embedding,distance,metadata
