In [None]:
import psycopg2
import json

# Load database credentials.
# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on the
# platform's default locale encoding (which differs between operating systems).
with open('data/creds.json', encoding='utf-8') as f:
    creds = json.load(f)


# Open a PostgreSQL connection using the settings loaded into `creds`
def connect_db():
    """Return a new psycopg2 connection configured from the module-level ``creds`` mapping.

    Reads the ``database``, ``user``, ``password``, ``host`` and ``port`` keys of ``creds``
    and forwards them to ``psycopg2.connect``. The caller is responsible for closing the
    returned connection.
    """
    connection_settings = {
        "dbname": creds['database'],
        "user": creds['user'],
        "password": creds['password'],
        "host": creds['host'],
        "port": creds['port'],
    }
    return psycopg2.connect(**connection_settings)


# Helpers and entry point that turn each row of `amazon_watches` into a plain-text document
# (text only -- no embeddings are computed here).
def _is_missing(value):
    """Return True when a column value carries no information (NULL or blank text).

    Numeric zero is deliberately NOT treated as missing, so a product with 0 reviews is
    described as having 0 reviews rather than as having no data.
    """
    return value is None or (isinstance(value, str) and not value.strip())


def _describe(value, template, fallback):
    """Format ``value`` into ``template`` (one ``{}`` slot), or return ``fallback`` if missing."""
    return fallback if _is_missing(value) else template.format(value)


def _row_to_document(row):
    """Render one result row as a single descriptive sentence block.

    ``row`` must hold, in order: title, price, overall_rating, total_reviews, availability,
    model_number, material, item_length, clasp (the column order of the SELECT below).
    """
    title = "N/A" if _is_missing(row[0]) else row[0]
    price = _describe(row[1], "The product costs ${}.", "Price not available.")
    rating = _describe(row[2], "It has an overall rating of {}.", "No rating available.")
    # The fallback used to read "No rating available." (copy-paste from the line above).
    total_reviews = _describe(row[3], "It also has a total of {} reviews.", "No reviews available.")
    availability = "Availability information not provided." if _is_missing(row[4]) else row[4]
    model = _describe(row[5], "The model number is {}.", "Model number not provided.")
    material = _describe(row[6], "The material is {}.", "Material not specified.")
    length = _describe(row[7], "It has an item length of {}.", "Item length not provided.")
    clasp = _describe(row[8], "The clasp type is {}.", "Clasp type not specified.")

    return f"{title}. {price} {rating} {total_reviews} {availability} {model} {material} {length} {clasp}"


def fetch_data_as_documents(conn=None):
    """Fetch every watch row and return one text document per row.

    Args:
        conn: Optional open DB-API connection to reuse. When omitted, a connection is
            opened with ``connect_db()`` and closed before returning. A connection passed
            in by the caller is left open; the caller owns its lifetime.

    Returns:
        list[str]: One document per row, in the order the database returned the rows.
    """
    owns_connection = conn is None
    if owns_connection:
        conn = connect_db()

    query = """
        SELECT title, price, overall_rating, total_reviews, availability, model_number, material, item_length, clasp
        FROM amazon_watches;
    """

    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            return [_row_to_document(row) for row in cursor.fetchall()]
    finally:
        # Only close what this function opened.
        if owns_connection:
            conn.close()


# Example usage: build the text corpus once; later cells look documents up by list position.
documents = fetch_data_as_documents()

# # Print the generated documents (plain text, not embeddings)
# for doc in documents:
#     print(doc)


In [11]:
import faiss
import numpy as np


# Simulate embeddings.
# NOTE: these are random placeholder vectors, not the output of a text encoder, so nearest
# neighbours in this index carry no semantic meaning. A dedicated seeded generator keeps the
# vectors identical across Restart & Run All.
EMBEDDING_DIM = 768
embedding_rng = np.random.default_rng(0)
document_embeddings = embedding_rng.random((len(documents), EMBEDDING_DIM), dtype=np.float32)

# Build FAISS index (exact L2 search over EMBEDDING_DIM-dimensional float32 vectors)
index = faiss.IndexFlatL2(EMBEDDING_DIM)
index.add(document_embeddings)

# Function to search in FAISS index
def search(query_embedding, k=3):
    """Return the positions of the nearest indexed vectors to ``query_embedding``.

    Args:
        query_embedding: A single query vector (any array-like of numbers); it is coerced
            to a contiguous float32 row because FAISS only accepts float32 input.
        k: Maximum number of neighbours to return.

    Returns:
        numpy.ndarray: Up to ``k`` integer positions into the indexed vectors, nearest
        first. Fewer than ``k`` (possibly zero) are returned when the index holds fewer
        vectors, instead of the ``-1`` padding FAISS would otherwise produce -- a ``-1``
        would silently select the *last* element if used as a Python list index.
    """
    query = np.ascontiguousarray(query_embedding, dtype=np.float32).reshape(1, -1)

    # Never ask for more neighbours than exist; FAISS also rejects k <= 0.
    k = min(k, index.ntotal)
    if k <= 0:
        return np.empty(0, dtype=np.int64)

    _, indices = index.search(query, k)
    hits = indices[0]
    return hits[hits >= 0]

# Simulate a query embedding.
# A dedicated seeded generator makes the retrieved documents reproducible regardless of how
# much global random state earlier cells consumed; the vector length is taken from the index
# so it cannot drift from the indexed dimension.
query_rng = np.random.default_rng(1)
query_embedding = query_rng.random(index.d, dtype=np.float32)

# Search top documents
top_docs_indices = search(query_embedding)
# FAISS reports -1 for missing neighbours; skip those so documents[-1] is never picked by accident.
top_docs = [documents[i] for i in top_docs_indices if i >= 0]

# # Print the retrieved documents
# for doc in top_docs:
#     print(doc, "\n")


In [10]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load pre-trained model.
# The tokenizer and the model must come from the same checkpoint, so name it once.
MODEL_NAME = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

def generate_answer(query, top_docs, **generate_kwargs):
    """Generate an answer to ``query`` conditioned on the retrieved documents.

    Args:
        query: The user's question.
        top_docs: Iterable of document strings used as context; they are joined with
            single spaces. An empty iterable yields an empty context.
        **generate_kwargs: Optional extra keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``max_new_tokens``). With none given, the
            model's default generation settings are used.

    Returns:
        str: The decoded first generated sequence, with special tokens removed.
    """
    # Concatenate the query with retrieved documents.
    # No literal "</s>" is appended: the T5 tokenizer adds the end-of-sequence token itself,
    # and a hand-written one is redundant (it triggers an "already has </s>" warning).
    context = " ".join(top_docs)
    input_text = f"question: {query} context: {context}"

    # Tokenize (truncating to the 512-token limit used here) and generate the output
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(**inputs, **generate_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example query
# NOTE: `top_docs` was retrieved with randomly generated embeddings (for both the documents
# and the query), so the context passed to the model here is not selected by relevance.
query = "Which product would be best value for budget?"

# Feed the question plus the retrieved documents to the T5 model loaded above
answer = generate_answer(query, top_docs)

print(answer)


SAMSUNG Galaxy Watch 7 40mm Bluetooth AI Smartwatch
