In [82]:
import singlestoredb
import json
import numpy as np
import sys
import os
import logging

In [83]:
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)

In [84]:
# 1. Set Environment Variables
os.environ['SINGLESTORE_HOST'] = 'svc-3482219c-a389-4079-b18b-d50662524e8a-shared-dml.aws-virginia-6.svc.singlestore.com'
os.environ['SINGLESTORE_PORT'] = '3333'
os.environ['SINGLESTORE_USER'] = 'sb'
os.environ['SINGLESTORE_PASSWORD'] = '1qus8i7myhm3NUQS0TX0dFoge7lvA1ip'
os.environ['SINGLESTORE_DATABASE'] = 'db_aurelia_7d548'

HOST = os.getenv('SINGLESTORE_HOST')
PORT = int(os.getenv('SINGLESTORE_PORT', 3306))
USER = os.getenv('SINGLESTORE_USER')
PASSWORD = os.getenv('SINGLESTORE_PASSWORD')
DATABASE = os.getenv('SINGLESTORE_DATABASE')

In [85]:
# Validate that all parameters are available
if not all([HOST, PORT, USER, PASSWORD, DATABASE]):
    logging.error("One or more environment variables for connection parameters are missing.")
    sys.exit(1)

In [86]:
# 3. Establish Connection to SingleStore
try:
    connection = singlestoredb.connect(
        host=HOST,
        port=PORT,
        user=USER,
        password=PASSWORD,
        database=DATABASE
    )
    cursor = connection.cursor()
    logging.info("Successfully connected to SingleStore.")
except singlestoredb.Error as e:
    logging.error(f"SingleStore Error: {e}")
    sys.exit(1)
except Exception as e:
    logging.error(f"Unexpected Error: {e}")
    sys.exit(1)


2025-01-12 04:35:35,213 - INFO - Successfully connected to SingleStore.


In [None]:
# 4. Create Table with Vector and Metadata Columns
delete_table_query = """
DROP TABLE vectors_table;
"""

create_table_query = """
CREATE TABLE IF NOT EXISTS vectors_table (
    id INT AUTO_INCREMENT PRIMARY KEY,
    vector BLOB NOT NULL,
    file_name VARCHAR(255) NOT NULL,
    summary VARCHAR(255) NOT NULL,
    content VARCHAR(255) NOT NULL
);
"""

try:
    cursor.execute(delete_table_query)
    cursor.execute(create_table_query)
    connection.commit()
    logging.info("Table 'vectors_table' is ready.")
except singlestoredb.Error as e:
    logging.error(f"SingleStore Error while creating table: {e}")
    cursor.close()
    connection.close()
    sys.exit(1)
except Exception as e:
    logging.error(f"Unexpected Error while creating table: {e}")
    cursor.close()
    connection.close()
    sys.exit(1)

2025-01-12 04:35:35,619 - INFO - Table 'vectors_table' is ready.


In [88]:
# 5. Prepare Multiple Vectors and Metadata for Bulk Insert
vector_dimension = 5

# Example list of vectors and metadata
data_to_insert = [
    (np.random.rand(vector_dimension).tolist(), "file_name", "summary", "content"),
    (np.random.rand(vector_dimension).tolist(), "file_name1", "summary1", "content1"),
    (np.random.rand(vector_dimension).tolist(), "file_name2", "summary2", "content2"),
    (np.random.rand(vector_dimension).tolist(), "file_name3", "summary3", "content3"),
    (np.random.rand(vector_dimension).tolist(), "file_name4", "summary4", "content4")
]

# Prepare and format data for insertion
formatted_data = []
for data in data_to_insert:
    vector = data[0]
    file_name = data[1]
    summary = data[2]
    content = data[3]
    
    if len(vector) != vector_dimension:
        logging.error(f"Vector dimensionality mismatch: expected {vector_dimension}, got {len(vector)}")
        cursor.close()
        connection.close()
        sys.exit(1)
    
    # Ensure the vector is a JSON array
    vector_json = json.dumps(vector)
    formatted_data.append((vector_json, file_name, summary, content))

# Bulk Insert Data into the Table
insert_query = """
INSERT INTO vectors_table (vector, file_name, summary, content)
VALUES (VECTOR(JSON_ARRAY_PACK(%s)), %s, %s, %s);
"""

try:
    cursor.executemany(insert_query, formatted_data)
    connection.commit()
    logging.info(f"Inserted {cursor.rowcount} rows successfully into 'vectors_table'.")
except singlestoredb.Error as e:
    logging.error(f"SingleStore Error while inserting data: {e}")
    cursor.close()
    connection.close()
    sys.exit(1)
except Exception as e:
    logging.error(f"Unexpected Error while inserting data: {e}")
    cursor.close()
    connection.close()
    sys.exit(1)

2025-01-12 04:35:35,743 - ERROR - SingleStore Error while inserting data: 1128: Function 'db_aurelia_7d548.vector' is not defined


SystemExit: 1

In [67]:
def search_vectors(query_vector, top_t=5):
    if isinstance(query_vector, np.ndarray):
        query_vector = query_vector.tolist()
    elif not isinstance(query_vector, list):
        logging.error("Query vector must be a list or numpy array.")
        return []
    if len(query_vector) != vector_dimension:
        logging.error(f"Vector dimensionality mismatch: expected {vector_dimension}, got {len(query_vector)}")
        return []

    # Construct the VECTOR literal
    vector_literal = ', '.join(map(str, query_vector))

    # SQL query to compute cosine similarity and retrieve top_t vectors
    # SingleStore's HNSW index optimizes this search
    search_query = f"""
    SELECT 
        id, 
        COSINE_SIMILARITY(vector, VECTOR({vector_literal})) AS similarity,
        metadata
    FROM vectors_table
    ORDER BY similarity DESC
    LIMIT {top_t};
    """
    try:
        cursor.execute(search_query)
        results = cursor.fetchall()
        return results
    except singlestoredb.Error as e:
        logging.error(f"SingleStore Error while searching vectors: {e}")
        return []
    except Exception as e:
        logging.error(f"Unexpected Error while searching vectors: {e}")
        return []

# 7. Mock a Search
def mock_search():
    query_vector = np.random.rand(vector_dimension).tolist()
    top_t = 3
    logging.info("Performing a mock search with a random query vector...")
    logging.info(f"Query vector: {query_vector}")
    results = search_vectors(query_vector, top_t)
    if not results:
        logging.info("No results found.")
        return
    logging.info(f"Top {top_t} similar vectors:")
    for row in results:
        vector_id, similarity, metadata_json = row
        metadata = json.loads(metadata_json)
        logging.info(f"ID: {vector_id}, Similarity: {similarity:.4f}, Metadata: {metadata}")

mock_search()

# 8. Clean Up
cursor.close()
connection.close()
logging.info("Connection to SingleStore closed.")


2025-01-12 04:34:08,774 - INFO - Performing a mock search with a random query vector...
2025-01-12 04:34:08,775 - INFO - Query vector: [0.567389708075088, 0.027577619187354796, 0.3421823434073791, 0.15645516366942647, 0.5313828253357159]
2025-01-12 04:34:08,859 - ERROR - SingleStore Error while searching vectors: 1128: Function 'db_aurelia_7d548.vector' is not defined
2025-01-12 04:34:08,862 - INFO - No results found.
2025-01-12 04:34:08,864 - INFO - Connection to SingleStore closed.
