# Weaviate Vector Store Demo

A simple demonstration of using Weaviate for vector similarity search with OpenAI embeddings.

## Setup
Import the required libraries, load environment variables, initialize the OpenAI client, and load the cat-fact passages. The Weaviate client is created in the next section.

In [6]:
#%%
# Weaviate + OpenAI setup
import os

import dotenv
import weaviate
from openai import OpenAI
from tqdm import tqdm
from weaviate.classes.config import Configure, DataType, Property
from weaviate.classes.init import Auth


# Load variables from a .env file (if one is found) into the process environment.
dotenv.load_dotenv()


# Shared OpenAI client used by the embedding and chat-completion cells.
# The key is read from the environment so it never appears in the notebook.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def load_passages(file_path="../../data/cat_facts.txt"):
    """
    Read a text file and return its passages, one per non-blank line.

    Args:
        file_path: Path to a UTF-8 text file containing one passage per line.

    Returns:
        A list of passages with surrounding whitespace removed. Blank and
        whitespace-only lines are skipped so that no empty string is later
        sent for embedding or stored as a fact.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [passage for passage in stripped_lines if passage]

# Load the cat-fact passages from the default path; `data` is a list of stripped lines.
data = load_passages()

## Connect to Weaviate
Initialize the connection to Weaviate Cloud (WCD) using the cluster URL and API key read from environment variables.

In [7]:
# Cluster URL and API key come from the environment; indexing os.environ
# raises KeyError when a variable is missing.
wcd_url = os.environ["WCD_URL"]
wcd_api_key = os.environ["WEAVIATE_API_KEY"]

weaviate_credentials = Auth.api_key(wcd_api_key)
client_weaviate = weaviate.connect_to_weaviate_cloud(
    cluster_url=wcd_url,
    auth_credentials=weaviate_credentials,
)

# Readiness check: prints the boolean reported by the client.
cluster_ready = client_weaviate.is_ready()
print(cluster_ready)

True


## Create Embeddings
Convert text passages into numerical vectors using OpenAI's embedding model.

In [8]:
def get_openai_embeddings(texts, model="text-embedding-ada-002"):
    """
    Request embeddings for one or more texts from OpenAI.

    Args:
        texts: A list of strings (or a single string) to embed. All texts are
            sent in a single API request.
        model: Name of the OpenAI embedding model. Defaults to
            'text-embedding-ada-002'; 'text-embedding-3-small' is an
            alternative if you have access. Use the same model for stored
            passages and for queries so their vectors are comparable.

    Returns:
        The raw response object from the embeddings endpoint, not a bare list
        of vectors. Read individual vectors with `response.data[i].embedding`.

    Raises:
        ValueError: If `texts` is empty.
    """
    # Fail fast with a clear message instead of sending an empty request.
    if not texts:
        raise ValueError("texts must contain at least one non-empty entry")

    response = openai_client.embeddings.create(
        model=model,
        input=texts
    )
    return response

# Embed every passage in one call. `embeddings` is the raw response object
# returned by the helper; vectors are read later via `embeddings.data[i].embedding`.
embeddings = get_openai_embeddings(data)

## Create Collection
Define and create a Weaviate collection schema for storing cat facts with their embeddings.

In [9]:
# Name of the collection that stores the cat facts.
COLLECTION_NAME = "CatFact"

# Drop any collection left over from a previous run so that re-running the
# notebook neither fails on create nor stacks duplicate objects on insert.
# NOTE: this permanently deletes the existing "CatFact" collection and its data.
if client_weaviate.collections.exists(COLLECTION_NAME):
    client_weaviate.collections.delete(COLLECTION_NAME)

# The description and the `fact` property are passed to create() explicitly,
# so the schema is actually applied rather than left to auto-schema.
catfact_collection = client_weaviate.collections.create(
    name=COLLECTION_NAME,
    description="A class for storing cat facts using user-supplied (OpenAI) embeddings.",
    vectorizer_config=Configure.Vectorizer.none(),  # We supply our own embeddings; no auto-vectorization
    properties=[
        Property(
            name="fact",
            data_type=DataType.TEXT,
            description="A single cat fact",
        ),
    ],
)

## Store Vectors
Upload embeddings to Weaviate along with their original text using batch processing.

In [10]:
# Every passage must have exactly one embedding; otherwise facts would be
# stored with the wrong vector or silently dropped by the pairing below.
assert len(embeddings.data) == len(data), "expected exactly one embedding per passage"

with catfact_collection.batch.dynamic() as batch:
    # Pair each passage with its embedding by position.
    for cat_fact, embedding_item in zip(data, embeddings.data):
        batch.add_object(
            properties={"fact": cat_fact},
            vector=embedding_item.embedding,
        )

# Per-object import errors are collected on the batch rather than raised
# inside the loop, so surface them here instead of continuing silently.
failed_objects = catfact_collection.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} of {len(data)} objects failed to import; first failure: {failed_objects[0]}")


## Query Similar Vectors
Demonstrate similarity search by:
1. Converting a question into an embedding
2. Finding the most similar vectors in our collection

In [11]:
user_query = "Are male cats more likely to be left-pawed?"

# Embed the question with the same helper used for the stored passages, so the
# query vector and the stored vectors always come from the same embedding model.
q_embed = get_openai_embeddings([user_query])
query_vector = q_embed.data[0].embedding

# Fetch the three stored facts whose vectors are closest to the query vector.
results = catfact_collection.query.near_vector(
    near_vector=query_vector,
    limit=3
)

In [12]:
# Header line, then one "- <fact>" bullet per retrieved object.
print(f"\nTop 3 results for query: {user_query}")
for hit in results.objects:
    fact_text = hit.properties["fact"]
    print("-", fact_text)


Top 3 results for query: Are male cats more likely to be left-pawed?
- Female cats tend to be right pawed, while male cats are more often left pawed. Interestingly, while 90% of humans are right handed, the remaining 10% of lefties also tend to be male.
- Cats have five toes on each front paw, but only four toes on each back paw.
- Cats are sometimes born with extra toes. This is called polydactyl. These toes will not harm the cat, but you should keep his claws trimmed just like any toe.


## Format Results
Generate a clear answer using the retrieved facts and OpenAI's language model.

In [14]:
# Render the retrieved facts as a plain bulleted list. Interpolating the Python
# list directly would put brackets, quotes and escape sequences into the prompt.
retrieved_facts = [match.properties["fact"] for match in results.objects]
facts_block = "\n".join(f"- {fact}" for fact in retrieved_facts) or "(no facts found)"

response = openai_client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        # The system message restricts the answer to the supplied facts.
        {"role": "system", "content": "You are a helpful assistant. Based on the provided facts, give a concise answer to the question. If the facts don't directly answer the question, say so."},
        {"role": "user", "content": f"""
Question: {user_query}

Relevant facts found:
{facts_block}

Please provide a brief, clear answer based on these facts."""}
    ]
)
print("User Query:")
print(user_query)
print("AI-Generated Answer:")
print(response.choices[0].message.content)

User Query:
Are male cats more likely to be left-pawed?
AI-Generated Answer:
Male cats are more likely to be left-pawed, while female cats tend to be right-pawed.
