In [None]:
# Install the Weaviate Python client into the kernel's environment, upgrading it
# if an older version is already present (-U).
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from a local .env file (if one exists)
load_dotenv()

# Connection settings and model-provider keys come from the environment so that
# no credentials are hard-coded in the notebook.
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# Fail early with a readable message instead of passing None to the client.
missing = [
    name
    for name, value in (("CLUSTER_URL", CLUSTER_URL), ("API_KEY", API_KEY))
    if not value
]
if missing:
    raise ValueError(f"Missing required environment variable(s): {', '.join(missing)}")

# Forward only the provider keys that are actually set; an unset key would
# otherwise be sent as a None header value.
provider_headers = {
    header: key
    for header, key in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
    )
    if key
}

# Connect to Weaviate Cloud (timeouts are in seconds)
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=provider_headers or None,
    additional_config=AdditionalConfig(
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Collect the status once, then report it; each of these calls hits the server
# (or inspects the connection), so none of them is repeated below.
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__
live = client.is_live()
connected = client.is_connected()

print(f"Weaviate Ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
print(f"Weaviate Live: {live}")
print(f"Client Connected: {connected}")


In [None]:
# Check whether a collection exists
col = client.collections.get("<COLLECTION_NAME>")
print("Collection exists" if col.exists() else "Collection does not exist")

In [None]:
# Fetch one object together with its vector and report the vector's dimensionality
coll = client.collections.get("<COLLECTION_NAME>")

some_data = coll.query.fetch_objects(include_vector=True, limit=1)

if not some_data.objects:
    # An empty collection has nothing to measure; say so instead of raising IndexError.
    print("The collection contains no objects, so no vector is available.")
else:
    vectors = some_data.objects[0].vector
    if "default" not in vectors:
        # Objects that only carry named vectors have no "default" entry.
        print(f"No 'default' vector on this object; available vectors: {list(vectors)}")
    else:
        default_vector = vectors["default"]
        dimensionality = len(default_vector)
        print(f"The default vector has {dimensionality} dimensions.")

In [None]:
# Fetch one object for a tenant and report the dimensionality of its vector
coll = client.collections.get("<COLLECTION_NAME>")
tenants = coll.tenants.get()

# Take the first Tenant object without materialising the whole list;
# None signals that the collection has no tenants at all.
first_tenant = next(iter(tenants.values()), None)

if first_tenant is None:
    print("The collection has no tenants.")
else:
    tenant_name = first_tenant.name

    # Scope the collection handle to this tenant and fetch a single object
    tenant_coll = coll.with_tenant(tenant_name)
    tenant_object = tenant_coll.query.fetch_objects(include_vector=True, limit=1)

    if not tenant_object.objects:
        # Nothing stored for this tenant; report it instead of raising IndexError.
        print(f"Tenant '{tenant_name}' contains no objects, so no vector is available.")
    else:
        vectors = tenant_object.objects[0].vector
        if "default" not in vectors:
            # Objects that only carry named vectors have no "default" entry.
            print(f"No 'default' vector for tenant '{tenant_name}'; available vectors: {list(vectors)}")
        else:
            default_vector = vectors["default"]
            dimensionality = len(default_vector)
            print(f"The default vector has {dimensionality} dimensions for tenant '{tenant_name}'.")

In [None]:
# Query & filter by creation time
from datetime import datetime, timezone
from weaviate.classes.query import Filter

collection = client.collections.get("<COLLECTION_NAME>")

# Pass a timezone-aware datetime rather than a bare ISO string: the cut-off is
# then unambiguous (UTC) and the client serialises it as a date value itself.
filter_time = datetime(2024, 1, 1, tzinfo=timezone.utc)
print(filter_time.isoformat())
res = collection.query.fetch_objects(
    filters=Filter.by_creation_time().greater_than(filter_time)
)
print(res)

In [None]:
# Sorting works only with fetch_objects: similarity searches do not support it,
# because their results are already ordered by similarity.
from weaviate.classes.query import Sort

movies_col = client.collections.get("<COLLECTION_NAME>")

# Descending order on the chosen property
descending_by_prop = Sort.by_property(name="<PROP_NAME>", ascending=False)
response = movies_col.query.fetch_objects(limit=9, sort=descending_by_prop)

for obj in response.objects:
    print(obj.properties)

In [None]:
from weaviate.classes.query import MetadataQuery
# **** Querying ****
def query_vector_with_fallback(tenant_collection, near_vector, limit, tenant_name,
                               filters=None, include_vector=False,
                               return_metadata=None, max_distance=None):
    """Run a single near-vector query and report how long it took.

    Args:
        tenant_collection: Object exposing ``query.near_vector(**kwargs)``.
        near_vector: Query vector, forwarded as ``near_vector``.
        limit: Maximum number of results, forwarded as ``limit``.
        tenant_name: Accepted for interface compatibility; not used here.
        filters: Optional filter, forwarded as ``filters`` whenever it is not None.
        include_vector: Forwarded as ``include_vector``.
        return_metadata: Forwarded as ``return_metadata``; when None,
            ``MetadataQuery(distance=True)`` is used.
        max_distance: Optional distance threshold, forwarded as ``distance``
            whenever it is not None (so ``0.0`` is honoured).

    Returns:
        Whatever ``near_vector`` returns, or None if the query raised.

    NOTE(review): despite the function name and the error message, no fallback
    query is issued here; a failed query is only logged and None is returned.
    Callers must handle None.
    """
    # Function-scope import: the notebook's import cell does not provide `time`.
    import time

    # Set default metadata safely (never share a mutable default between calls)
    if return_metadata is None:
        return_metadata = MetadataQuery(distance=True)

    query_args = {
        "near_vector": near_vector,
        "limit": limit,
        "return_metadata": return_metadata,
        "include_vector": include_vector,
    }

    # Compare against None explicitly so a supplied filter or threshold is never
    # dropped just because it happens to evaluate as falsy.
    if filters is not None:
        query_args["filters"] = filters

    if max_distance is not None:
        query_args["distance"] = max_distance

    try:
        # perf_counter is monotonic, so the elapsed time cannot be distorted by
        # system clock adjustments the way time.time() differences can.
        started = time.perf_counter()
        query_result = tenant_collection.query.near_vector(**query_args)
        elapsed_ms = (time.perf_counter() - started) * 1000
        print(f"Vector query took {elapsed_ms:.2f}ms")
        return query_result
    except Exception as e:
        print(f"Error in vector query with limit {limit}, falling back to smaller batches: {e}")
        # Make the failure result explicit rather than falling off the end.
        return None

