In [None]:
# Install (or upgrade to the latest release of) the Weaviate Python client in this kernel's environment
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Connection settings (required) and model-provider API keys (optional)
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail early with a readable message instead of an obscure error from inside
# the client when the two mandatory settings are missing.
if not CLUSTER_URL or not API_KEY:
    raise ValueError(
        "CLUSTER_URL and API_KEY must be set in the environment or in the .env file"
    )

# os.getenv returns None for unset variables; forward only the provider keys
# that are actually configured so no header carries a None value.
provider_headers = {
    header: key
    for header, key in {
        "X-OpenAI-Api-Key": OPENAI_API_KEY,
        "X-Cohere-Api-Key": COHERE_API_KEY,
        "X-Goog-Api-Key": GOOGLE_API_KEY,
    }.items()
    if key
}

# Connect to Weaviate
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=provider_headers,
    additional_config=AdditionalConfig(
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Query each status once and report the stored values
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__
live = client.is_live()
connected = client.is_connected()

print(f"Weaviate Ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
print(f"Weaviate Live: {live}")
print(f"Client Connected: {connected}")

In [None]:
# Walk every object in a collection, printing each one and a running count
collection = client.collections.get("<COLLECTION_NAME>")
total_objects = 0  # stays 0 when the collection yields nothing
for total_objects, item in enumerate(collection.iterator(cache_size=200), start=1):
    print(f"Object: {total_objects}")
    print(item.uuid, item.properties)
print(f"Total objects fetched: {total_objects}")

In [None]:
# Print the name of every collection defined in the Weaviate cluster
try:
    collections = client.collections.list_all()
    if not collections:
        print("No collections found.")
    else:
        print("Collections in Weaviate:")
        # Iterating the mapping yields the collection names (its keys)
        for collection_name in collections:
            print(f"- {collection_name}")
except Exception as list_error:
    print(f"Error retrieving collections: {list_error}")

In [None]:
# Aggregation with groupBy
from weaviate.classes.aggregate import GroupByAggregate

# Handle to the collection being aggregated
collection = client.collections.get("<COLLECTION_NAME>")

# Count objects per distinct value of the chosen property
response = collection.aggregate.over_all(
    total_count=True,
    group_by=GroupByAggregate(prop="<PROP_NAME>"),
)

# One three-line report per group
for group in response.groups:
    print(
        f"Value: {group.grouped_by.value}",
        f"Path: {group.grouped_by.prop}",
        f"Count: {group.total_count}",
        sep="\n",
    )

In [None]:
# Run a grouped aggregation with verbose debug logging switched on, and
# record the UTC start time so the run can be matched against the logs.
import datetime
import http.client as http_client
import logging

from weaviate.classes.aggregate import GroupByAggregate

# Ask http.client to echo connection traffic
# NOTE(review): this and the "requests.packages.urllib3" logger only help if
# the installed client sends traffic through those libraries - confirm.
http_client.HTTPConnection.debuglevel = 1

# Root logger at DEBUG so every library's debug records are shown
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

# UTC timestamp in ISO-8601 form, taken just before the query
current_timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
print(f"Starting query at: {current_timestamp}")

try:
    collection = client.collections.get("<COLLECTION_NAME>")

    # Count objects per distinct value of the chosen property
    response = collection.aggregate.over_all(
        total_count=True,
        group_by=GroupByAggregate(prop="<PROPERTY_NAME>"),
    )

    for group in response.groups:
        print(
            f"Value: {group.grouped_by.value}",
            f"Path: {group.grouped_by.prop}",
            f"Count: {group.total_count}",
            sep="\n",
        )

except Exception as query_error:
    # Report the failure instead of raising so the summary below still prints
    print(f"Error occurred: {query_error!s}")
    print(f"Error type: {type(query_error).__name__}")

print("\nInformation:")
print(f"1. Timestamp: {current_timestamp}")
print("2. URL: Check the debug logs above")
print("3. Headers: Check the debug logs above")

In [None]:
import pandas as pd
def aggregate_collections(client):
    """Build an object-count report for every collection (and tenant) in a cluster.

    Each collection returned by ``client.collections.list_all()`` gets one row.
    When ``collection.tenants.get()`` returns tenants, every tenant is counted
    separately through ``collection.with_tenant(name)`` and gets its own row;
    otherwise the collection is counted as a whole. An error from the tenant
    lookup whose message contains "multi-tenancy is not enabled" marks the
    collection as a regular (non-tenant) one.

    Failures are reported rather than hidden: a tenant that cannot be counted
    gets "ERROR: <message>" in its "Tenant Count" cell, and a collection that
    cannot be processed gets "ERROR: <message>" in its "Count" cell. In both
    cases the scan continues with the next item.

    Args:
        client: Object exposing ``collections.list_all()`` and
            ``collections.get(name)`` (the connected Weaviate client).

    Returns:
        dict: On success, the keys ``collection_count``, ``total_tenants_count``,
        ``empty_collections``, ``empty_tenants``, ``total_objects_regular``,
        ``total_objects_multitenancy``, ``total_objects_combined``,
        ``result_df`` (a pandas DataFrame with columns Collection / Count /
        Tenant / Tenant Count, empty when there are no collections),
        ``empty_collections_list`` and ``empty_tenants_details``.
        If the scan itself fails (e.g. listing collections raises), a dict
        with the single key ``error`` holding the message.
    """

    def count_objects(target):
        # total_count=True requests the number of objects held by `target`
        return target.aggregate.over_all(total_count=True).total_count

    try:
        collections = client.collections.list_all() or {}

        result_data = []
        empty_collections_list = []
        empty_tenants_details = []
        total_tenants_count = 0
        total_objects_regular = 0
        total_objects_multitenancy = 0

        for collection_name in collections:
            # Appended up front so the collection row precedes its tenant rows;
            # "Count" is filled in later through the same dict object.
            collection_row = {"Collection": collection_name, "Count": "", "Tenant": "", "Tenant Count": ""}
            result_data.append(collection_row)

            try:
                collection = client.collections.get(collection_name)

                try:
                    tenants = collection.tenants.get()
                except Exception as e:
                    # Only this specific message means "regular collection";
                    # anything else is a real failure for this collection.
                    if "multi-tenancy is not enabled" not in str(e):
                        raise
                    tenants = None

                if tenants:
                    total_tenants_count += len(tenants)

                    for tenant_name in tenants:
                        try:
                            objects_count = count_objects(collection.with_tenant(tenant_name))
                        except Exception as e_inner:
                            result_data.append(
                                {"Collection": "", "Count": "", "Tenant": tenant_name, "Tenant Count": f"ERROR: {e_inner}"}
                            )
                            continue

                        total_objects_multitenancy += objects_count
                        if objects_count == 0:
                            empty_tenants_details.append({
                                "Collection": collection_name,
                                "Tenant": tenant_name,
                                "Count": 0
                            })
                        result_data.append(
                            {"Collection": "", "Count": "", "Tenant": tenant_name, "Tenant Count": objects_count}
                        )
                else:
                    # No tenants reported: count the collection as a whole
                    objects_count = count_objects(collection)
                    collection_row["Count"] = objects_count
                    total_objects_regular += objects_count
                    if objects_count == 0:
                        empty_collections_list.append({
                            "Collection": collection_name,
                            "Count": 0
                        })

            except Exception as e:
                # Surface the problem in the report and keep scanning the
                # remaining collections instead of dropping it silently.
                collection_row["Count"] = f"ERROR: {e}"

        return {
            "collection_count": len(collections),
            "total_tenants_count": total_tenants_count,
            "empty_collections": len(empty_collections_list),
            "empty_tenants": len(empty_tenants_details),
            "total_objects_regular": total_objects_regular,
            "total_objects_multitenancy": total_objects_multitenancy,
            "total_objects_combined": total_objects_regular + total_objects_multitenancy,
            "result_df": pd.DataFrame(result_data),
            "empty_collections_list": empty_collections_list,
            "empty_tenants_details": empty_tenants_details
        }

    except Exception as e:
        return {"error": str(e)}
    
aggregation_result = aggregate_collections(client)

# Show the per-collection table, or the failure message when the scan failed
if "error" not in aggregation_result:
    print("Aggregated Collections Table:")
    print(aggregation_result["result_df"])
else:
    print("Error:", aggregation_result["error"])


In [None]:
from weaviate.classes.aggregate import GroupByAggregate
# Scope the collection handle to a single tenant, then count that tenant's
# objects per distinct value of the chosen property.
collection = client.collections.get("<COLLECTION_NAME>")
collection = collection.with_tenant("<TENANT_NAME>")
response = collection.aggregate.over_all(
    total_count=True,
    group_by=GroupByAggregate(prop="<PROP_NAME>"),
)
print(response)

In [None]:
# Checks liveness, aggregates a collection and logs HTTP debug information.
import logging
import datetime
from weaviate.classes.aggregate import GroupByAggregate

# Set up logging to capture HTTP requests
# NOTE(review): these hooks target http.client / urllib3; confirm the
# installed client actually routes its traffic through them.
import http.client as http_client
http_client.HTTPConnection.debuglevel = 1

# Configure logging: root logger at DEBUG so library debug records are shown
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

# Record a UTC timestamp so the run can be matched against the logs
current_timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
print(f"Starting query at: {current_timestamp}")

try:
    # Use the same liveness call as the connection cell. `client.misc` is not
    # used anywhere else in this notebook, and an AttributeError here would be
    # swallowed by the except below, skipping the aggregation entirely.
    live = client.is_live()
    collection = client.collections.get("<COLLECTION_NAME>")

    # Count objects per distinct value of the chosen property
    response = collection.aggregate.over_all(
        group_by=GroupByAggregate(prop="<PROPERTY_NAME>"),
        total_count=True
    )
    print(f"Live: {live}")
    for group in response.groups:
        print(f"Value: {group.grouped_by.value}")
        print(f"Path: {group.grouped_by.prop}")
        print(f"Count: {group.total_count}")

except Exception as e:
    # Report the failure instead of raising so the summary below still prints
    print(f"Error occurred: {str(e)}")
    print(f"Error type: {type(e).__name__}")

print("\nInformation:")
print(f"1. Timestamp: {current_timestamp}")
print("2. URL: Check the debug logs above")
print("3. Headers: Check the debug logs above")