In [None]:
# Install the Weaviate Python client, upgrading it (-U) if it is already present.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve environment variables
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# The cluster URL and API key are mandatory for a cloud connection. Fail here
# with a readable message instead of an obscure error from inside the client.
missing_vars = [
    name
    for name, value in (("CLUSTER_URL", CLUSTER_URL), ("API_KEY", API_KEY))
    if not value
]
if missing_vars:
    raise ValueError(
        f"Missing required environment variable(s): {', '.join(missing_vars)}"
    )

# Model-provider keys are optional: forward only the ones that are actually
# set, because a header whose value is None is not a valid HTTP header.
provider_headers = {
    header: key
    for header, key in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
    )
    if key
}

# Connect to Weaviate
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=provider_headers or None,
    additional_config=AdditionalConfig(
        # Timeouts in seconds for connection init, queries and inserts
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Collect connection / version diagnostics before printing them together
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__

live = client.is_live()
connected = client.is_connected()

print(f"Weaviate client is ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
print(f"live: {live}")
print(f"Connected: {connected}")

In [None]:

# Walk every object in the "Movies" collection, printing a running index
# followed by the object's UUID and properties, then report the final tally.
collection = client.collections.get("Movies")
total_objects = 0  # stays 0 when the iterator yields nothing
for total_objects, item in enumerate(collection.iterator(cache_size=200), start=1):
    print(f"Object: {total_objects}")
    print(item.uuid, item.properties)
print(f"Total objects fetched: {total_objects}")

In [None]:
# Print the name of every collection in the Weaviate cluster
try:
    collections = client.collections.list_all()
    if not collections:
        print("No collections found.")
    else:
        print("Collections in Weaviate:")
        # Iterating the mapping directly yields its keys (the collection names)
        for collection_name in collections:
            print(f"- {collection_name}")
except Exception as err:
    print(f"Error retrieving collections: {err}")

In [None]:
# List all collection names and Tenant names in Weaviate cluster, with the
# object count of each tenant (multi-tenant collections) or of the whole
# collection (single-tenant collections).
try:
    collections = client.collections.list_all()
    if collections:
        collection_count = len(collections)
        print(f"Total number of collections: {collection_count}\n")
        # Iterate through each collection name
        for collection_name in collections:
            print(f"Collection: {collection_name}")
            # Get the collection object
            collection = client.collections.get(collection_name)

            # Asking for tenants doubles as the multi-tenancy check: the call
            # raises with a "multi-tenancy is not enabled" message otherwise.
            multi_tenancy_disabled = False
            try:
                tenants = collection.tenants.get()
            except Exception as e:
                # Case-insensitive match so wording/casing changes in the
                # server message do not turn this into an "unexpected" error.
                if "multi-tenancy is not enabled" in str(e).lower():
                    multi_tenancy_disabled = True
                    tenants = {}
                else:
                    # Report instead of silently skipping the collection
                    print(f"Error retrieving tenants for '{collection_name}': {e}")
                    continue

            if tenants:  # Multi-tenancy is enabled and tenants are found
                # Reuse the tenants already fetched rather than calling get() again
                print(f"Total number of tenants in this collection is {len(tenants)}")
                for tenant_name in tenants:
                    # One failing tenant must not hide the others
                    try:
                        tenant_collection = collection.with_tenant(tenant_name)
                        response = tenant_collection.aggregate.over_all(total_count=True).total_count
                        print(f"        Tenant: {tenant_name} = {response} objects")
                    except Exception as e:
                        print(f"        Tenant: {tenant_name} = ERROR: {e}")
            else:
                # No tenants (or multi-tenancy disabled): count the collection itself
                try:
                    response = collection.aggregate.over_all(total_count=True).total_count
                    # The two single-tenant paths have always differed by a trailing
                    # period in their output; both texts are kept verbatim.
                    suffix = "" if multi_tenancy_disabled else "."
                    print(f"= {response} objects{suffix}")
                except Exception as e:
                    print(f"Error counting objects in '{collection_name}': {e}")

    else:
        print("No collections found.")

except Exception as e:
    print(f"Error retrieving collections: {e}")


In [None]:
import pandas as pd
def _summary_row(collection_name, collection_objects=None, tenant=None, tenant_objects=None):
    """Build one row of the summary table as a dict keyed by output column name."""
    return {
        "Collection": collection_name,
        "Collection Objects": collection_objects,
        "Tenant": tenant,
        "Tenant Objects": tenant_objects,
    }


def _whole_collection_row(collection, collection_name):
    """Count every object of a tenant-less collection; a failure becomes an ERROR row."""
    try:
        total = collection.aggregate.over_all(total_count=True).total_count
    except Exception as e_inner:
        print(f"Error aggregating collection '{collection_name}': {e_inner}")
        return _summary_row(
            collection_name,
            collection_objects="ERROR",
            tenant_objects=f"ERROR: {e_inner}",
        )
    return _summary_row(collection_name, collection_objects=total)


def _per_tenant_rows(collection, collection_name, tenants):
    """Return a MULTI-TENANCY header row followed by one object-count row per tenant."""
    rows = [
        _summary_row(
            collection_name,
            tenant="MULTI-TENANCY",
            tenant_objects=f"{len(tenants)} tenants",
        )
    ]
    for tenant_name in tenants:
        # Each tenant is counted independently so one failure does not hide the rest
        try:
            total = collection.with_tenant(tenant_name).aggregate.over_all(total_count=True).total_count
        except Exception as e_inner:
            print(f"Error aggregating tenant '{tenant_name}' in collection '{collection_name}': {e_inner}")
            rows.append(_summary_row(collection_name, tenant=tenant_name, tenant_objects=f"ERROR: {e_inner}"))
        else:
            rows.append(_summary_row(collection_name, tenant=tenant_name, tenant_objects=total))
    return rows


def _rows_for_collection(client, collection_name):
    """Produce all summary rows (counts or error markers) for a single collection."""
    try:
        collection = client.collections.get(collection_name)
    except Exception as e:
        print(f"Error retrieving collection '{collection_name}': {e}")
        return [_summary_row(collection_name, tenant="ERROR", tenant_objects=str(e))]

    # Asking for tenants doubles as the multi-tenancy check
    try:
        tenants = collection.tenants.get()
        if tenants:
            return _per_tenant_rows(collection, collection_name, tenants)
        # No tenants or empty => single-tenancy scenario
        return [_whole_collection_row(collection, collection_name)]
    except Exception as e:
        if "multi-tenancy is not enabled" in str(e).lower():
            # Fallback: get the total count for the entire collection
            return [_whole_collection_row(collection, collection_name)]
        print(f"Unexpected error retrieving tenants for '{collection_name}': {e}")
        return [
            _summary_row(
                collection_name,
                collection_objects="ERROR",
                tenant="ERROR",
                tenant_objects=str(e),
            )
        ]


def aggregate_collections_in_batches(client, batch_size: int = 10) -> pd.DataFrame:
    """Summarise object counts for every collection (and tenant) as a DataFrame.

    Collection names are taken from ``client.collections.list_all()`` and walked
    in slices of ``batch_size``; the slicing only groups the progress messages,
    every collection is still queried individually.

    Args:
        client: Connected Weaviate client exposing ``collections.list_all()``
            and ``collections.get(name)``.
        batch_size: Number of collections per progress batch; must be >= 1.

    Returns:
        A DataFrame with the columns ``Collection``, ``Collection Objects``,
        ``Tenant`` and ``Tenant Objects``. Collections with tenants contribute a
        ``MULTI-TENANCY`` header row plus one row per tenant; other collections
        contribute a single row. Failures are recorded as ``ERROR`` values in
        the affected row rather than raised. An empty, column-less DataFrame is
        returned when there are no collections.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A zero step would crash range() with an unrelated message and a negative
    # one would silently yield no batches at all, so reject both up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    all_collections = list(client.collections.list_all())
    if not all_collections:
        print("No collections found.")
        return pd.DataFrame()

    total_collections = len(all_collections)
    print(f"Found {total_collections} collections. Processing in batches of {batch_size}.\n")

    # Rows are accumulated in a list and converted to a DataFrame once at the end
    result_rows = []
    for start_idx in range(0, total_collections, batch_size):
        # Slice the list of collections for this batch
        collections_batch = all_collections[start_idx : start_idx + batch_size]
        print(f"Processing batch {start_idx+1} to {start_idx+len(collections_batch)} ...")
        for collection_name in collections_batch:
            result_rows.extend(_rows_for_collection(client, collection_name))

    return pd.DataFrame(result_rows)

# Build the summary with two collections per progress batch. As the last
# expression of the cell, the returned DataFrame is what the notebook displays.
aggregate_collections_in_batches(client, batch_size=2)