In [None]:
# download the weaviate client
%pip install -U weaviate-client

In [None]:
import weaviate, os
from weaviate.config import AdditionalConfig, Timeout
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve environment variables
CLUSTER_URL = os.getenv("CLUSTER_URL")
API_KEY = os.getenv("API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# The cluster URL and API key are mandatory for the connection below; stop
# here with a readable message instead of letting the client fail on None.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("CLUSTER_URL", CLUSTER_URL),
        ("API_KEY", API_KEY),
    )
    if not setting_value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
        + ". Set them in the environment or in a .env file."
    )

# Only forward the model-provider keys that are actually set, so an unset
# key is left out rather than sent as a None header value.
provider_headers = {
    header_name: header_value
    for header_name, header_value in (
        ("X-OpenAI-Api-Key", OPENAI_API_KEY),
        ("X-Cohere-Api-Key", COHERE_API_KEY),
    )
    if header_value
}

# Connect to Weaviate
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=CLUSTER_URL,
    auth_credentials=weaviate.auth.AuthApiKey(API_KEY),
    headers=provider_headers,
    additional_config=AdditionalConfig(
        # Separate timeouts for client initialisation, queries and inserts
        timeout=Timeout(init=30, query=60, insert=120)
    ),
)

# Collect readiness and version information from the server and the client
ready = client.is_ready()
server_version = client.get_meta()["version"]
client_version = weaviate.__version__

# Liveness and connection state of the client
live = client.is_live()
connected = client.is_connected()

print(f"Weaviate client is ready: {ready}")
print(f"Weaviate Client Version: {client_version}")
print(f"Weaviate Server Version: {server_version}")
print(f"live: {live}")
print(f"Connected: {connected}")

In [None]:
# Get the node and shard information into a table
from prettytable import PrettyTable

# Ask the cluster for verbose node details and show the raw response first
node_info = client.cluster.nodes(output="verbose")
print(node_info)

# One table row per (node, shard) pair; the column headers are passed
# straight to the constructor.
shard_columns = ["Node Name", "Collection", "Shard Name", "Object Count", "Index Status", "Loaded"]
shard_table = PrettyTable(shard_columns)

for node in node_info:
    for shard in node.shards:
        shard_row = [
            node.name,
            shard.collection,
            shard.name,
            shard.object_count,
            shard.vector_indexing_status,
            shard.loaded,
        ]
        shard_table.add_row(shard_row)

print(shard_table)

In [None]:
from collections import defaultdict
import pandas as pd

def check_shard_consistency(node_info):
    """
    Report shards whose object counts disagree between nodes.

    Each shard is identified by its (collection, shard name) pair. The object
    count reported by every node listing that shard is collected, and the
    shard is flagged when those counts are not all equal. The outcome is
    printed; nothing is returned.

    :param node_info: Output of client.cluster.nodes(output="verbose").
    """
    # (collection, shard name) -> [(node name, object count), ...]
    replicas_by_shard = defaultdict(list)
    for node in node_info:
        for shard in node.shards:
            replicas_by_shard[(shard.collection, shard.name)].append(
                (node.name, shard.object_count)
            )

    # Emit one row per node for every shard with more than one distinct count
    mismatch_rows = []
    for (collection, shard_name), replicas in replicas_by_shard.items():
        distinct_counts = {count for _, count in replicas}
        if len(distinct_counts) <= 1:
            continue
        mismatch_rows.extend(
            {
                "Collection": collection,
                "Shard": shard_name,
                "Node": node_name,
                "Object Count": count,
            }
            for node_name, count in replicas
        )

    # Nothing flagged: report success and stop
    if not mismatch_rows:
        print("All shards are consistent.")
        return

    mismatch_table = pd.DataFrame(mismatch_rows)
    print("Inconsistent Shards Found:")
    print(mismatch_table.to_string(index=False))

# Example usage:

# Fetch a fresh verbose node listing from the connected cluster; this rebinds
# the notebook-level `node_info` name.
node_info = client.cluster.nodes(output="verbose")
# Print either the shards whose object counts differ between nodes, or a
# message stating that all shards are consistent.
check_shard_consistency(node_info)