In [None]:
# Install (or upgrade to the latest version of) the Weaviate Python client
%pip install -U weaviate-client

In [None]:
import weaviate
import weaviate.classes as wvc
from weaviate.client import WeaviateClient
import os
from weaviate.config import AdditionalConfig, Timeout

# Connect source and target clusters for migration
def connect_to_weaviate(is_source=True, is_cloud=True):
    """Open a Weaviate client for the source or target side of the migration.

    Args:
        is_source: True to connect to the source cluster, False for the target.
        is_cloud: True to connect to a Weaviate Cloud cluster, False to connect
            to a locally running instance.

    Returns:
        The client returned by ``weaviate.connect_to_weaviate_cloud`` or
        ``weaviate.connect_to_local``.

    The ``<...>`` strings are placeholders that must be replaced with real
    values before running.
    """
    # Built before branching because every connection path (cloud and local)
    # passes these headers; assigning them only in the cloud branch leaves the
    # name unbound when is_cloud=False.
    headers = {"X-OpenAI-Api-Key": "<OPENAI_API_KEY>"}
    timeout_config = AdditionalConfig(
        timeout=Timeout(init=120, query=240, insert=480),
    )

    if is_cloud:
        if is_source:
            cluster_url = "<SOURCE_CLUSTER_URL>"
            api_key = "<SOURCE_CLUSTER_API_KEY>"
        else:
            cluster_url = "<TARGET_CLUSTER_URL>"
            api_key = "<TARGET_CLUSTER_API_KEY>"
        return weaviate.connect_to_weaviate_cloud(
            cluster_url=cluster_url,
            auth_credentials=weaviate.auth.AuthApiKey(api_key),
            skip_init_checks=True,
            headers=headers,
            additional_config=timeout_config,
        )

    if is_source:
        # Local source: default ports.
        return weaviate.connect_to_local(
            skip_init_checks=True,
            headers=headers,
            additional_config=timeout_config,
        )

    # Local target: a second local instance on its own ports.
    # NOTE(review): replace the placeholders with the instance's actual port
    # numbers — presumably integers; confirm against the client documentation.
    return weaviate.connect_to_local(
        port="<PORT>",
        grpc_port="<GRPC_PORT>",
        headers=headers,
    )

# Retrieve collections from the source cluster and determine Multitenancy status dynamically.
def get_collections_mt_status(client: WeaviateClient) -> dict:
    """Map each collection name on ``client`` to whether multi-tenancy is on.

    Multi-tenancy is probed by asking the collection for its tenants: a
    successful call is recorded as True, any failure as False. Collections
    whose handle cannot be obtained are reported and left out of the result.

    Args:
        client: Client whose collections are inspected.

    Returns:
        A dict of ``{collection_name: bool}``. It is empty when there are no
        collections or when listing them fails.
    """
    status_by_name = {}
    try:
        listed = client.collections.list_all()
        if not listed:
            print("No collections found.")
            return status_by_name

        print("Collections in source cluster:")
        for name in listed.keys():
            try:
                handle = client.collections.get(name)
                try:
                    # The tenant listing itself is discarded; only whether the
                    # call succeeds matters.
                    handle.tenants.get()
                except Exception as tenant_error:
                    if "multi-tenancy is not enabled" in str(tenant_error):
                        print(f"- {name}: MT is not enabled")
                    else:
                        print(f"Error checking multi-tenancy for '{name}': {tenant_error}")
                    # Both the expected "not enabled" reply and any other
                    # failure are recorded as multi-tenancy disabled.
                    status_by_name[name] = False
                else:
                    print(f"- {name}: MT is enabled")
                    status_by_name[name] = True
            except Exception as e:
                print(f"Error retrieving collection '{name}': {e}")
    except Exception as e:
        print(f"Error retrieving collections: {e}")
    return status_by_name

# Create collections in the target cluster with the corresponding multi-tenancy setting.
def create_collections_with_mt_status(client: WeaviateClient, collections_status: dict):
    """Create one collection on ``client`` per entry of ``collections_status``.

    Only the collection name and its multi-tenancy configuration are passed to
    the create call. Automatic tenant creation is switched on exactly when
    multi-tenancy is.

    Args:
        client: Client on which the collections are created.
        collections_status: Mapping of collection name to a bool saying
            whether multi-tenancy should be enabled.
    """
    for name in collections_status:
        mt_enabled = collections_status[name]
        mt_config = wvc.config.Configure.multi_tenancy(
            enabled=mt_enabled,
            auto_tenant_creation=mt_enabled,
        )
        client.collections.create(name=name, multi_tenancy_config=mt_config)
        print(f"Collection '{name}' created with multi-tenancy enabled = {mt_enabled}.")

# Connect Weaviate with source and target clusters.
client_source_cluster = connect_to_weaviate(is_source=True, is_cloud=True)
try:
    client_target_cluster = connect_to_weaviate(is_source=False, is_cloud=True)
    try:
        # Retrieve collection names and their MT status from the source cluster.
        collections_status = get_collections_mt_status(client_source_cluster)

        # Create the collections in the target cluster using the determined multi-tenancy settings.
        create_collections_with_mt_status(client_target_cluster, collections_status)
    finally:
        # The migration step opens its own connections, so release these
        # instead of leaving them open once this step finishes or fails.
        client_target_cluster.close()
finally:
    client_source_cluster.close()

In [None]:
# Check if multi-tenancy is enabled for a collection and list tenants.
def check_collections_multi_tenancy(client):
    """Report multi-tenancy status and tenant names for every collection.

    Args:
        client: Client whose collections are inspected.

    Returns:
        ``{collection_name: {"multi_tenancy_enabled": bool, "tenants": list}}``
        on success, ``{}`` when there are no collections, or
        ``{"error": message}`` when listing or fetching a collection fails.
    """
    separator = "-" * 50
    try:
        available = client.collections.list_all()
        if not available:
            print("No collections found")
            return {}

        report = {}
        print(f"Total collections: {len(available)}")
        print(separator)

        for name in available:
            # Start from "disabled, no tenants"; upgraded below if the tenant
            # lookup succeeds.
            entry = {"multi_tenancy_enabled": False, "tenants": []}
            handle = client.collections.get(name)
            print(f"Checking: {name}")

            try:
                tenant_map = handle.tenants.get()
                entry["multi_tenancy_enabled"] = True
                entry["tenants"] = list(tenant_map.keys())
                print(f"  MT: Enabled | Tenants: {len(entry['tenants'])}")
            except Exception as exc:
                reason = str(exc)
                if "multi-tenancy is not enabled" in reason:
                    print("  MT: Disabled")
                else:
                    print(f"  Error: {reason}")

            report[name] = entry
            print(separator)

        return report

    except Exception as exc:
        reason = str(exc)
        print(f"Global error: {reason}")
        return {"error": reason}
    
def _copy_objects(source, target, batch_size, label):
    """Copy every object of ``source`` into ``target`` using fixed-size batches.

    Each object keeps its UUID and properties, and its ``"default"`` vector is
    written alongside. ``label`` is only used in progress and failure messages.
    """
    with target.batch.fixed_size(batch_size=batch_size) as batch:
        for obj in source.iterator(include_vector=True):
            batch.add_object(
                properties=obj.properties,
                vector=obj.vector["default"],
                uuid=obj.uuid,
            )
            print(f"Object with UUID {obj.uuid} added for {label}")

    # Per-object import failures are collected on the batch wrapper rather
    # than raised, so surface them instead of reporting a clean run.
    failed = target.batch.failed_objects
    if failed:
        print(f"{len(failed)} object(s) failed to import for {label}")


# Migrate data
def replicate_data(client_source, client_target, collections_info):
    """Migrates data using collection/tenant info from check_collections_multi_tenancy.

    Args:
        client_source: Client for the cluster the objects are read from.
        client_target: Client for the cluster the objects are written to.
        collections_info: Mapping of collection name to a dict with the keys
            ``"multi_tenancy_enabled"`` (bool) and ``"tenants"`` (list of
            tenant names).

    Errors while copying a tenant or a collection are printed and the
    migration continues with the next one.
    """
    for collection_name, info in collections_info.items():
        is_mt_enabled = info["multi_tenancy_enabled"]
        tenants = info["tenants"]

        source_col = client_source.collections.get(collection_name)
        target_col = client_target.collections.get(collection_name)

        if not is_mt_enabled:
            print(f"\nMigrating non-MT collection: {collection_name}")
            try:
                _copy_objects(
                    source_col,
                    target_col,
                    250,
                    f"collection {collection_name}",
                )
            except Exception as e:
                print(f"Error migrating collection: {str(e)}")
            continue

        print(f"\nMigrating MT collection: {collection_name}")
        if not tenants:
            # A multi-tenant collection holds its objects per tenant, so with
            # no tenants there is nothing to copy; it must not be read as if
            # it were a non-MT collection.
            print(f"No tenants found for '{collection_name}'; nothing to migrate.")
            continue

        # Create tenants in the target collection
        tenants_tgt = [wvc.tenants.Tenant(name=tenant_name) for tenant_name in tenants]
        target_col.tenants.create(tenants_tgt)

        for tenant_name in tenants:
            try:
                _copy_objects(
                    source_col.with_tenant(tenant_name),
                    target_col.with_tenant(tenant_name),
                    100,
                    f"tenant {tenant_name}",
                )
            except Exception as e:
                print(f"Error migrating {tenant_name}: {str(e)}")


# Open connections to both clusters for the data migration.
client_source_cluster = connect_to_weaviate(is_source=True, is_cloud=True)
client_target_cluster = connect_to_weaviate(is_source=False, is_cloud=True)

# Gather per-collection multi-tenancy details from the source cluster.
source_info = check_collections_multi_tenancy(client_source_cluster)

# Only migrate when the lookup returned a usable mapping (no "error" marker).
if not isinstance(source_info, dict) or "error" in source_info:
    print("Couldn't get collection info")
else:
    replicate_data(client_source_cluster, client_target_cluster, source_info)