In [None]:
# Install (or upgrade to the latest version of) the Weaviate Python client
%pip install -U weaviate-client

In [None]:
# This script migrates multiple Weaviate collections that share an identical schema into a single, global multi-tenant collection.
# It performs three main steps:
# 1. Connect to source and target Weaviate instances and create a global collection with multi-tenancy enabled.
# 2. Map each source collection into a corresponding tenant in the global collection.
# 3. Migrate objects from each source collection into its associated tenant.

import weaviate
import weaviate.classes as wvc
from tqdm import tqdm
from weaviate.config import AdditionalConfig, Timeout

# Step 1: Connect and create global collection
def connect_and_create_global(src_url=None, tgt_url=None, global_collection_name=None):
    """
    Connect to source and target Weaviate instances and create a global multi-tenant collection.

    Every connection path uses the v4 client helpers, because the rest of this
    script relies on the v4 ``client.collections`` API, which the legacy
    ``weaviate.Client`` class does not provide. The API keys, the OpenAI header
    and the timeouts are the placeholders defined in this function; the URL
    arguments only change *where* each client connects.

    Args:
        src_url: Source Weaviate URL (e.g. "http://host:8080"). If None, the
            source is a local instance reached via ``connect_to_local``.
        tgt_url: Target Weaviate Cloud cluster URL. If None, the "ENDPOINT_URL"
            placeholder is used.
        global_collection_name: Name for the global collection (will prompt if None)

    Returns:
        Tuple of (source client, target client, global collection)

    Raises:
        ValueError: If ``src_url`` is given but no host can be parsed from it.
    """
    # Local import: only this function needs URL parsing.
    from urllib.parse import urlsplit

    def _timeouts():
        # Fresh config object per client, as in the original per-connection setup.
        return AdditionalConfig(timeout=Timeout(init=30, query=120, insert=240))

    # Connect to source Weaviate
    if src_url is None:
        client_src = weaviate.connect_to_local(
            auth_credentials=weaviate.auth.AuthApiKey("<ADMIN_API_KEY>"),
            headers={
                "X-OpenAI-Api-Key": "OPENAI_API_KEY"
            },
            additional_config=_timeouts(),
        )
    else:
        # Accept both "http://host:port" and bare "host:port".
        parts = urlsplit(src_url if "://" in src_url else f"//{src_url}")
        if not parts.hostname:
            raise ValueError(f"Cannot determine host from source URL: {src_url!r}")
        secure = parts.scheme == "https"
        client_src = weaviate.connect_to_custom(
            http_host=parts.hostname,
            http_port=parts.port or (443 if secure else 8080),
            http_secure=secure,
            grpc_host=parts.hostname,
            # NOTE(review): assumes gRPC is exposed on Weaviate's default port
            # 50051 on the same host; adjust if the deployment differs.
            grpc_port=50051,
            grpc_secure=secure,
            auth_credentials=weaviate.auth.AuthApiKey("<ADMIN_API_KEY>"),
            headers={
                "X-OpenAI-Api-Key": "OPENAI_API_KEY"
            },
            additional_config=_timeouts(),
        )

    client_tgt = None
    try:
        # Connect to target Weaviate
        client_tgt = weaviate.connect_to_weaviate_cloud(
            cluster_url="ENDPOINT_URL" if tgt_url is None else tgt_url,
            auth_credentials=weaviate.auth.AuthApiKey("ADMIN_API_KEY"),
            headers={
                "X-OpenAI-Api-Key": "OPENAI_API_KEY"
            },
            additional_config=_timeouts(),
        )

        # Get or prompt for global collection name
        if global_collection_name is None:
            global_collection_name = input("Enter name for global collection: ")

        # Create global collection with multi-tenancy enabled; no other
        # collection settings are configured here.
        print(f"Creating global collection '{global_collection_name}' with multi-tenancy...")
        global_collection = client_tgt.collections.create(
            name=global_collection_name,
            multi_tenancy_config=wvc.config.Configure.multi_tenancy(enabled=True),
        )
    except BaseException:
        # Setup failed part-way (including Ctrl-C at the prompt): release any
        # connection already opened, then let the original error propagate.
        if client_tgt is not None:
            client_tgt.close()
        client_src.close()
        raise

    return client_src, client_tgt, global_collection

# Step 2: Create tenants from source collections
def create_tenants_from_collections(client_src, global_collection):
    """
    Mirror every source collection as a tenant of the global collection.

    Args:
        client_src: Source Weaviate client
        global_collection: Target global collection with multi-tenancy

    Returns:
        Tuple of (tenant map, source collections): a dictionary mapping each
        tenant name to its tenant object, and the collection listing returned
        by the source client.
    """
    # Discover what exists on the source side
    source_collections = client_src.collections.list_all()
    names = [*source_collections.keys()]

    print(f"Found {len(names)} collections in source: {', '.join(names)}")

    # One tenant per source collection, named after that collection
    new_tenants = [wvc.tenants.Tenant(name=collection_name) for collection_name in names]

    print(f"Creating {len(new_tenants)} tenants in global collection...")
    global_collection.tenants.create(new_tenants)

    # Index the tenants by name for the caller
    return {tenant.name: tenant for tenant in new_tenants}, source_collections

# Step 3: Migrate data
def migrate_collection_data(source_collection, target_tenant_collection):
    """
    Migrate data from a source collection to a target tenant collection.

    Objects are streamed from the source iterator (with vectors) and written
    in fixed-size batches, keeping each object's UUID. After the batch context
    closes, the failures recorded by the batch are reported and excluded from
    the returned count, so the result reflects objects actually imported.

    Args:
        source_collection: Source collection
        target_tenant_collection: Target tenant collection

    Returns:
        Number of objects migrated (objects queued minus objects that failed)
    """
    attempted = 0
    with target_tenant_collection.batch.fixed_size(batch_size=100) as batch:
        for obj in tqdm(source_collection.iterator(include_vector=True)):
            vectors = obj.vector
            batch.add_object(
                properties=obj.properties,
                # Pass the "default" entry when present; otherwise forward the
                # vector mapping unchanged (e.g. named vectors).
                vector=vectors["default"] if "default" in vectors else vectors,
                uuid=obj.uuid,
            )
            attempted += 1

    # Batch imports do not raise per object; failures are collected on the
    # batch wrapper and must be inspected once the context has flushed.
    failed = target_tenant_collection.batch.failed_objects
    if failed:
        print(
            f"Warning: {len(failed)} of {attempted} objects failed to import. "
            f"First failure: {failed[0]}"
        )

    return attempted - len(failed)

def main():
    """
    Run the full migration: connect, create tenants, copy objects, disconnect.

    Each source collection is copied into the tenant of the same name in the
    global collection. Both client connections are closed even when tenant
    creation or data migration raises.
    """
    # Step 1: Connect and create global collection
    client_src, client_tgt, global_collection = connect_and_create_global()

    try:
        # Step 2: Create tenants from source collections
        # (the tenant map is not needed here; tenants are addressed by name)
        _, source_collections = create_tenants_from_collections(client_src, global_collection)

        # Step 3: Migrate data
        total_objects = 0
        for tenant_name in source_collections:
            print(f"Migrating data from collection '{tenant_name}' to tenant...")

            # Get source collection
            source_collection = client_src.collections.get(tenant_name)

            # Get target tenant collection
            target_tenant_collection = global_collection.with_tenant(tenant_name)

            # Migrate data
            migrated = migrate_collection_data(source_collection, target_tenant_collection)
            total_objects += migrated

            print(f"Migrated {migrated} objects from '{tenant_name}'")

        print(f"Migration complete: {total_objects} total objects migrated to global collection")
    finally:
        # Close connections; the nested finally ensures the target is closed
        # even if closing the source raises.
        try:
            client_src.close()
        finally:
            client_tgt.close()

# Run the migration only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()