In [None]:
# Install (or upgrade to the latest) Weaviate Python client
%pip install -U weaviate-client

In [None]:
# This script migrates Weaviate collections that share an identical schema into a single,
# global multi-tenant collection.
# It performs three main steps:
#   1. Connect to the source and target Weaviate instances and create a global collection with multi-tenancy enabled.
#   2. Map each source collection to a corresponding tenant in the global collection.
#   3. Migrate the objects from each source collection into its associated tenant.

import weaviate
import weaviate.classes as wvc
from weaviate.classes.init import Auth
from weaviate.classes.config import Configure, Vectorizers
from tqdm import tqdm

# --- CONFIGURATION ---
# Source instance: migrate() lists all collections here and copies their
# objects (a collection named GLOBAL_COLLECTION_NAME is skipped).
SRC_URL = "<SOURCE-WEAVIATE-URL>"
SRC_KEY = "<SOURCE-WEAVIATE-API-KEY>"
# Target instance that holds the global multi-tenant collection.
# You can set TGT_URL = SRC_URL to migrate within the same instance
# (migrate() then reuses the source connection and TGT_KEY is not used).
TGT_URL = "<TARGET-WEAVIATE-URL>"
TGT_KEY = "<TARGET-WEAVIATE-API-KEY>"

# Name of the multi-tenant collection in the target instance. If it already
# exists, migrate() reuses it instead of creating it.
GLOBAL_COLLECTION_NAME = "<COLLECTION-NAME>"  # e.g. "GlobalCollection"

def _build_vector_config(config):
    """Translate a template collection's named vectors into creation configs.

    Args:
        config: Collection configuration as returned by ``collection.config.get()``.

    Returns:
        A list of ``Configure.NamedVectors.*`` entries, one per named vector
        whose vectorizer type has a mapping below. Vectors with an unmapped
        vectorizer type are reported on stdout and left out of the list.
    """
    # You can customize this part to include/exclude specific vectorizers
    new_vector_config = []
    if not config.vector_config:
        return new_vector_config

    # NOTE(review): newer weaviate-client releases pair `vector_config=` with
    # `Configure.Vectors.*`; confirm `Configure.NamedVectors.*` is still
    # accepted by the installed client version.
    for vec_name, vec_data in config.vector_config.items():
        vec_type = vec_data.vectorizer.vectorizer
        source_properties = vec_data.vectorizer.source_properties

        print(f"   Found vector: '{vec_name}' (Type: {vec_type.value})")

        # Dynamic Mapping
        if vec_type == Vectorizers.TEXT2VEC_WEAVIATE:
            new_vector_config.append(
                Configure.NamedVectors.text2vec_weaviate(
                    name=vec_name,
                    source_properties=source_properties,
                    vectorize_collection_name=False,
                )
            )
        elif vec_type == Vectorizers.TEXT2VEC_OPENAI:
            new_vector_config.append(
                Configure.NamedVectors.text2vec_openai(
                    name=vec_name,
                    source_properties=source_properties,
                )
            )
        elif vec_type == Vectorizers.TEXT2VEC_COHERE:
            new_vector_config.append(
                Configure.NamedVectors.text2vec_cohere(
                    name=vec_name,
                    source_properties=source_properties,
                )
            )
        # Add other vectorizers here if needed (Google, Jina, etc.)
        else:
            # Make the gap visible instead of silently dropping the vector:
            # objects are still sent with this named vector during migration.
            print(
                f"   ⚠️  No mapping for vectorizer type '{vec_type.value}'; "
                f"vector '{vec_name}' will not be configured in the global collection."
            )

    return new_vector_config


def _copy_objects(src_coll, tgt_tenant):
    """Queue every object of ``src_coll`` for insertion into ``tgt_tenant``.

    Properties, vectors and UUIDs are forwarded unchanged.

    Args:
        src_coll: Source collection handle to read from.
        tgt_tenant: Tenant-scoped handle of the global collection to write to.

    Returns:
        The number of objects handed to the batch (not the number the server
        accepted; failures are exposed via ``tgt_tenant.batch.failed_objects``).
    """
    count = 0
    with tgt_tenant.batch.fixed_size(batch_size=1000) as batch:
        for obj in tqdm(src_coll.iterator(include_vector=True)):
            batch.add_object(
                properties=obj.properties,
                vector=obj.vector,
                uuid=obj.uuid,
            )
            count += 1
    return count


def migrate():
    """Migrate all source collections into tenants of one global collection.

    Steps:
      1. Connect to the source and target instances (a single connection is
         shared when ``SRC_URL == TGT_URL``).
      2. Use the first source collection as the template for the vector
         configuration.
      3. Create ``GLOBAL_COLLECTION_NAME`` with multi-tenancy enabled, or reuse
         it if it already exists.
      4. For each source collection (except one named
         ``GLOBAL_COLLECTION_NAME``), create a tenant of the same name if
         missing and copy all objects into it.

    Progress and the first batch error per collection are printed to stdout.
    Both connections are closed on exit, including when an error occurs.

    Returns:
        None.
    """
    # 1. CONNECT
    client_src = weaviate.connect_to_weaviate_cloud(
        cluster_url=SRC_URL,
        auth_credentials=Auth.api_key(SRC_KEY),
        additional_config=wvc.init.AdditionalConfig(timeout=wvc.init.Timeout(init=60, query=120))
    )
    client_tgt = None

    try:
        # Connect to the target inside the try so that a failure here still
        # closes the already-open source connection.
        if SRC_URL == TGT_URL:
            client_tgt = client_src
        else:
            client_tgt = weaviate.connect_to_weaviate_cloud(
                cluster_url=TGT_URL,
                auth_credentials=Auth.api_key(TGT_KEY)
            )

        # 2. GET TEMPLATE
        src_collections = list(client_src.collections.list_all().keys())
        if not src_collections:
            print("No collections found.")
            return

        template_name = src_collections[0]
        print(f"📋 Template Collection: '{template_name}'")
        template = client_src.collections.get(template_name)
        config = template.config.get()

        # 3. BUILD VECTOR CONFIG
        new_vector_config = _build_vector_config(config)

        # 4. CREATE GLOBAL COLLECTION
        if client_tgt.collections.exists(GLOBAL_COLLECTION_NAME):
            print(f"⚠️  Collection '{GLOBAL_COLLECTION_NAME}' exists. Skipping creation.")
            global_coll = client_tgt.collections.get(GLOBAL_COLLECTION_NAME)
        else:
            print(f"🔨 Creating '{GLOBAL_COLLECTION_NAME}'...")
            # NOTE(review): only the vector configuration is taken from the
            # template; property definitions are not passed here — presumably
            # left to auto-schema on import, confirm this is intended.
            global_coll = client_tgt.collections.create(
                name=GLOBAL_COLLECTION_NAME,
                vector_config=new_vector_config,
                multi_tenancy_config=Configure.multi_tenancy(enabled=True)
            )

        # 5. MIGRATE DATA
        # Fetch the tenant names once and track additions locally instead of
        # re-querying the server for every source collection.
        existing_tenants = set(global_coll.tenants.get())

        for old_name in src_collections:
            if old_name == GLOBAL_COLLECTION_NAME:
                continue

            print(f"🚀 Processing '{old_name}'...")

            # Create Tenant
            if old_name not in existing_tenants:
                global_coll.tenants.create([wvc.tenants.Tenant(name=old_name)])
                existing_tenants.add(old_name)

            src_coll = client_src.collections.get(old_name)
            tgt_tenant = global_coll.with_tenant(old_name)

            count = _copy_objects(src_coll, tgt_tenant)

            failed = tgt_tenant.batch.failed_objects
            if failed:
                print(f"❌ Errors in '{old_name}': {failed[0]}")
            else:
                print(f"✅ Migrated {count} objects.")

    finally:
        try:
            client_src.close()
        finally:
            # Close the target only when it is a distinct, opened connection.
            if client_tgt is not None and client_tgt is not client_src:
                client_tgt.close()

# Run the migration only when this file is executed directly, not when imported.
if __name__ == "__main__":
    migrate()