[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb)

# Merging Namespaces in a Pinecone Index

This notebook queries vectors out of two namespaces `ns1` and `ns2` and upserts them to a new namespace named `merged`.

Please note that this code is **experimental** and is not guaranteed by Pinecone to work. Test it thoroughly before using it in production.

In [None]:
%pip install -qU pinecone-notebooks pinecone-client[grpc]

In [None]:
from pinecone_notebooks.colab import Authenticate

# Run the pinecone_notebooks Colab authentication helper.
# NOTE(review): presumably this is what populates the PINECONE_API_KEY
# environment variable read in the next cell — confirm.
Authenticate()

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

# Read the generated API key back from the PINECONE_API_KEY environment variable
api_key = os.getenv('PINECONE_API_KEY')

# Build the Pinecone client from that key
pc = Pinecone(api_key=api_key)

# Open a handle to the index whose namespaces will be merged
index_name = "namespace-test"  # replace with the correct index name
index = pc.Index(index_name)

In [None]:
import random

# Vector dimension reported by the index stats; fetch_all_vectors uses it to
# size the random query vector.
dimension = index.describe_index_stats()['dimension']
# Function to fetch all vectors from a namespace
def fetch_all_vectors(namespace, max_vectors=10000):
    """Return every vector stored in ``namespace`` using a single query.

    The namespace's vector count is read from the index stats and passed as
    ``top_k`` to one query against a random vector, with values and metadata
    included in the response.

    Args:
        namespace: Name of the namespace to read from.
        max_vectors: Largest namespace size this helper will try to read with
            one query. Larger namespaces are rejected with an error message.

    Returns:
        The ``matches`` list of the query response. An empty list is returned
        (after printing an error) when the namespace is not listed in the
        index stats or holds more than ``max_vectors`` vectors, and silently
        when the namespace is empty.
    """
    stats = index.describe_index_stats()
    try:
        count = stats['namespaces'][namespace]['vector_count']
    except KeyError:
        # The stats do not list this namespace, so there is nothing to fetch.
        print(f"Error: Namespace '{namespace}' was not found in the index.")
        return []

    if count > max_vectors:
        print(
            f"Error: Namespaces larger than {max_vectors} vectors "
            "need to be handled iteratively."
        )
        return []

    if count == 0:
        # Avoid issuing a query with top_k=0 for an empty namespace.
        return []

    # The query vector itself is arbitrary: top_k equals the namespace size,
    # so the intent is to get every vector back regardless of similarity.
    random_vecs = [random.random() for _ in range(dimension)]
    response = index.query(
        namespace=namespace,
        vector=random_vecs,
        top_k=count,
        include_values=True,
        include_metadata=True
    )
    return response['matches']

# Pull the contents of both source namespaces (ns1 first, then ns2)
vectors_ns1, vectors_ns2 = map(fetch_all_vectors, ("ns1", "ns2"))

In [None]:
# Convert fetched vectors to the required upsert format
def format_vectors_for_upsert(fetched_vectors):
    """Convert query matches into dictionaries suitable for ``index.upsert``.

    Args:
        fetched_vectors: Iterable of query matches, each exposing ``id`` and
            ``values`` by key and, optionally, ``metadata``.

    Returns:
        A list with one dict per match containing ``"id"`` and ``"values"``.
        ``"metadata"`` is included only when the match actually carries
        metadata, so vectors without metadata do not get a ``None`` entry.
    """
    formatted = []
    for match in fetched_vectors:
        vector = {"id": match['id'], "values": match['values']}
        # .get() tolerates matches that have no metadata field at all.
        metadata = match.get('metadata')
        if metadata is not None:
            vector["metadata"] = metadata
        formatted.append(vector)
    return formatted

# Build the upsert payloads for both source namespaces (ns1 first, then ns2)
formatted_vectors_ns1, formatted_vectors_ns2 = map(
    format_vectors_for_upsert, (vectors_ns1, vectors_ns2)
)

print(f"Preparing to upsert {len(formatted_vectors_ns1)} vectors from ns1 and \
{len(formatted_vectors_ns2)} vectors from ns2")

Note that if a vector ID exists in both `ns1` and `ns2`, the `ns1` vector will be overwritten by the `ns2` vector in the merged namespace, because `ns2` is upserted last.

In [None]:
from itertools import islice

# Upsert vectors in batches of 100
def chunks(data, size=100):
    """Yield successive tuples of at most ``size`` items taken from ``data``.

    Args:
        data: Any iterable.
        size: Maximum number of items per yielded tuple.

    Yields:
        Non-empty tuples of consecutive items; the last one may be shorter.
    """
    iterator = iter(data)
    while True:
        batch = tuple(islice(iterator, size))
        if not batch:
            # An empty slice means the iterator is exhausted.
            return
        yield batch

# Write both source namespaces into the merged namespace, batch by batch.
# ns1 goes first and ns2 second, so the order of upserts is unchanged.
target_namespace = 'merged'
for source_vectors in (formatted_vectors_ns1, formatted_vectors_ns2):
    for batch in chunks(source_vectors):
        index.upsert(vectors=batch, namespace=target_namespace)

print(f"Upserted {len(formatted_vectors_ns1)} vectors from ns1 and \
{len(formatted_vectors_ns2)} vectors from ns2 into {target_namespace}")