In [1]:
import os
import time

import pinecone
from tqdm.autonotebook import tqdm


In [2]:
# Read the API key from the environment so it never appears in the notebook.
# The value is None when PINECONE_API_KEY is not set.
pinecone_key = os.environ.get("PINECONE_API_KEY")

### Initialization

In [3]:
# Configure the Pinecone client with the key read from the environment in the
# previous cell and the environment name this notebook targets.
pinecone.init(environment='gcp-starter', api_key=pinecone_key)

### Check for Index Presence

In [4]:
# Use the first index returned by the client as the working index.
indexes = pinecone.list_indexes()
if len(indexes) == 0:
    # Without this guard `index_name` would stay undefined and the next cell
    # would fail with an unrelated-looking NameError.
    raise RuntimeError(
        "No Pinecone indexes found; create an index before running the rest of this notebook."
    )
index_name = indexes[0]

### Eventual Consistency
Pinecone is eventually consistent, so newly upserted vectors can take a while to become available for queries. To handle this, we do the following:

1. Check the vector count from the `describe_index_stats` method
2. Add a wait to ensure vectors are available.
3. Retry.

In [5]:
# Open a client handle to the index chosen in the "Check for Index Presence"
# cell; the stats and delete calls later in this notebook go through it.
index = pinecone.Index(index_name)

In [6]:
# Namespace whose vector count we want to inspect.
namespace = ''
stats = index.describe_index_stats()
# A namespace that currently holds no vectors can be absent from
# `stats['namespaces']` (see the stats output at the end of this notebook),
# so treat a missing entry as a count of zero instead of raising KeyError.
vector_count = (
    stats['namespaces'][namespace]['vector_count']
    if namespace in stats['namespaces']
    else 0
)
vector_count

In [19]:
# Build a gRPC-based handle for the first listed index. The result is not
# assigned to a name, so this cell only displays the object's repr.
# NOTE(review): presumably an experiment with the gRPC client -- confirm
# whether it is meant to replace `index` or can be removed.
pinecone.GRPCIndex(pinecone.list_indexes()[0])

<pinecone.core.grpc.index_grpc.GRPCIndex at 0x2aa222b5c30>

In [8]:

# Poll the index until the namespace reports at least one vector, following
# the check / wait / retry steps described above.
RETRY_WAIT_SECONDS = 5  # pause between polls; adjust to the expected indexing delay

for retry in range(3):
    if vector_count > 0:
        # Index namespace populated
        ## Begin querying
        break

    # Wait, then refresh the count so the next check sees current data.
    time.sleep(RETRY_WAIT_SECONDS)
    stats = index.describe_index_stats()
    # A namespace without vectors may be missing from the stats entirely.
    vector_count = (
        stats['namespaces'][namespace]['vector_count']
        if namespace in stats['namespaces']
        else 0
    )

if vector_count == 0:
    # Best-effort: report instead of raising so the notebook can continue.
    print(f"Namespace {namespace!r} still reports no vectors after retrying.")

### Delete Namespaces
Deleting a namespace may be necessary to clear out cluttered data, which ensures a clean upsert whenever new documents are uploaded.
The downside is that there may be a delay before the newly upserted vectors are available for querying.


In [10]:
# Remove every vector stored in the '' namespace so the next upsert starts
# from a clean slate. This is destructive.
index.delete(namespace='', delete_all=True)

{}

In [15]:
# Display the index statistics again to inspect the per-namespace vector
# counts after the delete call above.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.00335,
 'namespaces': {'A1PresentationAZ900AzureFundamentalspdf': {'vector_count': 272},
                'E1ExngTextOnlypdf': {'vector_count': 16},
                'IOTMLpdf': {'vector_count': 47}},
 'total_vector_count': 335}