# Diving into Pinecone
- [pinecone.io](https://www.pinecone.io/)
- [Pinecone docs](https://docs.pinecone.io/home)
- [Pinecone example notebooks](https://docs.pinecone.io/examples/notebooks)

In [2]:
%%capture
!pip install -r requirements.txt -q

In [3]:
import os
import openai
import getpass
from pinecone import Pinecone, ServerlessSpec

In [4]:
# Ask (without echoing the input) only for the keys that are not already set
# in the environment, so re-running this cell does not prompt again.
for key_name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "PINECONE_API_KEY"):
    if not os.environ.get(key_name):
        os.environ[key_name] = getpass.getpass(f"Enter your {key_name}: ")

Enter your OPENAI_API_KEY:  ········
Enter your LANGCHAIN_API_KEY:  ········
Enter your PINECONE_API_KEY:  ········


In [7]:
# Initializing and authenticating the Pinecone client.
# The API key comes from the PINECONE_API_KEY environment variable.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [8]:
# List the indexes visible to this client (empty in the recorded run).
pc.list_indexes()

{'indexes': []}

## Working with Pinecone Indexes

In [9]:
# Same listing at the start of the index section; still empty in the recorded run.
pc.list_indexes()

{'indexes': []}

In [17]:
# Reference example of creating an index.
# The call is wrapped in a string literal, so it is NOT executed; as the last
# expression of the cell, the string itself is echoed as the cell output.

"""
index_name = "langchain"

pc.create_index(
    name=index_name,
    dimension=1536, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)
"""

'\nindex_name = "langchain"\n\npc.create_index(\n    name=index_name,\n    dimension=1536, # Replace with your model dimensions\n    metric="cosine", # Replace with your model metric\n    spec=ServerlessSpec(\n        cloud="aws",\n        region="us-east-1"\n    ) \n)\n'

In [28]:
# deleting an index
# `index_name` is assigned here so that this cell works when the notebook is
# run top to bottom on a fresh kernel (no earlier cell defines it).
index_name = 'langchain'

if index_name in pc.list_indexes().names():
    print(f'Deleting index {index_name} ... ')
    pc.delete_index(index_name)
    print('Done')
else:
    print(f'Index {index_name} does not exist!')

Deleting index langchain ... 
Done


In [29]:
# Create the serverless Pinecone index unless one with this name already exists.
# Note: the starter (free) plan permits 1 project, up to 5 indexes and
# up to 100 namespaces per index.
index_name = 'langchain'
existing_index_names = pc.list_indexes().names()

if index_name in existing_index_names:
    print(f'Index {index_name} already exists!')
else:
    print(f'Creating index {index_name}')
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=serverless_spec,
    )
    print('Index created! 😊')

Creating index langchain
Index created! 😊


In [11]:
# List the indexes again; the recorded output now contains the 'langchain' index.
pc.list_indexes()

{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'langchain-9oj8ute.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'langchain',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [12]:
# Getting the complete description of one specific index, looked up by name
# (the recorded output shows dimension, host, metric, spec and status).
pc.describe_index(index_name)

{'deletion_protection': 'disabled',
 'dimension': 1536,
 'host': 'langchain-9oj8ute.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'langchain',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [13]:
# Only the names of the existing indexes, as a plain list.
index_listing = pc.list_indexes()
index_listing.names()

['langchain']

In [16]:
# Open a handle to the index by name and display its statistics
# (the recorded output shows an index with no vectors yet).
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## Working with Vectors

In [30]:
import random

# Fixed seed so the generated vectors (and everything derived from them
# further down) are the same on every Restart & Run All.
random.seed(42)

# 5 random vectors with 1536 components each (the dimension of the index)
vectors = [[random.random() for _ in range(1536)] for _ in range(5)]
print(vectors[0][:2])

[0.8885520972547745, 0.19287054128056125]


In [31]:
ids = list("abcde")

index_name = "langchain"
index = pc.Index(index_name)
# inserting the vectors into an index using the upsert method
# upsert() is typed to take a list of vectors, so the (id, values) pairs are
# materialised into a list instead of passing a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

In [22]:
# updating a vector: upserting with an existing id ("c") replaces its values
updated_values = [0.5] * 1536
index.upsert(vectors=[("c", updated_values)])

{'upserted_count': 1}

In [24]:
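# fetching vectors by id
# (call left disabled, presumably because the response would print every
# 1536-value vector in full; confirm before re-enabling)
# index.fetch(ids=["c", "d"])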

In [25]:
# Remove vectors "b" and "c" from the default namespace by their ids.
ids_to_delete = ["b", "c"]
index.delete(ids=ids_to_delete)

{}

In [32]:
# Index statistics after the vector operations above.
# NOTE(review): the recorded output reports total_vector_count 0 even though
# the execution counts show this ran right after an upsert; presumably the
# stats had not caught up with the write yet. Confirm.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [27]:
# Fetching an id that does not exist returns an empty `vectors` mapping.
missing_ids = ["x"]
index.fetch(ids=missing_ids)

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [33]:
# Build a random query vector with the same number of components as the index.
query_dimension = 1536
query_vector = [random.random() for _ in range(query_dimension)]

In [34]:
# The 3 best matches for the query vector; stored values are not requested,
# so each match carries only its id and score.
index.query(vector=query_vector, top_k=3, include_values=False)

{'matches': [{'id': 'e', 'score': 0.761978, 'values': []},
             {'id': 'c', 'score': 0.753419757, 'values': []},
             {'id': 'a', 'score': 0.749340713, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

## Namespaces

In [36]:
# three random vectors, stored under their own namespace
vectors = [[random.random() for _ in range(1536)] for _ in range(3)]
ids = list("xyz")
# upsert() is typed to take a list of vectors, so materialise the
# (id, values) pairs instead of passing a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)), namespace="xyz-namespace")

{'upserted_count': 3}

In [37]:
# two random vectors, stored under a second namespace
vectors = [[random.random() for _ in range(1536)] for _ in range(2)]
ids = list("qp")
# upsert() is typed to take a list of vectors, so materialise the
# (id, values) pairs instead of passing a one-shot zip iterator
index.upsert(vectors=list(zip(ids, vectors)), namespace="qp-namespace")

{'upserted_count': 2}

In [38]:
# Statistics are broken down per namespace; in the recorded output the
# default namespace appears under the key ''.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'qp-namespace': {'vector_count': 2},
                'xyz-namespace': {'vector_count': 3}},
 'total_vector_count': 10}

In [39]:
# No namespace is given here; the recorded response has namespace '' and no
# vectors, although "x" was upserted into "xyz-namespace".
index.fetch(ids=["x"])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [42]:
# fetching the same id from the namespace it was upserted into (call left disabled)
# index.fetch(ids=["x"], namespace="xyz-namespace")

In [43]:
# Delete a single vector by id from within a specific namespace.
index.delete(namespace="xyz-namespace", ids=["x"])

{}

In [44]:
# Delete every vector in the namespace; in the recorded stats that follow,
# "xyz-namespace" is no longer listed.
index.delete(delete_all=True, namespace="xyz-namespace")

{}

In [45]:
# Final statistics: only the default namespace and "qp-namespace" remain
# in the recorded output.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5}, 'qp-namespace': {'vector_count': 2}},
 'total_vector_count': 7}