### Diving into Pinecone

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [5]:
!pip3 install pinecone-client

Defaulting to user installation because normal site-packages is not writeable
Collecting pinecone-client
  Obtaining dependency information for pinecone-client from https://files.pythonhosted.org/packages/df/d4/cffbb61236c6c1d7510e835c1ff843e4e7d705ed59d21c0e5b6dc1cb4fd8/pinecone_client-2.2.4-py3-none-any.whl.metadata
  Downloading pinecone_client-2.2.4-py3-none-any.whl.metadata (7.8 kB)
Collecting loguru>=0.5.0 (from pinecone-client)
  Obtaining dependency information for loguru>=0.5.0 from https://files.pythonhosted.org/packages/03/0a/4f6fed21aa246c6b49b561ca55facacc2a44b87d65b8b92362a8e99ba202/loguru-0.7.2-py3-none-any.whl.metadata
  Downloading loguru-0.7.2-py3-none-any.whl.metadata (23 kB)
Collecting dnspython>=2.0.0 (from pinecone-client)
  Obtaining dependency information for dnspython>=2.0.0 from https://files.pythonhosted.org/packages/f6/b4/0a9bee52c50f226a3cbfb54263d02bb421c7f2adc136520729c2c689c1e5/dnspython-2.4.2-py3-none-any.whl.metadata
  Downloading dnspython-2.4.2-py3-n

In [6]:
import pinecone

pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY'),
    environment=os.environ.get('PINECONE_ENV')
)

  from tqdm.autonotebook import tqdm


In [7]:
pinecone.info.version()

VersionResponse(server='2.0.11', client='2.2.4')

#### Pinecone Indexes

In [8]:
pinecone.list_indexes()

[]

In [15]:
index_name = 'langchain-pinecone'

if index_name not in pinecone.list_indexes():
    print(f'Creating index {index_name} ...')
    pinecone.create_index(index_name, dimension=1536, metric='cosine', pods=1, pod_type='p1.x2')
    print('Done')
else:
    print('Index {index_name} already exists!')

Creating index langchain-pinecone ...
Done


In [11]:
pinecone.describe_index(index_name)

IndexDescription(name='langchain-pinecone', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='starter', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [12]:
#E deleting an index

index_name = 'langchain-pinecone'

if index_name in pinecone.list_indexes():
    print(f'Deleting index {index_name} ...')
    pinecone.delete_index(index_name)
else:
    print(f'Index {index_name} does not exist!')

Deleting index langchain-pinecone ...


In [16]:
index_name = 'langchain-pinecone'

index = pinecone.Index(index_name)

In [17]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [19]:
# inserting into a Pinecone index
import random

vectors = [[random.random() for _ in range(1536)] for v in range(5)]
# vectors

ids = list('abcde')

In [55]:
index_name = 'langchain-pinecone'
index = pinecone.Index(index_name)
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [22]:
# updating a vector

index.upsert(vectors=[('c', [0.3] * 1536)])

{'upserted_count': 1}

In [23]:
# fetching a vector

index = pinecone.Index('langchain-pinecone')
index.fetch(ids=['c','d'])

{'namespace': '',
 'vectors': {'c': {'id': 'c',
                   'metadata': {},
                   'values': [0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
                              0.3,
       

In [24]:
# deleting vectors

index.delete(ids=['b','c'])

{}

In [26]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 3e-05,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [28]:
index.fetch(ids=['b'])

{'namespace': '', 'vectors': {}}

In [41]:
# index.delete(delete_all=True) # not working

index.delete(ids=['a', 'b', 'c', 'd', 'e'])

{}

In [54]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [63]:
# querying
queries = [[random.random() for _ in range(1536)] for v in range(2)]

In [64]:
len(queries)

2

In [67]:
index.query(
    # queries=queries, # queries parameter is deprectaed
    vector=queries,
    top_k=3,
    include_values=False
)

ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'application/json', 'Content-Length': '104', 'date': 'Mon, 04 Dec 2023 08:00:32 GMT', 'x-envoy-upstream-service-time': '11', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"code":3,"message":"Vector dimension 3072 does not match the dimension of the index 1536","details":[]}
