<a href="https://colab.research.google.com/github/Muntasir2179/vector-database-learning/blob/main/VD_Pinecone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing and importing dependencies

In [1]:
!pip install pinecone-client

Collecting pinecone-client
  Downloading pinecone_client-3.0.1-py3-none-any.whl (201 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/201.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/201.0 kB[0m [31m1.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.0/201.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pinecone-client
Successfully installed pinecone-client-3.0.1


In [2]:
import os
from getpass import getpass

from pinecone import Pinecone, ServerlessSpec

# Never hard-code credentials in a notebook: read the API key from the
# PINECONE_API_KEY environment variable and, if it is not set, fall back to an
# interactive prompt that does not echo the key (handy on Colab).
# NOTE: the key that used to be committed in this cell is public and should be
# revoked/rotated in the Pinecone console.
pc = Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")
)

# Accessing Indexes

In [3]:
# List every index in the project together with its configuration and status.
pc.list_indexes()

{'indexes': [{'dimension': 3,
              'host': 'stock-prices-reg6nin.svc.gcp-starter.pinecone.io',
              'metric': 'cosine',
              'name': 'stock-prices',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [4]:
# Delete the index if it exists.
# list_indexes() yields index descriptions (see its output above), not bare
# name strings, so `"index_name" in pc.list_indexes()` never matches and the
# delete would silently be skipped. Compare against the list of names instead.
if "index_name" in pc.list_indexes().names():
    pc.delete_index(name="index_name")

In [5]:
# Show the configuration (dimension, metric, spec) and readiness of "stock-prices".
pc.describe_index('stock-prices')

{'dimension': 3,
 'host': 'stock-prices-reg6nin.svc.gcp-starter.pinecone.io',
 'metric': 'cosine',
 'name': 'stock-prices',
 'spec': {'pod': {'environment': 'gcp-starter',
                  'pod_type': 'starter',
                  'pods': 1,
                  'replicas': 1,
                  'shards': 1}},
 'status': {'ready': True, 'state': 'Ready'}}

# Inserting data into the index

* `upsert()` - Updates a record if its ID already exists in the index; otherwise it inserts the record as a new one.

In [6]:
# Get a handle to the "stock-prices" index; the cells below upsert into and query it.
index = pc.Index(name="stock-prices")

In [7]:
# Write four 3-dimensional records to the index in one call.
# Only record "A" carries metadata; the others are plain id/values pairs.
index.upsert(
    vectors=[
        {
            "id": "A",
            "values": [0.1, 0.1, 0.1],
            "metadata": {"stock_name": "Tesla", "sector": "automobile"},
        },
        {"id": "B", "values": [0.2, 0.2, 0.2]},
        {"id": "C", "values": [0.3, 0.3, 0.3]},
        {"id": "D", "values": [0.4, 0.4, 0.4]},
    ],
)

{'upserted_count': 4}

In [8]:
# Inspect the index statistics (dimension, fullness, per-namespace vector counts).
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

In [9]:
import pandas as pd

# Two records to upsert from a DataFrame: id "C" was already written earlier
# (so it will be updated), while id "E" is new.
data = dict(
    id=["C", "E"],
    vector=[
        [0.4, 0.4, 0.4],
        [1.0, 3.0, 4.0],
    ],
)

df = pd.DataFrame(data)
df

Unnamed: 0,id,vector
0,C,"[0.4, 0.4, 0.4]"
1,E,"[1.0, 3.0, 4.0]"


In [10]:
# Pair each id with its vector and upsert the resulting (id, values) tuples.
index.upsert(vectors=zip(df["id"], df["vector"]))

{'upserted_count': 2}

In [11]:
# Re-check the index statistics after upserting the DataFrame rows.
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

# Query the data

> The query uses cosine similarity (the metric this index was created with) to find the nearest vectors.

In [19]:
# Nearest-neighbour search, restricted to records whose metadata has
# sector == "automobile".
response = index.query(
    vector=[0.4, 0.4, 0.3],         # the query vector
    filter={"sector": "automobile"},
    top_k=1,                        # how many of the most similar records to return
    include_values=True,            # also return the stored vector values (off by default)
    include_metadata=True,          # also return each match's metadata
)

In [20]:
# Display the query result: matches (id, score, values, metadata), namespace and usage.
response

{'matches': [{'id': 'A',
              'metadata': {'sector': 'automobile', 'stock_name': 'Tesla'},
              'score': 0.991126597,
              'values': [0.1, 0.1, 0.1]}],
 'namespace': '',
 'usage': {'read_units': 6}}