## Pineconedb Vector Database

In [3]:
# Load environment variables
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
texts = [
  "John was born in Jamshedpur, Jharkhand, India, to a modest family.",
  "His early life was marked by financial hardship and limited resources.",
  "He believed education could transform his life and pursued it relentlessly.",
  "John earned a Computer Science and Engineering degree.",
  "He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.",
  "He gained expertise in SAP WebDynpro, FiorI-UI5 HANA, Java, Big Data, and Data Analytics.",
  "John remained committed to making education accessible for everyone.",
  "His teaching empowered countless students to build their careers.",
  "Many students credit him with changing their lives through affordable learning.",
  "His journey is a testament to triumph over adversity and the power of knowledge."
]

texts

['John was born in Jamshedpur, Jharkhand, India, to a modest family.',
 'His early life was marked by financial hardship and limited resources.',
 'He believed education could transform his life and pursued it relentlessly.',
 'John earned a Computer Science and Engineering degree.',
 'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
 'He gained expertise in SAP WebDynpro, FiorI-UI5 HANA, Java, Big Data, and Data Analytics.',
 'John remained committed to making education accessible for everyone.',
 'His teaching empowered countless students to build their careers.',
 'Many students credit him with changing their lives through affordable learning.',
 'His journey is a testament to triumph over adversity and the power of knowledge.']

In [6]:
# Define a function to generate embedddings
import os
import requests
import numpy as np

EURIAI_API_KEY = os.getenv("EURIAI_API_KEY")

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURIAI_API_KEY}"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    return embedding

In [7]:
text = "The weather is sunny today."
embedding = generate_embeddings(text)

embedding

array([ 0.01067466, -0.03126568,  0.00883722, ..., -0.03035571,
        0.01221461,  0.02594584], shape=(1536,))

In [9]:
# Embeddings texts
embeddings = [generate_embeddings(i).tolist() for i in texts]
len(embeddings)

10

In [10]:
print(embeddings[0])

[0.021342762, -0.00516651, -0.009427254, 0.03008809, 0.020384941, -0.026090225, 0.05367965, 0.02519487, 0.0037558055, -0.065131865, 0.029421778, -0.00849546, -0.030504534, 0.021124128, 0.019656163, 0.027610246, 0.020436997, -0.029484246, -0.029088624, 0.01866711, 0.051680718, -0.008094633, 0.04385157, -0.022779495, 0.022342227, -0.018625464, -0.008714094, -0.032024555, 0.01254538, -0.044184722, 0.0027251064, 0.0013807726, 0.027506135, -0.03500213, 0.02066604, -0.036251463, 0.044851035, -0.022154829, 0.008958755, 0.027526958, -0.026194336, -0.04339348, -0.016824344, 0.085121185, -0.019229308, 0.017147088, 0.006855712, 0.011337692, 0.021301119, 0.029983979, 0.017407365, -0.03377362, 0.01641831, -0.015897755, -0.009193004, -0.023799783, 0.020686863, 0.0031988116, 0.000119239674, -0.02423705, -0.006793245, -0.020811796, 0.006106112, 0.008271622, 0.051097695, -0.030213023, 0.030858511, -0.019333418, -0.0073450333, -0.008589161, 0.021863317, 0.03398184, -0.012982646, 0.02161345, -0.054221027

In [12]:
# Initialize pincone
from pinecone import Pinecone

pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
index = pc.Index("sandbox")
index

<pinecone.db_data.index.Index at 0x2d86a2ad0d0>

In [13]:
# Insert single text
index.upsert(
    vectors=[(str(0), embeddings[0], {"text": texts[0]})]
)

{'upserted_count': 1}

In [17]:
records = []
for i in range(len(texts[1:])):
    records.append((str(i), embeddings[i], {"text": texts[i]}))
    
records

[('0',
  [0.021342762,
   -0.00516651,
   -0.009427254,
   0.03008809,
   0.020384941,
   -0.026090225,
   0.05367965,
   0.02519487,
   0.0037558055,
   -0.065131865,
   0.029421778,
   -0.00849546,
   -0.030504534,
   0.021124128,
   0.019656163,
   0.027610246,
   0.020436997,
   -0.029484246,
   -0.029088624,
   0.01866711,
   0.051680718,
   -0.008094633,
   0.04385157,
   -0.022779495,
   0.022342227,
   -0.018625464,
   -0.008714094,
   -0.032024555,
   0.01254538,
   -0.044184722,
   0.0027251064,
   0.0013807726,
   0.027506135,
   -0.03500213,
   0.02066604,
   -0.036251463,
   0.044851035,
   -0.022154829,
   0.008958755,
   0.027526958,
   -0.026194336,
   -0.04339348,
   -0.016824344,
   0.085121185,
   -0.019229308,
   0.017147088,
   0.006855712,
   0.011337692,
   0.021301119,
   0.029983979,
   0.017407365,
   -0.03377362,
   0.01641831,
   -0.015897755,
   -0.009193004,
   -0.023799783,
   0.020686863,
   0.0031988116,
   0.000119239674,
   -0.02423705,
   -0.00679324

In [18]:
# Insert all texts
index.upsert(records)

{'upserted_count': 9}

In [20]:
# Query results
qeury ="sudhanshu has worked at wipro"
query_embed  = generate_embeddings(qeury).tolist()

result = index.query(vector=query_embed, top_k=2,include_metadata=True)
result

{'matches': [{'id': '4',
              'metadata': {'text': 'He worked at Wipro, Deloitte, Verizon '
                                   'Labs, and Ernst & Young.'},
              'score': 0.579936,
              'values': []},
             {'id': '5',
              'metadata': {'text': 'He gained expertise in SAP WebDynpro, '
                                   'FiorI-UI5 HANA, Java, Big Data, and Data '
                                   'Analytics.'},
              'score': 0.425514251,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}