In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")


In [2]:
# initializing
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)
print(pc)

<pinecone.pinecone.Pinecone object at 0x000002DE81B901F0>


In [12]:
index_name = "harish"

if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model": "llama-text-embed-v2",
            "field_map": {"text": "chunk_text"}
        }
    )


In [13]:
pc.list_indexes()

[
    {
        "name": "harish",
        "metric": "cosine",
        "host": "harish-c2qv2ns.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1024,
        "deletion_protection": "disabled",
        "tags": null,
        "embed": {
            "model": "llama-text-embed-v2",
            "field_map": {
                "text": "chunk_text"
            },
            "dimension": 1024,
            "metric": "cosine",
            "write_parameters": {
                "dimension": 1024.0,
                "input_type": "passage",
                "truncate": "END"
            },
            "read_parameters": {
                "dimension": 1024.0,
                "input_type": "query",
                "truncate": "END"
            

In [None]:
# code for deleting the index
pc.delete_index("harish-serverless")

In [4]:
from pinecone import Pinecone, PodSpec  # Use PodSpec for manual dimension-based setup

# Initialize client
pc = Pinecone(api_key=PINECONE_API_KEY)


In [5]:
index_name = "harish-serverless"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

In [6]:
pc.list_indexes()

[
    {
        "name": "harish-serverless",
        "metric": "cosine",
        "host": "harish-serverless-c2qv2ns.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1536,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [8]:
import os
from dotenv import load_dotenv
load_dotenv()
import requests
EURI_API_KEY = os.getenv("EURI_API_KEY")

In [9]:

import numpy as np
def generate_embeddings(data):
    url = "https://api.euron.one/api/v1/euri/alpha/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input": data,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    # Convert to numpy array for vector operations
    embedding = np.array(data['data'][0]['embedding'])
    
    print(f"Generated embedding with shape: {embedding.shape}")
    print(f"First 5 values: {embedding[:5]}")
    
    # Example: Calculate vector norm
    norm = np.linalg.norm(embedding)
    print(f"Vector norm: {norm}")
    
    return embedding

In [10]:
embedding_1 = generate_embeddings("my name is harish nandhan shanmugam")

Generated embedding with shape: (1536,)
First 5 values: [-0.01248293 -0.03513887 -0.01417455  0.01893439 -0.02435922]
Vector norm: 1.0000000222359349


In [11]:
embedding_1

array([-0.01248293, -0.03513887, -0.01417455, ..., -0.02846575,
        0.01287959, -0.03140566])

In [14]:
text_2 = "Artificial Intelligence is transforming every industry globally."
text_3 = "Harish is learning how to fine-tune models and store vector embeddings."
embedding_2 = generate_embeddings(text_2)
embedding_3 = generate_embeddings(text_3)


Generated embedding with shape: (1536,)
First 5 values: [-0.03403128 -0.02260049  0.01855942  0.01721996  0.04224964]
Vector norm: 0.9999999905984451
Generated embedding with shape: (1536,)
First 5 values: [-0.01416235  0.01071345  0.0397505   0.00312975  0.007857  ]
Vector norm: 1.0000000286566482


In [13]:
# pinecone upsert 
index = pc.Index('harish-serverless')
index.upsert([
    {
        "id": "item-id-001",
        "values": embedding_1.tolist(),
        "metadata": {
            "name": "harish",
            "info": "personal_info"
        }
    }
])


{'upserted_count': 1}

In [18]:
index.upsert([
    {
        "id": "item-id-002",
        "values": embedding_2.tolist(),
        "metadata": {
            "topic": "AI",
            "info": "global_industry"
        }
    },
    {
        "id": "item-id-003",
        "values": embedding_3.tolist(),
        "metadata": {
            "topic": "learning",
            "info": "model_training"
        }
    }
])


{'upserted_count': 2}

In [21]:
index.fetch(ids=["item-id-003"])

FetchResponse(namespace='', vectors={'item-id-003': Vector(id='item-id-003', values=[-0.0141623523, 0.0107134525, 0.0397505052, 0.00312975282, 0.00785700232, -0.0357162096, 0.0253906716, 0.0651129633, -0.0289453641, 0.0181120113, 0.00881620497, -0.0108615644, -0.0169553254, -0.0483551212, 0.0341081358, -0.0168424789, -0.0154459914, -0.0110661006, -0.00212294213, 0.0249533877, 0.0166449957, -0.0326693282, 0.0717709661, 0.0460135378, 0.0410764627, -0.00418593409, 0.014515, 0.0628560185, 0.0476780385, -0.0170540679, 0.0105794463, -0.0349827036, 0.00716581149, -0.0122651048, -0.0166591015, 0.0208908804, -0.00390734198, 0.0082872333, -0.0258420613, 0.058454968, -0.0177170467, 0.0142963585, -0.0307227112, 0.0360265411, 0.0708681867, 0.0159820169, -0.0480730049, -0.0116303377, -0.0350109152, 0.0202843249, -0.0443772487, 0.0348134302, -0.0431923531, -0.033967074, -0.00891494658, -0.0147406952, -0.00979656726, 0.000425602309, -0.0251367632, 0.00943686627, 0.0167437363, -0.027083382, 0.019169956

In [23]:
# search
text = "my name is harish"
embeddings_to_search = generate_embeddings(text)
result = index.query(vector=embeddings_to_search.tolist(),
            top_k=2,
            include_metadata = True)
print(result)


Generated embedding with shape: (1536,)
First 5 values: [ 0.0137952  -0.03156422 -0.03355113  0.01231759 -0.03324932]
Vector norm: 0.9999999894113698
{'matches': [{'id': 'item-id-001',
              'metadata': {'info': 'personal_info', 'name': 'harish'},
              'score': 0.762226,
              'values': []},
             {'id': 'item-id-003',
              'metadata': {'info': 'model_training', 'topic': 'learning'},
              'score': 0.408192217,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 6}}


In [24]:
index.upsert([("item-id-001",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-002",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-003",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-004",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-005",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-006",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-007",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-008",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-009",embedding_1.tolist(),{"name":"harish"})],
             
             namespace = 'harsh1')

{'upserted_count': 9}

In [25]:
index.upsert([("item-id-001",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-002",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-003",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-004",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-005",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-006",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-007",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-008",embedding_1.tolist(),{"name":"harish"}),
              ("item-id-009",embedding_1.tolist(),{"name":"harish"})],
             
             namespace = 'harsh2')

{'upserted_count': 9}

In [27]:
!pip install weaviate-client

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting weaviate-client
  Downloading weaviate_client-4.15.2-py3-none-any.whl.metadata (3.7 kB)
Collecting validators==0.34.0 (from weaviate-client)
  Downloading validators-0.34.0-py3-none-any.whl.metadata (3.8 kB)
Collecting authlib<1.3.2,>=1.2.1 (from weaviate-client)
  Downloading Authlib-1.3.1-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting grpcio-tools<2.0.0,>=1.66.2 (from weaviate-client)
  Downloading grpcio_tools-1.73.0-cp310-cp310-win_amd64.whl.metadata (5.5 kB)
Collecting grpcio-health-checking<2.0.0,>=1.66.2 (from weaviate-client)
  Downloading grpcio_health_checking-1.73.0-py3-none-any.whl.metadata (1.0 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from weaviate-client)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting cryptography (from authlib<1.3.2,>=1.2.1->weaviate-client)
  Downloading cryptography-45.0.4-cp37-abi3-win_amd64.whl.metadata (5.7 kB)
Collecting protob

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opentelemetry-proto 1.34.1 requires protobuf<6.0,>=5.0, but you have protobuf 6.31.1 which is incompatible.
tensorboard 2.10.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 6.31.1 which is incompatible.
tensorflow 2.10.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 6.31.1 which is incompatible.


In [28]:
import os
import weaviate
from weaviate.classes.init import Auth
import os
from dotenv import load_dotenv
load_dotenv()


# Best practice: store your credentials in environment variables
weaviate_url = os.environ["WEAVIATE_URL"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]

# Connect to Weaviate Cloud
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)

print(client.is_ready())

True


In [32]:
text_data = "this is to test my weaviate database by harish"
embed_weaviate = generate_embeddings("this is to test my weaviate database by harish")

Generated embedding with shape: (1536,)
First 5 values: [-0.00179439 -0.00260253  0.0619974  -0.0080094  -0.0276714 ]
Vector norm: 0.9999999845013037


In [36]:
collection = client.collections.create("Document1")
collection.data.insert(
    properties = {
        "text" : text_data,
        "author":"harish"
    },
    vector = embed_weaviate.tolist()
)

UUID('53230726-2925-4999-b03e-ee5a06e990f8')

In [40]:
collection = client.collections.get("Document1")
collection.data.insert(
    properties = {
        "text" : text_data,
        "author":"harish"
    },
    vector = embed_weaviate.tolist()
)

UUID('7d1cf872-9323-4981-a776-81bf159d6e96')

In [43]:
test_search = "harish is a student"
embeddings_search_weav = generate_embeddings(test_search)

result = collection.query.near_vector(
    near_vector = embeddings_search_weav.tolist(),
    limit=1
)

Generated embedding with shape: (1536,)
First 5 values: [ 0.00635131 -0.01573069  0.02325958  0.00679886  0.00780186]
Vector norm: 1.0000000190269043


In [47]:
for i in result.objects:
    print(i.properties['text'])

this is to test my weaviate database by harish
