In [1]:
pip install pinecone

Collecting pinecone
  Downloading pinecone-8.0.0-py3-none-any.whl.metadata (11 kB)
Collecting orjson>=3.0.0 (from pinecone)
  Downloading orjson-3.11.5-cp311-cp311-macosx_15_0_arm64.whl.metadata (41 kB)
Collecting pinecone-plugin-assistant<4.0.0,>=3.0.1 (from pinecone)
  Downloading pinecone_plugin_assistant-3.0.1-py3-none-any.whl.metadata (30 kB)
Collecting pinecone-plugin-interface<0.1.0,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting packaging<25.0,>=24.2 (from pinecone-plugin-assistant<4.0.0,>=3.0.1->pinecone)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pinecone-8.0.0-py3-none-any.whl (745 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m745.9/745.9 kB[0m [31m15.4 MB/s[0m  [33m0:00:00[0m
[?25hDownloading pinecone_plugin_assistant-3.0.1-py3-none-any.whl (280 kB)
Downloading packaging-24.2-py3-none-any.whl (65 kB)
Downloading pinecone_plugin_interface-0.0.7-py3-n

In [2]:
import os

from pinecone import Pinecone, ServerlessSpec



In [4]:
# Sample corpus for the demo: each record carries a unique id, the text to embed,
# and descriptive fields (category, tag, difficulty, url).
documents = [
    dict(
        id="doc-001",
        text="Pinecone is a fully managed vector database for search and recommendation.",
        category="documentation",
        tag="pinecone",
        difficulty="beginner",
        url="https://example.com/pinecone-intro",
    ),
    dict(
        id="doc-002",
        text="To use Pinecone with Python, you create an index and upsert vectors with metadata.",
        category="documentation",
        tag="python",
        difficulty="beginner",
        url="https://example.com/pinecone-python",
    ),
    dict(
        id="doc-003",
        text="Vector databases store embeddings that capture semantic meaning for semantic search.",
        category="blog",
        tag="vector-db",
        difficulty="intermediate",
        url="https://example.com/vector-db-concepts",
    ),
    dict(
        id="doc-004",
        text="You can filter Pinecone search results using metadata such as category or difficulty.",
        category="faq",
        tag="metadata",
        difficulty="beginner",
        url="https://example.com/pinecone-metadata",
    ),
    dict(
        id="doc-005",
        text="In Retrieval-Augmented Generation, a vector database like Pinecone stores document chunks.",
        category="blog",
        tag="rag",
        difficulty="intermediate",
        url="https://example.com/rag-pinecone",
    ),
]


In [5]:
# Never hardcode the API key in the notebook: read it from the environment so the
# secret is not saved with the file. A missing variable fails fast with a KeyError.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

In [6]:
# Echo the client object to confirm it was constructed.
pc

<pinecone.pinecone.Pinecone at 0x104468290>

In [7]:
import requests
import numpy as np
from typing import List, Union

# Read the Euron API key from the environment instead of embedding the secret in
# the notebook. An empty value is tolerated here and rejected at call time so the
# cell itself can still be executed without the variable set.
EURON_API_KEY = os.environ.get("EURON_API_KEY", "")

def generate_embeddings(texts: Union[str, List[str]], timeout: float = 30.0):
    """Embed one or more texts with the Euron embeddings endpoint.

    Args:
        texts: A single string or a non-empty list of strings to embed.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A 1-D float32 array when the API returns exactly one embedding,
        otherwise a 2-D float32 array with one row per embedding.
        NOTE: a one-element list therefore also yields a 1-D vector, not a
        (1, dim) batch.

    Raises:
        ValueError: If ``texts`` is empty.
        RuntimeError: If the ``EURON_API_KEY`` environment variable is not set.
        requests.HTTPError: If the API responds with an error status.
    """
    # Always make the input a list
    if isinstance(texts, str):
        texts = [texts]

    # Fail before touching the network on input that cannot be embedded.
    if not texts:
        raise ValueError("texts must contain at least one string")
    if not EURON_API_KEY:
        raise RuntimeError("EURON_API_KEY is not set; export it before running this notebook")

    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURON_API_KEY}"
    }
    payload = {
        "input": texts,
        "model": "text-embedding-3-small"
    }

    # requests has no default timeout; without one a stalled connection hangs the kernel.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP errors directly instead of a confusing KeyError on the payload below.
    response.raise_for_status()
    data = response.json()

    # Convert each embedding to numpy array
    embeddings = [np.array(item["embedding"], dtype=np.float32) for item in data["data"]]

    # Return single vector OR batch of vectors
    return embeddings[0] if len(embeddings) == 1 else np.stack(embeddings)


In [8]:
# Name of the Pinecone index this notebook creates.
INDEX_NAME = "rev-pinecone-euri-demo"

In [9]:
# List the indexes that already exist (empty before the demo index is created).
pc.list_indexes()

[]

In [10]:
# Creating an index whose name already exists fails, so guard the call: this keeps
# the cell idempotent under "Restart Kernel -> Run All".
if not pc.has_index(INDEX_NAME):
    pc.create_index(
        name=INDEX_NAME,
        # Must equal the length of the vectors that will be upserted; 1536 is
        # presumably the size returned for "text-embedding-3-small" - confirm.
        dimension=1536,
        metric="cosine",  # similarity metric used when querying the index
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Show the index description whether it was just created or already existed.
pc.describe_index(INDEX_NAME)

{
    "name": "rev-pinecone-euri-demo",
    "metric": "cosine",
    "host": "rev-pinecone-euri-demo-imdvu8n.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "region": "us-east-1",
            "cloud": "aws",
            "read_capacity": {
                "mode": "OnDemand",
                "status": {
                    "state": "Ready",
                    "current_shards": null,
                    "current_replicas": null
                }
            }
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null,
    "_response_info": {
        "raw_headers": {
            "content-type": "application/json",
            "access-control-allow-origin": "*",
            "vary": "origin,access-control-request-method,access-control-request-headers",
            "access-control-expose-headers": "*",
            "x-pinecon

In [11]:
# Show the raw documents before extracting their text.
documents

[{'id': 'doc-001',
  'text': 'Pinecone is a fully managed vector database for search and recommendation.',
  'category': 'documentation',
  'tag': 'pinecone',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-intro'},
 {'id': 'doc-002',
  'text': 'To use Pinecone with Python, you create an index and upsert vectors with metadata.',
  'category': 'documentation',
  'tag': 'python',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-python'},
 {'id': 'doc-003',
  'text': 'Vector databases store embeddings that capture semantic meaning for semantic search.',
  'category': 'blog',
  'tag': 'vector-db',
  'difficulty': 'intermediate',
  'url': 'https://example.com/vector-db-concepts'},
 {'id': 'doc-004',
  'text': 'You can filter Pinecone search results using metadata such as category or difficulty.',
  'category': 'faq',
  'tag': 'metadata',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-metadata'},
 {'id': 'doc-005',
  'text': 'In Retrie

In [12]:
# Keep only the "text" field of each document; these strings are what gets embedded.
texts = [entry["text"] for entry in documents]

In [13]:
# Inspect the extracted texts.
texts

['Pinecone is a fully managed vector database for search and recommendation.',
 'To use Pinecone with Python, you create an index and upsert vectors with metadata.',
 'Vector databases store embeddings that capture semantic meaning for semantic search.',
 'You can filter Pinecone search results using metadata such as category or difficulty.',
 'In Retrieval-Augmented Generation, a vector database like Pinecone stores document chunks.']