In [1]:
!pip install pinecone-client




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import getpass
import time
import numpy as np
import os
from sentence_transformers import SentenceTransformer

from pinecone import Pinecone, ServerlessSpec

# Inicializar Pinecone
if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

In [7]:
pc = Pinecone(api_key=pinecone_api_key)

index_name = "index-with-metadata"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [8]:
# ===== 3. Insert Vectors with Metadata =====
books = [
    {
        "id": "book1",
        "vector": np.random.rand(384).tolist(),  # Replace with real embeddings
        "metadata": {
            "title": "One Hundred Years of Solitude",
            "author": "Gabriel García Márquez",
            "genre": "magical realism",
            "year": 1967
        }
    },
    {
        "id": "book2",
        "vector": np.random.rand(384).tolist(),
        "metadata": {
            "title": "Ficciones",
            "author": "Jorge Luis Borges",
            "genre": "philosophical short stories",
            "year": 1944
        }
    }
]

# Upsert vectors into the index
index.upsert(
    vectors=[
        (book["id"], book["vector"], book["metadata"]) 
        for book in books
    ]
)
print("Vectors inserted successfully.")

Vectors inserted successfully.


In [9]:
# ===== 4. Query Examples =====
# Generate a random query vector (replace with real embeddings)
query_vector = np.random.rand(384).tolist()

# ---- 4.1 Basic Semantic Search ----
basic_results = index.query(
    vector=query_vector,
    top_k=2,
    include_metadata=True
)

print("\n=== Top 2 Most Similar Books ===")
for match in basic_results["matches"]:
    print(f"• {match['metadata']['title']} by {match['metadata']['author']}")
    print(f"  Genre: {match['metadata']['genre']}")
    print(f"  Similarity Score: {match['score']:.3f}\n")


=== Top 2 Most Similar Books ===
• One Hundred Years of Solitude by Gabriel García Márquez
  Genre: magical realism
  Similarity Score: 0.710

• Ficciones by Jorge Luis Borges
  Genre: philosophical short stories
  Similarity Score: 0.699



In [10]:
# ---- 4.2 Filtered Search ----
filtered_results = index.query(
    vector=query_vector,
    top_k=2,
    filter={
        "genre": {"$eq": "magical realism"},
        "year": {"$gte": 1900}
    },
    include_metadata=True
)

print("\n=== Filtered Results (Magical Realism, 1900+) ===")
for match in filtered_results["matches"]:
    print(f"• {match['metadata']['title']} ({match['metadata']['year']})")


=== Filtered Results (Magical Realism, 1900+) ===
• One Hundred Years of Solitude (1967.0)
