In [1]:
!pip install pinecone-client sentence-transformers



In [2]:
import os

# Read the API key from the environment so the secret never has to be typed
# into (and saved with) the notebook. Falls back to the previous hard-coded
# value (an empty string) when the variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
# Same value the index-creation cell uses as the serverless region.
PINECONE_ENV = "us-east-1"
# Name of the index that the following cells create and write to.
PINECONE_INDEX_NAME = "testindex5"

# Create Index

In [3]:
from pinecone import Pinecone, ServerlessSpec

# Client handle; the upsert cell further down reuses `pc`.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index only when it is missing, so re-running this cell
# (e.g. Restart & Run All) does not fail on an index that already exists.
if PINECONE_INDEX_NAME not in pc.list_indexes().names():
    # New Pinecone index that supports 384-dimensional embeddings
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=384,  # Must match `all-MiniLM-L6-v2`
        metric="cosine",  # Use "euclidean" or "dotproduct" if needed
        # Region comes from the config cell instead of a duplicated literal.
        spec=ServerlessSpec(cloud="aws", region=PINECONE_ENV),
    )
    print(f"Created new index: {PINECONE_INDEX_NAME}")
else:
    print(f"Index already exists: {PINECONE_INDEX_NAME}")


Created new index: testindex5


# Upsert Data to Pinecone

In [4]:
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer


# Fail fast with a clear message if the target index has not been created yet
if PINECONE_INDEX_NAME not in pc.list_indexes().names():
    raise ValueError(f"Index '{PINECONE_INDEX_NAME}' does not exist. Please create it first.")

# Connect to the existing Pinecone index
index = pc.Index(PINECONE_INDEX_NAME)

# Text records to embed and store
texts = [
    "Machine learning enables computers to learn from data and improve performance over time.",
    "Deep learning is a subset of machine learning that uses neural networks to model complex patterns.",
    "Natural language processing (NLP) allows computers to understand and generate human language.",
    "Computer vision enables machines to interpret and process visual data from the world.",
    "Reinforcement learning trains agents to make decisions by rewarding desirable actions.",
]

# Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every text in one batched call instead of one model call per text;
# `.tolist()` turns the result into plain Python lists for the upsert payload.
embeddings = model.encode(texts).tolist()

# One (id, values, metadata) tuple per record; IDs are text_1, text_2, ...
vectors = [
    (f"text_{position}", embedding, {"text": text})
    for position, (text, embedding) in enumerate(zip(texts, embeddings), start=1)
]

# Upsert multiple records into Pinecone in a single batch
index.upsert(vectors)

print(f"Successfully upserted {len(texts)} records into Pinecone!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Successfully upserted 5 records into Pinecone!
