In [1]:
pip install pinecone-client neo4j openai

Note: you may need to restart the kernel to use updated packages.


In [11]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Downloading tiktoken-0.8.0-cp312-cp312-win_amd64.whl (883 kB)
   ---------------------------------------- 0.0/883.8 kB ? eta -:--:--
   ----------- ---------------------------- 262.1/883.8 kB ? eta -:--:--
   ---------------------------------------- 883.8/883.8 kB 2.7 MB/s eta 0:00:00
Installing collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [19]:
from openai import OpenAI
import os
import tiktoken

# Initialize the OpenAI client using the environment variable
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Function to count tokens using tiktoken
def count_tokens(text, model="text-embedding-ada-002"):
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(text)
    print(f"Token count: {len(tokens)}")
    return len(tokens)

# Function to embed a document using the new OpenAI client API
def embed_document(text):
    token_count = count_tokens(text)
    if token_count < 1000:  # assuming you want to avoid high usage
        try:
            response = client.embeddings.create(
                model="text-embedding-ada-002",
                input=[text]  # Note: input must be a list of strings
            )
            embeddings = response.data[0].embedding
            print("Embedding successful:", embeddings[:5])  # Show a sample of the embedding
        except Exception as e:
            print("OpenAI request failed:", e)
    else:
        print("Input too lengthy, please shorten it.")

# Test the embedding function
text_input = "New developments in AI enhance satellite technology."
embed_document(text_input)

Token count: 8
Embedding successful: [-0.002322586951777339, 0.007827353663742542, 0.00771274184808135, -0.008097030222415924, 0.0038226612377911806]


In [27]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone instance using the updated method
try:
    pc = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")  # Now using the explicitly set environment variable
    )

    # Check if the index exists, if not, create it
    index_name = 'satellite-search'
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1536,  # Adjust dimension based on your embedding model
            metric='cosine',
            spec=ServerlessSpec(
                cloud='gcp',
                region='us-west1'  # Adjust region as needed
            )
        )

    # Connect to the index
    index = pc.Index(index_name)

    print("Pinecone Index connected successfully!")
except Exception as e:
    print("Pinecone Initialization Failed:", e)

Pinecone Initialization Failed: You haven't specified an Api-Key.


In [21]:
# Function to store embedding in Pinecone
def store_embedding_in_pinecone(text_id, embedding):
    try:
        index.upsert([(text_id, embedding)])
        print(f"Embedding for '{text_id}' stored successfully!")
    except Exception as e:
        print("Failed to store embedding:", e)

# Example usage - Store the embedding we obtained
text_input = "New developments in AI enhance satellite technology."
embedding = embed_document(text_input)
store_embedding_in_pinecone("doc1", embedding)

Token count: 8
Embedding successful: [-0.002322586951777339, 0.007827353663742542, 0.00771274184808135, -0.008097030222415924, 0.0038226612377911806]
Failed to store embedding: name 'index' is not defined
