In [10]:
from dotenv import load_dotenv
import os
import json
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI

# Load environment variables from .env file
load_dotenv()

# Fetch the Pinecone API key
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("Pinecone API key not found in environment variables")

# Initialize Pinecone
pc = Pinecone(api_key=api_key)
print(f"Pinecone API Key: {api_key}")

# Delete the existing index if it exists
pc.delete_index(name="jobs")

# Create a Pinecone index
pc.create_index(
    name="jobs",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# Load the review data
with open("jobs.json", "r") as f:
    data = json.load(f)

processed_data = []
client = OpenAI()

# Create embeddings for each review
for job in data["jobs"]:
    response = client.embeddings.create(
        input=job['Job Position'], model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            "id": job["Company"],
            "metadata": {
                "Job Position": job["Job Position"],
                "Salary": job["Salary"],
                "Location": job["Location"],
            }
        }
    )

# Insert the embeddings into the Pinecone index
index = pc.Index("jobs")
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns2",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics
print(index.describe_index_stats())


Pinecone API Key: e4ffa4ca-0184-4f76-98a1-0e2ed43c4b84
Upserted count: 158
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}
