In [17]:
import json
import os
from dotenv import load_dotenv
from openai import OpenAI  # ✅ Keeping this import as requested

# OPTIONAL: If you're using Pinecone, import and initialize it
# from pinecone import Pinecone

# Load environment variables from the .env file before reading any key.
load_dotenv()

# Read both credentials from the environment so nothing is hard-coded here.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Fail fast when a key is absent or set to an empty string.
if OPENAI_API_KEY in (None, ""):
    raise ValueError("❌ OPENAI_API_KEY not found! Check .env file.")
if PINECONE_API_KEY in (None, ""):
    raise ValueError("❌ PINECONE_API_KEY not found! Check .env file.")

# OpenAI client used for the embedding requests further down.
aclient = OpenAI(api_key=OPENAI_API_KEY)

# OPTIONAL: Initialize Pinecone only if you need to upsert vectors here
# pc = Pinecone(api_key=PINECONE_API_KEY)
# print("✅ Pinecone initialized.")

print("✅ API keys loaded successfully!")

# ------------------------------------------------------------------------
# Minimal helper to handle 'answer' which may be a dict or a string
# ------------------------------------------------------------------------
def convert_answer_to_string(ans):
    """Return the 'answer' field as a string, whatever type it was loaded as.

    Args:
        ans: Raw value of an entry's 'answer' field. May be a string, a dict,
            None, or any other decoded value (list, number, bool).

    Returns:
        str: ``ans`` itself when it is already a string; ``str(ans)`` for a
        dict (including an empty one) and for any other truthy value; and ""
        for None or any other falsy non-dict value.
    """
    if isinstance(ans, str):
        return ans
    if isinstance(ans, dict):
        # Minimal approach: just turn the dict into a string
        # (If you need a more advanced approach, build a textual summary.)
        return str(ans)
    # Anything else (list, number, bool, None): falsy values collapse to "",
    # truthy ones are stringified so callers can always use string methods
    # such as .strip() on the result.
    return str(ans) if ans else ""

# ------------------------------------------------------------------------
# Load knowledge base
# ------------------------------------------------------------------------
# Input knowledge base, and the file the generated embeddings are written to.
knowledgebase_file = "knowledge_base.json"
output_embedding_file = "knowledgebase_embeddings.json"

try:
    with open(knowledgebase_file, "r", encoding="utf-8") as f:
        knowledgebase = json.load(f)
    print(f"📄 Successfully loaded {knowledgebase_file}.")
except Exception as e:
    print(f"❌ Error loading JSON file: {e}")
    # Re-raise instead of calling exit(): exit() is an interactive-shell helper
    # whose effect depends on the front end, while an exception reliably stops
    # execution here and keeps the original traceback.
    raise

# Ensure correct structure: the root must be an object with a 'qa_pairs' key.
# The isinstance check keeps a non-object root (list, number, ...) from
# slipping through or failing on the membership test.
if not isinstance(knowledgebase, dict) or "qa_pairs" not in knowledgebase:
    # Raised as ValueError, the same way missing API keys are reported.
    raise ValueError("❌ Error: 'qa_pairs' key is missing from JSON. Check file structure.")

qa_pairs = knowledgebase["qa_pairs"]
# Filled by the embedding loop; one dict per successfully embedded entry.
embeddings_data = []

# ------------------------------------------------------------------------
# Generate embeddings
# ------------------------------------------------------------------------
for idx, entry in enumerate(qa_pairs):
    # idx is the zero-based position of the entry within qa_pairs.
    try:
        # A null or otherwise non-string question has no usable text; treat it
        # as empty so the entry is skipped below instead of failing on .strip().
        question_raw = entry.get("question", "")
        question = question_raw.strip() if isinstance(question_raw, str) else ""

        # answer might be a dict or string; convert to string consistently.
        # The outer str() keeps .strip() safe whatever type the helper returns.
        answer_raw = entry.get("answer", "")
        answer = str(convert_answer_to_string(answer_raw)).strip()

        if not question or not answer:
            print(f"⚠️ Skipping entry {idx} due to missing text.")
            continue

        # Question and answer are embedded together as a single text.
        combined_text = f"Q: {question}\nA: {answer}"

        # Call embeddings endpoint via our OpenAI client
        response = aclient.embeddings.create(
            input=combined_text,
            model="text-embedding-ada-002"
        )
        # NOTE: Depending on your library version, data access may vary
        embedding = response.data[0].embedding

        # Store embedding; entries without an 'id' fall back to their position.
        embeddings_data.append({
            "id": entry.get("id", idx),
            "question": question,
            "answer": answer,
            "embedding": embedding
        })

        print(f"✅ Embedded entry {idx + 1}/{len(qa_pairs)}")

    except Exception as e:
        # Best effort: report the failure and continue with the next entry, so
        # one bad entry does not abort the whole run.
        print(f"❌ Error embedding entry {idx}: {e}")

# ------------------------------------------------------------------------
# Save embeddings
# ------------------------------------------------------------------------
# Write to a sibling temp file first and swap it into place only after the
# dump has fully succeeded, so a failure part-way through cannot leave a
# truncated output file or destroy a previously saved one.
tmp_embedding_file = f"{output_embedding_file}.tmp"
try:
    with open(tmp_embedding_file, "w", encoding="utf-8") as f:
        json.dump(embeddings_data, f, indent=4)
    # os.replace overwrites an existing destination in a single rename step.
    os.replace(tmp_embedding_file, output_embedding_file)
    print(f"✅ Embeddings successfully saved to {output_embedding_file}")
except Exception as e:
    print(f"❌ Error saving embeddings: {e}")
    # Drop the partial temp file, if any; the real output file is untouched.
    if os.path.exists(tmp_embedding_file):
        os.remove(tmp_embedding_file)


✅ API keys loaded successfully!
📄 Successfully loaded knowledge_base.json.


  response = aclient.embeddings.create(


✅ Embedded entry 1/135
✅ Embedded entry 2/135
✅ Embedded entry 3/135
✅ Embedded entry 4/135
✅ Embedded entry 5/135
✅ Embedded entry 6/135
✅ Embedded entry 7/135
✅ Embedded entry 8/135
✅ Embedded entry 9/135
✅ Embedded entry 10/135
✅ Embedded entry 11/135
✅ Embedded entry 12/135
✅ Embedded entry 13/135
✅ Embedded entry 14/135
✅ Embedded entry 15/135
✅ Embedded entry 16/135
✅ Embedded entry 17/135
✅ Embedded entry 18/135
✅ Embedded entry 19/135
✅ Embedded entry 20/135
✅ Embedded entry 21/135
✅ Embedded entry 22/135
✅ Embedded entry 23/135
✅ Embedded entry 24/135
✅ Embedded entry 25/135
✅ Embedded entry 26/135
✅ Embedded entry 27/135
✅ Embedded entry 28/135
✅ Embedded entry 29/135
✅ Embedded entry 30/135
✅ Embedded entry 31/135
✅ Embedded entry 32/135
✅ Embedded entry 33/135
✅ Embedded entry 34/135
✅ Embedded entry 35/135
✅ Embedded entry 36/135
✅ Embedded entry 37/135
✅ Embedded entry 38/135
✅ Embedded entry 39/135
✅ Embedded entry 40/135
✅ Embedded entry 41/135
✅ Embedded entry 42/135
✅

In [None]:
!pip install tiktoken

In [None]:
pip show openai
