# Pinecone Toolkit — Test Notebook

Test the Pinecone vector store toolkit: config, parsing, embeddings, and vector operations.

Sections 1–2 run without API keys. Section 3 requires an OpenAI API key, and Section 4 requires both Pinecone and OpenAI keys.

In [None]:
import os
import sys
from pathlib import Path

# Locate the project root: when the kernel starts inside a folder named
# "pinecone" the root is two levels up; otherwise the working directory is used.
if Path.cwd().name == "pinecone":
    PROJECT_ROOT = Path.cwd().parent.parent
else:
    PROJECT_ROOT = Path.cwd()

# Put the root at the front of the import path, but only if it is not already
# listed, so re-running this cell does not add duplicates.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

print(f"Project root: {PROJECT_ROOT}")

## 1. PineconeConfig

Test creating configs from different sources.

In [None]:
from tools.pinecone.config import PineconeConfig

# Build a config straight from explicit values (placeholder credentials only).
direct_settings = {
    "api_key": "pk-test-key-123",
    "index_name": "my-chatbot-index",
    "namespace": "products",
    "cloud": "aws",
    "region": "us-east-1",
}
cfg = PineconeConfig(**direct_settings)

# Echo the stored fields; only the first 10 characters of the key are shown.
print(
    f"API key:    {cfg.api_key[:10]}...",
    f"Index:      {cfg.index_name}",
    f"Namespace:  {cfg.namespace}",
    f"Cloud:      {cfg.cloud}",
    f"Region:     {cfg.region}",
    sep="\n",
)

In [None]:
# Load a config from the shared config.json, skipping quietly when the file
# is not present on this machine.
config_path = PROJECT_ROOT / "_config files" / "config.json"
if not config_path.exists():
    print(f"Config not found at {config_path} — skipping")
else:
    cfg_from_json = PineconeConfig.from_json(str(config_path))
    print(f"Loaded from config.json: index={cfg_from_json.index_name}, ns={cfg_from_json.namespace}")

## 2. Document Parsing

Test parsing text and CSV into upsert-ready chunks.

In [None]:
from tools.pinecone.parser import parse_kb_text

# Sample knowledge-base text in the KB chunk format (same format used in
# .docx files): header fields, a TEXT body, and an end-of-chunk marker.
kb_text = """
KB_ID: faq-001
TYPE: faq
TITLE: Return Policy
TEXT:
You can return any item within 30 days of purchase for a full refund.
Items must be in original condition with tags attached.
--- KB_CHUNK_END ---

KB_ID: faq-002
TYPE: faq
TITLE: Shipping
TEXT:
We offer free shipping on orders over $50. Standard shipping takes 3-5
business days. Express shipping (1-2 days) is available for $9.99.
--- KB_CHUNK_END ---

KB_ID: product-001
TYPE: product
TITLE: Rose Gold Necklace
TEXT:
Beautiful 18k rose gold necklace with a delicate chain. Length: 18 inches.
Price: $149.99. Available in rose gold, yellow gold, and silver.
--- KB_CHUNK_END ---
"""

chunks = parse_kb_text(kb_text)
print(f"Parsed {len(chunks)} chunk(s):\n")

# One record per chunk: the text preview is capped at 80 characters, and the
# trailing empty string yields the blank separator line between records.
for chunk in chunks:
    print(
        f"  ID:    {chunk['id']}",
        f"  Type:  {chunk['type']}",
        f"  Title: {chunk['title']}",
        f"  Text:  {chunk['text'][:80]}...",
        "",
        sep="\n",
    )

In [None]:
import tempfile, os, csv

from tools.pinecone.parser import parse_csv

# Write a small CSV fixture and parse it.
# The file lives in a private temporary directory, so repeated or concurrent
# runs never collide on a shared path and nothing is left behind afterwards.
sample_rows = [
    ["id", "text", "category"],
    ["item-1", "Silk scarf in midnight blue. Hand-rolled edges.", "accessories"],
    ["item-2", "Leather tote bag. Full-grain Italian leather.", "bags"],
    ["item-3", "Sterling silver earrings with moonstone.", "jewelry"],
]

with tempfile.TemporaryDirectory() as tmp_dir:
    csv_path = os.path.join(tmp_dir, "test_kb.csv")
    # newline="" is what the csv module expects for files it writes; the
    # explicit encoding keeps the fixture identical on every platform.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(sample_rows)
    # Parse while the file still exists; the directory is removed on exit.
    # Assumes parse_csv reads the file eagerly (its result is passed to len()
    # below) — TODO confirm.
    csv_chunks = parse_csv(csv_path)

print(f"Parsed {len(csv_chunks)} row(s) from CSV:\n")
for chunk in csv_chunks:
    print(f"  {chunk}")

## 3. Embeddings (requires OpenAI API key)

Test creating embedding vectors from text.

In [None]:
from tools.pinecone.embeddings import embed_text, embed_batch, make_embed_fn, get_model_dimensions

# Report the vector size for the two short aliases and for one full model name.
print(
    f"small model dimensions: {get_model_dimensions('small')}",
    f"large model dimensions: {get_model_dimensions('large')}",
    f"text-embedding-3-small: {get_model_dimensions('text-embedding-3-small')}",
    sep="\n",
)

In [None]:
# Single embedding.
# The key is read from the environment so no secret is ever typed into (or
# saved with) the notebook. Without a key the cell reports that and moves on,
# which keeps a full "Run All" working on machines that have no credentials.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

if OPENAI_API_KEY:
    vector = embed_text("What is your return policy?", api_key=OPENAI_API_KEY)
    print(f"Vector length: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")
else:
    print("Set the OPENAI_API_KEY environment variable to test embeddings.")

In [None]:
# Batch embeddings: embed several texts in one call.
# The key comes from the environment rather than from a value pasted into the
# notebook; when it is missing the cell prints a hint instead of failing.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

if OPENAI_API_KEY:
    vectors = embed_batch(
        ["return policy", "shipping cost", "product catalog"],
        api_key=OPENAI_API_KEY,
    )
    print(f"Batch: {len(vectors)} vectors, each {len(vectors[0])} dims")
else:
    print("Set the OPENAI_API_KEY environment variable to test batch embeddings.")

## 4. VectorStore Operations (requires Pinecone + OpenAI keys)

Test upserting and querying vectors.

In [None]:
from tools.pinecone.vector_store import VectorStore

# Connect to the vector store, check index stats, and fetch context for a
# sample question.
#
# The code below is left commented out so the notebook still runs without
# credentials. Before enabling it:
#   - put valid keys in "_config files/config.json" under PROJECT_ROOT, and
#   - make sure OPENAI_API_KEY is defined (see the embeddings cells in section 3).
# NOTE: enabling this rebinds `cfg`, replacing the placeholder config that
# section 1 created.
# Uncomment after setting up config.json with valid keys:

# cfg = PineconeConfig.from_json(str(PROJECT_ROOT / "_config files" / "config.json"))
# embed_fn = make_embed_fn(api_key=OPENAI_API_KEY)
# store = VectorStore(cfg, embed_fn=embed_fn)

# # Check index stats
# stats = store.stats()
# print(f"Total vectors: {stats.get('total_vector_count', 0)}")

# # Query for context
# context = store.get_context("What is the return policy?", top_k=3)
# print(f"\nContext:\n{context}")

# Placeholder output while the code above is disabled.
print("Uncomment after setting up config.json with valid Pinecone and OpenAI keys.")

In [None]:
# Upsert the KB chunks we parsed earlier
#
# Depends on state from other cells: `chunks` comes from the parse_kb_text
# cell in section 2, while `store` and `embed_fn` only exist once the
# VectorStore cell above has been uncommented and run.
# NOTE(review): upsert_texts presumably writes to the configured Pinecone
# index — confirm the target index/namespace before enabling.

# store.upsert_texts(chunks, embed_fn=embed_fn)
# print(f"Upserted {len(chunks)} chunks")

# # Query them back
# results = store.query_text("Do you ship internationally?", top_k=3)
# for r in results:
#     print(f"  Score: {r['score']:.4f}  ID: {r['id']}")
#     print(f"  Text: {r['metadata'].get('text', '')[:80]}")
#     print()

# Placeholder output while the code above is disabled.
print("Uncomment after setting up VectorStore above.")