# Pinecone RAG Test


# Import the necessary libraries

In [2]:

import agentops
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
import numpy as np
import os
import time
from openai import OpenAI

# Pull API keys from a local .env file into the process environment
# before any client is constructed.
load_dotenv()

# Shared service clients used by every helper in this notebook.
openai_client = OpenAI()
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))


# Sample texts

In [3]:

# Small fixed corpus (State of the Union paragraphs) that gets embedded
# and indexed; each list element is one document.
SAMPLE_TEXTS = [
    (
        "The state of our Union is strong because our people are strong. "
        "Over the last year, we've made progress. Created jobs. "
        "Reduced deficit. Lowered prescription drug costs."
    ),
    (
        "We are the only country that has emerged from every crisis "
        "stronger than when we entered it. That is what we are doing again."
    ),
    (
        "We have more to do, but here is the good news: "
        "Our country is stronger today than we were a year ago."
    ),
    (
        "As I stand here tonight, we have created a record 12 million new jobs "
        "– more jobs created in two years than any president "
        "has ever created in four years."
    ),
    (
        "For decades, the middle class was hollowed out. "
        "Too many good-paying manufacturing jobs moved overseas. "
        "Factories closed down."
    ),
]


# Define helper functions

In [4]:

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector OpenAI produces for ``text``.

    Args:
        text: The input passed to the embeddings endpoint.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` field of the first item in the response data.
    """
    request = {
        "model": model,
        "input": text,
        "encoding_format": "float",
    }
    result = openai_client.embeddings.create(**request)
    first_item = result.data[0]
    return first_item.embedding

def create_index(index_name, dimension, timeout=300, poll_interval=2):
    """Create a serverless Pinecone index and wait until it reports ready.

    Args:
        index_name: Name of the index to create.
        dimension: Dimensionality of the vectors the index will hold.
        timeout: Maximum number of seconds to wait for the index to
            become ready before giving up.
        poll_interval: Seconds to sleep between readiness checks.

    Returns:
        The handle returned by ``pc.Index(index_name)``.

    Raises:
        TimeoutError: If the index does not report ready within ``timeout``
            seconds.
        Exception: Any error raised while creating the index is printed
            and then re-raised unchanged.
    """
    print(f"Creating index {index_name}...")
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1")
        )
        # Bound the wait: status-check errors are tolerated as transient, but
        # a permanent failure must not leave the caller spinning forever.
        deadline = time.monotonic() + timeout
        while True:
            try:
                description = pc.describe_index(index_name)
                status = description.get("status", {})
                if status.get("ready", False):
                    break
            except Exception as e:
                print(f"Error checking index status: {e}")
            # Checked outside the inner try so the timeout is never swallowed
            # by the status-check handler above.
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Index {index_name} was not ready after {timeout} seconds"
                )
            time.sleep(poll_interval)
        return pc.Index(index_name)
    except Exception as e:
        print(f"Error creating index: {e}")
        raise

def _wait_for_vectors(index, namespace, expected, timeout, poll_interval):
    """Best-effort wait until ``namespace`` reports at least ``expected`` vectors."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            stats = index.describe_index_stats()
            # NOTE(review): assumes the stats response exposes a `namespaces`
            # mapping whose values carry `vector_count`, as in the Pinecone
            # Python SDK documentation - confirm against the installed version.
            summary = stats.namespaces.get(namespace)
            visible = summary.vector_count if summary is not None else 0
        except Exception as e:
            # The wait is only an optimisation for the caller; never let a
            # failed stats call turn a successful upsert into a crash.
            print(f"Could not check index stats: {e}")
            return
        if visible >= expected:
            return
        if time.monotonic() >= deadline:
            print(
                f"Warning: only {visible} of {expected} vectors visible "
                f"after {timeout} seconds"
            )
            return
        time.sleep(poll_interval)


def index_documents(index, texts, timeout=60, poll_interval=1):
    """Embed ``texts`` and upsert them into ``index``.

    Each text is stored under the id ``doc<i>`` in the ``test-namespace``
    namespace with the original text kept in the ``text`` metadata field.
    After the upsert the function polls the index statistics until the
    vectors are counted in the namespace, so that queries issued right
    afterwards do not come back empty.

    Args:
        index: Index handle providing ``upsert`` and ``describe_index_stats``.
        texts: Iterable of document strings to index.
        timeout: Maximum seconds to wait for the vectors to become visible.
            Expiry only prints a warning; it does not raise.
        poll_interval: Seconds to sleep between statistics checks.
    """
    namespace = "test-namespace"
    print("Indexing documents...")
    vectors = [(f"doc{i}", get_embedding(text), {"text": text}) for i, text in enumerate(texts)]
    if not vectors:
        # Nothing to send; skip the upsert call instead of issuing an
        # empty request.
        print(f"Indexed {len(vectors)} documents")
        return
    index.upsert(vectors=vectors, namespace=namespace)
    print(f"Indexed {len(vectors)} documents")
    _wait_for_vectors(index, namespace, len(vectors), timeout, poll_interval)

def query_similar(index, query, top_k=2):
    """Look up the documents closest to ``query`` and print a short preview.

    Args:
        index: Index handle providing ``query``.
        query: Natural-language query string; it is embedded before lookup.
        top_k: Number of matches to request.

    Returns:
        The raw query response, whose ``matches`` carry score and metadata.
    """
    print(f"Querying: {query}")
    results = index.query(
        vector=get_embedding(query),
        top_k=top_k,
        namespace="test-namespace",
        include_metadata=True,
    )
    for hit in results.matches:
        # Only the first 100 characters of the stored text are shown.
        preview = hit.metadata['text'][:100]
        print(f"Score: {hit.score:.4f}, Text: {preview}...")
    return results

def generate_answer(query, context):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        query: The question to answer.
        context: Text inserted into the prompt as supporting context.

    Returns:
        The message content of the first completion choice.
    """
    user_prompt = f"Based on the following context, answer the question.\nContext:\n{context}\n\nQuestion: {query}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


# Test the RAG pipeline

In [5]:

def test_rag_pipeline():
    """Run the full RAG flow against a throwaway Pinecone index.

    Steps: recreate the index ``test-index-rag`` (deleting a leftover one
    first), index ``SAMPLE_TEXTS``, run a fixed set of queries through
    retrieval and answer generation, then delete the index. The agentops
    session is ended with ``"Success"`` when every step, including the
    final index deletion, completes, and with ``"Fail"`` otherwise.

    The index is deleted even when an earlier step fails, so a failed run
    does not leave a remote index behind.
    """
    index_name = "test-index-rag"
    dimension = 1536
    index_may_exist = False
    succeeded = False
    try:
        # list_indexes() yields index descriptions rather than bare names,
        # so membership has to be tested against .names(); comparing the
        # string to the list itself never matches.
        if index_name in pc.list_indexes().names():
            pc.delete_index(index_name)
            time.sleep(2)
        # From here on the index may exist remotely even if create_index
        # raises part-way (e.g. while waiting for readiness).
        index_may_exist = True
        index = create_index(index_name, dimension)

        # Index sample documents
        index_documents(index, SAMPLE_TEXTS)
        # Brief pause before querying; presumably gives the upserted
        # vectors time to become visible to queries.
        time.sleep(5)

        # Example queries and response generation
        test_queries = [
            "How many jobs were created according to the speech?",
            "What happened to manufacturing jobs and the middle class?",
            "What is the current state of the Union?",
            "What about education?"
        ]

        for query in test_queries:
            results = query_similar(index, query)
            context = "\n".join([match.metadata["text"] for match in results.matches])
            answer = generate_answer(query, context)
            print(f"Q: {query}\nA: {answer}\n---")

        succeeded = True
    except Exception as e:
        print(f"Error during testing: {e}")
    finally:
        # Clean up by deleting the index, whether or not the run succeeded.
        if index_may_exist:
            try:
                pc.delete_index(index_name)
                print(f"Index {index_name} deleted")
            except Exception as e:
                # A failed cleanup counts as a failed run, as it did when
                # the deletion lived inside the main try block.
                print(f"Error deleting index {index_name}: {e}")
                succeeded = False

    if not succeeded:
        agentops.end_session(end_state="Fail")
        return

    agentops.end_session(end_state="Success")
    print("RAG pipeline test completed successfully!")


# Execute the test

In [6]:

if __name__ == "__main__":
    # Start an agentops session tagged for this test, then run the pipeline;
    # the pipeline itself ends the session.
    session_tags = ["pinecone-rag-test"]
    agentops.init(default_tags=session_tags)
    test_rag_pipeline()


🖇 AgentOps: Session Replay: https://app.agentops.ai/drilldown?session_id=21dced24-4179-4784-8a86-1fba5a971f6b


Creating index test-index-rag...
Indexing documents...
Indexed 5 documents
Querying: How many jobs were created according to the speech?
Q: How many jobs were created according to the speech?
A: To provide an accurate answer, I would need the specific details or excerpt from the speech that mentions the number of jobs created. Please provide that information, and I’ll be glad to help!
---
Querying: What happened to manufacturing jobs and the middle class?
Q: What happened to manufacturing jobs and the middle class?
A: Manufacturing jobs have declined over the decades, significantly impacting the middle class. This decline can be attributed to various factors, including globalization, automation, and shifts in the economy towards service-oriented jobs. As manufacturing jobs, which often provided stable incomes and benefits, have decreased, many workers have struggled to find equivalent employment with similar pay and job security. This shift has contributed to the erosion of the middle 

🖇 AgentOps: Session Stats - Duration: 32.8s | Cost: $0.000199 | LLMs: 4 | Tools: 0 | Actions: 18 | Errors: 0 | Vectors: 11
🖇 AgentOps: Session Replay: https://app.agentops.ai/drilldown?session_id=21dced24-4179-4784-8a86-1fba5a971f6b


RAG pipeline test completed successfully!
