In [None]:
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
import time
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Populate the process environment from a local .env file before the client is built.
load_dotenv()
# Shared OpenAI client used by the embedding and chat-completion calls in this notebook.
# NOTE(review): presumably reads OPENAI_API_KEY from the environment — confirm.
client = OpenAI()

# Questions put to the RAG pipeline; each one is run once per token cap below.
TEST_QUERIES = [
    "What are the key capabilities of Gemini models?",
    "How does Gemini compare to other multimodal models?",
    "What are the different versions of Gemini?",
]

# Values supplied as `max_tokens` to the chat completion, one experiment per value.
MAX_TOKENS_VALUES = [
    50, 100, 200, 400, 800,
]

In [None]:
# Load the pickled RAG data from disk.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# open a rag_embeddings.pkl that comes from a trusted source.
with open('../data/rag_embeddings.pkl', 'rb') as f:
    rag_data = pickle.load(f)

# Indexed by position in retrieve_chunks; each entry is read via its 'text' key.
chunks = rag_data['chunks']
# Compared against the query embedding with cosine_similarity in retrieve_chunks.
# NOTE(review): assumes row i of `embeddings` belongs to chunks[i] and that the
# rows were produced by the same embedding model used for queries — confirm.
embeddings = rag_data['embeddings']

def get_embedding(text):
    """Return the embedding of `text` from the text-embedding-3-small model.

    Newlines in `text` are replaced with spaces before the request is made.
    The value returned is the `embedding` attribute of the first item in the
    response's `data` list.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(
        input=[single_line],
        model="text-embedding-3-small",
    )
    first_item = response.data[0]
    return first_item.embedding

def retrieve_chunks(query, k=5):
    """Return the text of the `k` stored chunks most similar to `query`.

    The query is embedded with get_embedding and scored against every row of
    the module-level `embeddings` using cosine similarity. Chunk texts are
    returned in order of decreasing similarity.
    """
    scores = cosine_similarity([get_embedding(query)], embeddings)[0]
    # argsort is ascending, so flip it to put the best match first.
    ranked = np.argsort(scores)[::-1]
    best = ranked[:k]
    return [chunks[position]['text'] for position in best]

In [None]:
def run_experiment(query, max_tokens):
    """Answer `query` from retrieved context under a completion-token cap.

    Args:
        query: Question text; also used to retrieve the context chunks.
        max_tokens: Passed as `max_tokens` to the chat completion request.

    Returns:
        dict with keys:
            "query": the input query.
            "max_tokens": the cap that was requested.
            "tokens_used": `completion_tokens` reported in the response usage.
            "answer": the message text of the first choice ("" if the model
                returned no text content).
            "word_count": number of whitespace-separated words in "answer".
    """
    context = "\n\n".join(retrieve_chunks(query))

    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=max_tokens
    )

    # `message.content` is nullable in the Chat Completions response; normalise
    # to an empty string so the word count below cannot fail on None.
    answer = response.choices[0].message.content or ""

    return {
        "query": query,
        "max_tokens": max_tokens,
        "tokens_used": response.usage.completion_tokens,
        "answer": answer,
        "word_count": len(answer.split())
    }

In [None]:
results = []

# Every (query, cap) pairing, queries outermost — the same order a nested
# loop over TEST_QUERIES then MAX_TOKENS_VALUES would visit them.
experiment_grid = [(q, cap) for q in TEST_QUERIES for cap in MAX_TOKENS_VALUES]

for query, max_tokens in experiment_grid:
    print(f"Testing {max_tokens} tokens for: {query[:40]}...")
    result = run_experiment(query, max_tokens)
    results.append(result)
    # Pause one second between consecutive API requests.
    time.sleep(1)

# One row per experiment, columns taken from the dict keys run_experiment returns.
df = pd.DataFrame(results)
print(f"Completed {len(results)} experiments")

In [None]:
# Show the numeric summary columns only; the full answer text is left out.
summary_columns = ['query', 'max_tokens', 'tokens_used', 'word_count']
df[summary_columns]

In [None]:
# For each cap, report the mean completion tokens used and that mean as a
# percentage of the cap.
for token_cap in MAX_TOKENS_VALUES:
    mean_used = df.loc[df['max_tokens'] == token_cap, 'tokens_used'].mean()
    pct_of_cap = mean_used / token_cap * 100
    print(f"Max: {token_cap} | Used: {mean_used:.1f} | Utilization: {pct_of_cap:.1f}%")