In [26]:
import os
import pickle
import time

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

# Load variables from a .env file into the process environment before the
# client is built (presumably so OpenAI() can pick up the API key -- confirm).
load_dotenv()
client = OpenAI()

# Questions the experiment loop sends through retrieval + generation.
TEST_QUERIES = [
    "What are the key capabilities of Gemini models?",
    "How does Gemini compare to other multimodal models?", 
    "What are the different versions of Gemini?"
]

# Completion-length caps to sweep; each value is passed as `max_tokens`
# to the chat completion call for every query.
MAX_TOKENS_VALUES = [50, 100, 200, 400, 800]

In [27]:
# Location of the pickled retrieval index. Set the RAG_EMBEDDINGS_PATH
# environment variable (for example in the .env file loaded above) to run the
# notebook on another machine; the default is the original local path.
EMBEDDINGS_PATH = os.environ.get(
    "RAG_EMBEDDINGS_PATH",
    r'C:\Users\Admin\Desktop\para-expe\data\rag_embeddings.pkl',
)

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only point EMBEDDINGS_PATH at a file you created or otherwise trust.
with open(EMBEDDINGS_PATH, 'rb') as f:
    rag_data = pickle.load(f)

# `chunks` is indexed by position and each entry exposes a 'text' field;
# `embeddings` is the matrix the query vector is compared against
# (presumably one row per chunk, in the same order -- confirm).
chunks = rag_data['chunks']
embeddings = rag_data['embeddings']

def get_embedding(text):
    """Return the embedding of ``text`` from the OpenAI embeddings endpoint.

    Newlines in ``text`` are replaced with spaces before the request is sent.
    The model is fixed to "text-embedding-3-small", and the embedding of the
    first (only) input item is returned.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(
        input=[single_line],
        model="text-embedding-3-small",
    )
    return response.data[0].embedding

def retrieve_chunks(query, k=5):
    """Return the text of the ``k`` stored chunks most similar to ``query``.

    The query is embedded with ``get_embedding`` and scored against the
    module-level ``embeddings`` using cosine similarity. Chunk texts come
    back ordered from most to least similar.
    """
    query_vector = get_embedding(query)
    scores = cosine_similarity([query_vector], embeddings)[0]

    # argsort is ascending, so reverse it before taking the first k positions.
    ranked_positions = np.argsort(scores)[::-1]
    best_positions = ranked_positions[:k]

    texts = []
    for position in best_positions:
        texts.append(chunks[position]['text'])
    return texts

In [28]:
def run_experiment(query, max_tokens):
    """Answer ``query`` with retrieved context under a completion-token cap.

    Retrieves context chunks for the query, builds a single user prompt from
    them, and requests a chat completion from "gpt-4o-mini" limited to
    ``max_tokens`` completion tokens.

    Args:
        query: The question to answer.
        max_tokens: Upper bound on completion tokens for this request.

    Returns:
        A dict with keys:
            "query", "max_tokens": the inputs, echoed back.
            "tokens_used": completion tokens reported by the API.
            "answer": the generated text ("" if the API returned no content).
            "word_count": number of whitespace-separated words in the answer.
            "finish_reason": the stop reason reported by the API; "length"
                means the answer was cut off by the ``max_tokens`` cap.
    """
    context = "\n\n".join(retrieve_chunks(query))

    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=max_tokens
    )

    choice = response.choices[0]
    # message.content is nullable in the API (e.g. refusals / filtered output);
    # fall back to "" so the word count below cannot fail on None.
    answer = choice.message.content or ""

    return {
        "query": query,
        "max_tokens": max_tokens,
        "tokens_used": response.usage.completion_tokens,
        "answer": answer,
        "word_count": len(answer.split()),
        # Recorded so truncated answers can be told apart from complete ones
        # without comparing tokens_used to max_tokens by eye.
        "finish_reason": choice.finish_reason,
    }

In [29]:
# Run every (query, max_tokens) combination once, query-major, and collect
# one result dict per run.
results = []

experiment_grid = [
    (question, cap)
    for question in TEST_QUERIES
    for cap in MAX_TOKENS_VALUES
]

for query, max_tokens in experiment_grid:
    print(f"Testing {max_tokens} tokens for: {query[:40]}...")
    result = run_experiment(query, max_tokens)
    results.append(result)
    # Pause one second between requests (presumably to stay clear of rate limits).
    time.sleep(1)

df = pd.DataFrame(results)
print(f"Completed {len(results)} experiments")

Testing 50 tokens for: What are the key capabilities of Gemini ...
Testing 100 tokens for: What are the key capabilities of Gemini ...
Testing 200 tokens for: What are the key capabilities of Gemini ...
Testing 400 tokens for: What are the key capabilities of Gemini ...
Testing 800 tokens for: What are the key capabilities of Gemini ...
Testing 50 tokens for: How does Gemini compare to other multimo...
Testing 100 tokens for: How does Gemini compare to other multimo...
Testing 200 tokens for: How does Gemini compare to other multimo...
Testing 400 tokens for: How does Gemini compare to other multimo...
Testing 800 tokens for: How does Gemini compare to other multimo...
Testing 50 tokens for: What are the different versions of Gemin...
Testing 100 tokens for: What are the different versions of Gemin...
Testing 200 tokens for: What are the different versions of Gemin...
Testing 400 tokens for: What are the different versions of Gemin...
Testing 800 tokens for: What are the different vers

In [33]:
# Show the requested cap next to the tokens and words actually produced;
# the long answer text is left out of this view.
summary_columns = ['query', 'max_tokens', 'tokens_used', 'word_count']
df[summary_columns]

Unnamed: 0,query,max_tokens,tokens_used,word_count
0,What are the key capabilities of Gemini models?,50,50,38
1,What are the key capabilities of Gemini models?,100,100,72
2,What are the key capabilities of Gemini models?,200,200,137
3,What are the key capabilities of Gemini models?,400,378,275
4,What are the key capabilities of Gemini models?,800,419,303
5,How does Gemini compare to other multimodal mo...,50,50,35
6,How does Gemini compare to other multimodal mo...,100,100,69
7,How does Gemini compare to other multimodal mo...,200,200,141
8,How does Gemini compare to other multimodal mo...,400,400,306
9,How does Gemini compare to other multimodal mo...,800,437,340


In [34]:
# Full answer text for row 1 (first query, second max_tokens value).
df.loc[1, 'answer']

'The key capabilities of Gemini models include:\n\n1. **Multimodal Processing**: Gemini models can handle and integrate multiple data types, including text, code, images, audio, and video, allowing for versatile applications across various domains.\n\n2. **Advanced Reasoning**: The models exhibit strong reasoning capabilities, enabling them to tackle complex multi-step problems and perform tasks that require deliberate reasoning and understanding of context.\n\n3. **High Performance on Benchmarks**: Gemini Ultra, the most capable model in'