In [1]:
!pip install pinecone-client
!pip install openai

Collecting pinecone-client
  Downloading pinecone_client-3.2.2-py3-none-any.whl (215 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.9/215.9 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pinecone-client
Successfully installed pinecone-client-3.2.2
Collecting openai
  Downloading openai-1.23.1-py3-none-any.whl (310 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.0/311.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->http

In [4]:
import os

import numpy as np
import pandas as pd
import openai
from openai import OpenAI
from pinecone import Pinecone

# Load the question sets (the 'Questions' and 'Answers' columns are read further down)
questions_a = pd.read_csv('dataset/SETA.csv')  # Adjust path as needed
questions_b = pd.read_csv('dataset/SETB.csv')  # Adjust path as needed

# Read the OpenAI key from the environment so no credential is stored in the
# notebook; a missing variable fails immediately with a KeyError naming it.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Embedding helper: uses the OpenAI embeddings API (no Sentence Transformer model is involved)
def get_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector for a single piece of text.

    Sends ``text`` to the embeddings endpoint of the module-level OpenAI
    ``client`` and returns the embedding of the first (and only) result.

    Args:
        text: The string to embed.
        model: Name of the embedding model to use. Defaults to the model this
            notebook originally hard-coded.

    Returns:
        The ``embedding`` attribute of the first item in the response data
        (presumably a list of floats -- confirm against the OpenAI SDK docs).
    """
    # The endpoint accepts a batch of inputs, so the single string is wrapped
    # in a one-element list and the first result is read back.
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding

# Connect to existing Pinecone Vector Database
# The API key is read from the environment so it is never stored in the
# notebook; a missing variable fails immediately with a KeyError naming it.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index_name = 'technical-notes'  # The name of your existing index
index = pc.Index(index_name)

def gpt_query(question, context, model="gpt-3.5-turbo"):
    """Ask the chat model to pick an option for ``question`` given ``context``.

    Builds a three-message conversation (system prompt, context plus
    instruction, then the question) and sends it through the module-level
    OpenAI ``client``.

    Args:
        question: The question text, sent as its own user message.
        context: Supporting text placed inside the instruction message.
        model: Chat model name. Defaults to the model this notebook
            originally hard-coded.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Delimit the context explicitly: plain concatenation fused the last word
    # of the context with "Please", and "context" with the first word of it.
    instruction = (
        "Using the following context:\n"
        f"{context}\n\n"
        "Please give me the correct option. Just the Option number"
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
        {"role": "user", "content": question},
    ]

    response = client.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content


# Function to query and generate an answer using existing summaries in Pinecone
def generate_answer(question, index, top_k=3):
    """Answer ``question`` using summaries retrieved from a vector index.

    Embeds the question with ``get_embeddings``, queries ``index`` for the
    nearest matches, joins each match's ``metadata['summary']`` with single
    spaces, and passes that text to ``gpt_query`` as context.

    Args:
        question: The question text.
        index: Object exposing ``query(vector=..., top_k=..., include_metadata=...)``
            whose result supports ``['matches']``; each match must carry
            ``['metadata']['summary']``.
        top_k: How many matches to retrieve. Defaults to 3, the value this
            notebook originally hard-coded.

    Returns:
        Whatever ``gpt_query`` returns for the question and joined context.
    """
    query_vector = get_embeddings(question)
    results = index.query(vector=query_vector, top_k=top_k, include_metadata=True)
    summaries = [match['metadata']['summary'] for match in results['matches']]
    context = ' '.join(summaries)
    return gpt_query(question, context)

# Evaluate answers for Set A and Set B
# Slice positions used to pull the option token out of each string.
ANSWER_OPTION_START = 16  # offset of the option inside an 'Answers' cell (presumably after a "Correct Answer: " label -- TODO confirm against the CSVs)
OPTION_WIDTH = 2          # number of characters compared on both sides


def predicted_option(response_text):
    """Return the leading OPTION_WIDTH characters of a model response."""
    return response_text[:OPTION_WIDTH]


def expected_option(answer_text):
    """Return the OPTION_WIDTH characters starting at ANSWER_OPTION_START."""
    return answer_text[ANSWER_OPTION_START:ANSWER_OPTION_START + OPTION_WIDTH]


def option_accuracy(predicted, expected):
    """Return the fraction of positions where predicted[i] == expected[i]."""
    return np.mean([predicted[i] == expected[i] for i in range(len(predicted))])


# NOTE: despite their names, correct_answers_a / correct_answers_b hold the
# model's raw responses; the names are kept so later cells can still use them.
correct_answers_a = [generate_answer(q, index) for q in questions_a['Questions']]
correct_answers_b = [generate_answer(q, index) for q in questions_b['Questions']]

# Calculate accuracy
answer_option_a = [predicted_option(x) for x in correct_answers_a]
correct_option_a = [expected_option(x) for x in questions_a['Answers']]
accuracy_a = option_accuracy(answer_option_a, correct_option_a)

answer_option_b = [predicted_option(x) for x in correct_answers_b]
correct_option_b = [expected_option(x) for x in questions_b['Answers']]
accuracy_b = option_accuracy(answer_option_b, correct_option_b)

print(f'Accuracy for Set A: {accuracy_a:.2f}')
print(f'Accuracy for Set B: {accuracy_b:.2f}')

Accuracy for Set A: 0.88
Accuracy for Set B: 0.90
