In [50]:
import pandas as pd
import openai
import os
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm

In [51]:
# Load variables from a .env file into the process environment
# (OPENAI_API_KEY is read from os.environ when the OpenAI client is created).
load_dotenv()

True

In [52]:
# Location of the parsed NOS workbook. Defaults to the original local path;
# set the NOS_DATA_PATH environment variable (e.g. in .env, which is loaded
# before this cell) to run the notebook on another machine.
NOS_DATA_PATH = os.environ.get(
    "NOS_DATA_PATH",
    r"C:\Users\smrit\Work\Kenpath\NOS-QUALS\Sales NOS\parsed_nos_data.xlsx",
)
data = pd.read_excel(NOS_DATA_PATH)

In [53]:
# Show the column names of the loaded sheet.
data.columns

Index(['uuid', 'nos_id', 'title', 'type', 'text'], dtype='object')

## Create Embeddings

In [5]:
# OpenAI embedding model names:
#   text-embedding-ada-002
#   text-embedding-3-small


In [None]:
# OpenAI client authenticated with the key taken from the environment;
# a missing OPENAI_API_KEY raises KeyError right here.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding produced for `text` by the embeddings endpoint.

    Args:
        text: Value forwarded unchanged as the `input` of the request.
        model: Name of the embedding model to request.

    Returns:
        The `embedding` attribute of the first item in the response's `data`.
    """
    first_item = client.embeddings.create(model=model, input=text).data[0]
    return first_item.embedding

# Embed each row's text with the default model (one get_embedding call per row)
# and store the resulting vectors next to the source rows.
data['embeddings'] = data['text'].apply(get_embedding)

## Create Vectors list

In [55]:
def _to_vector_record(row):
    """Build one upsert record (id / values / metadata) from a DataFrame row."""
    return {
        'id': str(row['uuid']),
        'values': row['embeddings'],
        'metadata': {key: row[key] for key in ('nos_id', 'title', 'type', 'text')},
    }


# One record per row of `data`, in row order.
vectors = [_to_vector_record(row) for _, row in data.iterrows()]

In [56]:
# Number of records built (one per row of `data`).
len(vectors)

81

In [57]:
# Change the working directory to the zavmo-api checkout.
# NOTE(review): presumably done so that `pinecone_index` can be imported afterwards — confirm.
# NOTE(review): hard-coded absolute local path; this cell fails on any other machine.
os.chdir(r'C:\Users\smrit\Work\Kenpath\zavmo-api')

## Creating the index and upserting vectors

In [61]:
# Project-local helper module.
# NOTE(review): presumably importable only because of the preceding os.chdir — confirm.
from pinecone_index import PineconeIndex

# Initialize PineconeIndex for the 'test-nos' index.
# NOTE(review): dimension=1536 presumably has to equal the length of the vectors
# returned by get_embedding — confirm against the embedding model in use.
pinecone_index = PineconeIndex(index_name='test-nos', dimension=1536)

INFO:pinecone_index:Connecting to existing index: test-nos


In [45]:
#pinecone_index.delete_all()

In [62]:
# Upsert every record in `vectors` (id / values / metadata dicts) into the index
pinecone_index.upsert_vectors(vectors)

INFO:pinecone_index:Upserting 81 vectors into the index


In [63]:
# Get the number of vectors in the index
# (sanity check after the upsert; expected to equal len(vectors) if the index
# holds no other ids — confirm)
vector_count = pinecone_index.get_vector_count()
print(f"Number of vectors in the index: {vector_count}")

INFO:pinecone_index:Total vectors in the index: 81


Number of vectors in the index: 81


### Querying 

In [None]:
# Example query: reuse the stored embedding of the first row as the query vector.
# .iloc[0] selects by position, so this keeps meaning "first row" even if `data`
# no longer has a default 0..n-1 index (plain [0] is a label lookup).
query_vector = data['embeddings'].iloc[0]
# search_items is given a list of query vectors (a single one here).
search_results = pinecone_index.search_items([query_vector])

# Print search results
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

In [65]:
# Free-text question, embedded with the same helper (and default model) that
# was used for the stored texts.
question = "I am a sales manager, what are the required skills I should know and learn according to NOS?"
query_vector = get_embedding(question)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [66]:
# Search the index with the question embedding (passed as a one-element list of query vectors).
search_results = pinecone_index.search_items([query_vector])

# Print the id, score and metadata of each returned match.
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

INFO:pinecone_index:Searching the index with 1 query vectors


Found match: 63dd7e47-8d36-4c3b-bdae-18b4ed84e992 with score: 0.842815757, metadata: {'nos_id': 'INSSAL008', 'text': "Knowledge and understanding\nYou need to know and understand:\n1.    the current legal, ethical and regulatory sales requirements in relation to selling the \nproducts and services in the industry or sector \n2.    the types of competitor information for key competitor comparisons \n3.    the industry or sector pricing structures and their impact upon sales objections \n4.    the organisational procedures for negotiating and dealing with objections \n5.    how to define objectives for a negotiation based on an assessment of the \ncustomers’ negotiating stance and the organisation’s products’ and services’ strengths \nand weaknesses \n6.    the range of typical and unusual issues that might arise and the difference \nbetween sincere and insincere objections \n7.    the use of listening skills and how to summarise to confirm understanding \n8.    the types of questioning 