In [3]:
import pandas as pd
import os
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm

In [4]:
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where OPENAI_API_KEY (read further down) comes from — confirm.
load_dotenv()

True

In [5]:
# Location of the Ofqual workbook. The original local path stays as the default;
# set OFQUAL_DATA_PATH (e.g. in .env) to run the notebook on another machine.
data_path = os.environ.get(
    "OFQUAL_DATA_PATH",
    r"C:\Users\smrit\Work\Kenpath\NOS-QUALS\ofqal_data.xlsx",
)
data = pd.read_excel(data_path)

In [9]:
# Peek at the `level` value of the row labelled 0 with a single label-based lookup
data.loc[0, 'level']

2

## Create Embeddings

In [5]:
# Embedding models considered:
#   text-embedding-ada-002
#   text-embedding-3-small  (the default used by get_embedding below)


In [7]:
# Shared OpenAI client. The key is read straight from the environment, so a
# missing OPENAI_API_KEY raises KeyError here rather than on the first request.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the module-level OpenAI ``client``.

    Args:
        text: Input forwarded unchanged as the ``input`` of the embeddings request.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.
    """
    result = client.embeddings.create(model=model, input=text)
    first_item = result.data[0]
    return first_item.embedding

# Embed each row's text (one embeddings request per row) and store the vector next to it
data['embeddings'] = data['text'].apply(get_embedding)

## Create Vectors list

In [7]:
# NOS dataset: one upsert record per row of `data`, keyed by the row's uuid.
# NOTE(review): the Ofqual cell further down assigns `vectors` as well, so the
# cell that ran last decides what gets upserted.
def _nos_record(row):
    """Map one NOS row to an {'id', 'values', 'metadata'} record."""
    metadata = {
        'nos_id': row['nos_id'],
        'industry': row['industry'],
        'title': row['title'],
        'type': row['type'],
        'text': row['text'],
    }
    return {'id': str(row['uuid']), 'values': row['embeddings'], 'metadata': metadata}


vectors = [_nos_record(row) for _, row in data.iterrows()]

In [10]:
# Ofqual dataset: one upsert record per row of `data`, keyed by the row's uuid.
# NOTE(review): this replaces any `vectors` list built by the NOS cell above.
def _ofqual_record(row):
    """Map one Ofqual row to an {'id', 'values', 'metadata'} record."""
    metadata = {
        'level': row['level'],
        'industry': row['industry'],
        'title': row['title'],
        'text': row['text'],
    }
    return {'id': str(row['uuid']), 'values': row['embeddings'], 'metadata': metadata}


vectors = [_ofqual_record(row) for _, row in data.iterrows()]

In [11]:
# Sanity check: one record is built per row of `data`, so this should equal len(data)
len(vectors)

273

In [12]:
# Switch the working directory to the zavmo-api checkout — presumably so the
# project-local `pinecone_index` module imported in the next cell resolves; TODO confirm.
os.chdir(r'C:\Users\smrit\Work\Kenpath\zavmo-api')

## Create the index and upsert vectors

In [1]:
# Index names
#   NOS:    test-nos
#   Ofqual: test-ofqual

In [15]:
from pinecone_index import PineconeIndex

# Index to work against, and the vector size it is configured with.
# 1536 is the default output size of text-embedding-3-small, the model
# get_embedding uses unless told otherwise.
INDEX_NAME = 'test-ofqual'
EMBEDDING_DIMENSION = 1536

# Initialize PineconeIndex (the recorded run logged "Creating index: test-ofqual")
pinecone_index = PineconeIndex(index_name=INDEX_NAME, dimension=EMBEDDING_DIMENSION)

INFO:pinecone_index:Creating index: test-ofqual


In [12]:
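# Destructive — uncomment only to clear the index (presumably removes every vector; confirm in pinecone_index)
#pinecone_index.delete_all()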

In [16]:
# Destructive — uncomment only to delete the index itself
#pinecone_index.delete_index()

In [17]:
# Upsert vectors into the index
# (sends every record currently held in `vectors`; the wrapper logs how many it upserts)
pinecone_index.upsert_vectors(vectors)

INFO:pinecone_index:Upserting 273 vectors into the index


In [19]:
# Get the number of vectors in the index
# (after the upsert this is expected to equal len(vectors); the recorded run shows 273 for both)
vector_count = pinecone_index.get_vector_count()
print(f"Number of vectors in the index: {vector_count}")

INFO:pinecone_index:Total vectors in the index: 273


Number of vectors in the index: 273


### Querying 

In [None]:
# Example query: reuse the stored embedding of the row labelled 0 as the query vector
query_vector = data.loc[0, 'embeddings']
# search_items takes a list of query vectors; only one is sent here
search_results = pinecone_index.search_items([query_vector])

# Print search results
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

In [25]:
#question = "I am a sales manager, what are the required skills I should know and learn according to NOS?"
# Free-text competency statement to look up. It is embedded with the same
# get_embedding helper (and default model) that produced data['embeddings'].
competency = "Use sales-related information for planning and implementing sales activities"
query_vector = get_embedding(competency)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [26]:
# Search the index with the competency embedding, passed as a one-element list of query vectors
search_results = pinecone_index.search_items([query_vector])

# Each result is read as a mapping with 'id', 'score' and 'metadata' keys
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

INFO:pinecone_index:Searching the index with 1 query vectors


Found match: 418eaa69-485e-44a7-9d6b-f62f81df6a00 with score: 0.646544218, metadata: {'industry': 'Sales', 'level': 'level 5', 'text': '2.1 Evaluate own organisation’s available sales information Scope and accuracy of sales information. Variables measured. 2.2 Evaluate other information available in own organisation that could contribute to sales forecasting activities Assess data such as customer records. Market research, marketing environment predictions, product/service developments and pricing changes. Finance records. 2.3 Evaluate how plans for product and service development may impact on sales forecasts Assess the likely contribution of any new product/service based on anticipated customer demand, timing of launch and expected competitor reaction. 2.4 Compare past variances between forecast sales and sales achieved and reasons for those variances Using historical data and feedback to match performance against forecast in previous sales periods. 3. Be able to forecast sales 3.1 U

In [33]:
#os.chdir(r"C:\Users\smrit\Work\Kenpath\zavmo-api\zavmo\helpers")

### Retrieving NOS documents for a given current role, filtered to industry "Sales"

In [31]:
# NOTE(review): `search` is a project-local module not shown in this notebook —
# presumably zavmo/helpers/search.py, given the commented-out chdir above; confirm
# it is importable from the current working directory.
import search
# Ask the helper for NOS text for a "Sales Manager" role, restricted to the "Sales" industry
results = search.fetch_nos_text(industry="Sales", current_role="Sales Manager")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [32]:
# Display what was retrieved (a list of text strings in the recorded output)
results

['Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communication \n10.  identify customers’ requirements through questioning and confirm these by \nsummarising their needs and interests \n11.  emp