In [1]:
import pandas as pd
import os
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm

In [2]:
load_dotenv()

True

In [10]:
data = pd.read_excel(r"C:\Users\smrit\Work\Kenpath\NOS-QUALS\ofqal_data.xlsx")

In [12]:
data.columns

Index(['uuid', 'page_no', 'level', 'industry', 'title', 'text'], dtype='object')

## Create Embeddings

In [5]:
#text-embedding-ada-002
#text-embedding-3-small


In [3]:
client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  
)

def get_embedding(text, model="text-embedding-3-small"):
    response = client.embeddings.create(
        input=text,
        model=model
    )
    return response.data[0].embedding

# data['embeddings'] = data['text'].apply(lambda x: get_embedding(x))

## Create Vectors list

In [7]:
#nos
vectors = [
    {
        'id': str(row['uuid']),  
        'values': row['embeddings'], 
        'metadata': {
            'nos_id': row['nos_id'],
            'industry':row['industry'],
            'title': row['title'],
            'type': row['type'],
            'text': row['text']
        }
    }
    for _, row in data.iterrows()
]

In [14]:
#ofqal
vectors = [
    {
        'id': str(row['uuid']),  
        'values': row['embeddings'], 
        'metadata': {
            'level': row['level'],
            'industry':row['industry'],
            'title': row['title'],
            'text': row['text']
        }
    }
    for _, row in data.iterrows()
]

In [15]:
len(vectors)

273

In [16]:
os.chdir(r'C:\Users\smrit\Work\Kenpath\zavmo-api')

## Creating index and upsert vectors

In [19]:
#Index
#NOS: test-nos
#OFQAL: test-ofqal

In [36]:
os.getcwd()

'C:\\Users\\Mumtaz Rahmani\\OneDrive\\Documents\\projects\\zavmo-api'

In [4]:
import os
os.chdir(os.path.dirname(os.getcwd()))

In [5]:
from pinecone_index import PineconeIndex

# Initialize PineconeIndex
pinecone_index = PineconeIndex(index_name='test-ofqual', dimension=1536)

  from tqdm.autonotebook import tqdm
INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference
INFO:pinecone_plugin_interface.logging:Installing plugin inference into Pinecone
INFO:pinecone_index:Connecting to existing index: test-ofqual
INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference


In [12]:
#pinecone_index.delete_all()

In [21]:
#delete index
#pinecone_index.delete_index()

In [22]:
# Upsert vectors into the index
pinecone_index.upsert_vectors(vectors)

INFO:pinecone_index:Upserting 273 vectors into the index


In [24]:
# Get the number of vectors in the index
vector_count = pinecone_index.get_vector_count()
print(f"Number of vectors in the index: {vector_count}")

INFO:pinecone_index:Total vectors in the index: 273


Number of vectors in the index: 273


### Querying 

In [None]:
# Example query
query_vector = data['embeddings'][0]  
search_results = pinecone_index.search_items([query_vector])

# Print search results
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

In [6]:
#question = "I am a sales manager, what are the required skills I should know and learn according to NOS?"
assessment_area = "Identify target markets for sales and prepare for sales activities"
query_vector = get_embedding(assessment_area)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [48]:
from pinecone import Pinecone
pinecone_client = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
index = pinecone_client.Index('test-ofqual')  
response = index.query(
    vector=query_vector,
    top_k=100,
    include_metadata=True
)

INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference
INFO:pinecone_plugin_interface.logging:Installing plugin inference into Pinecone
INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference


In [52]:
for i in response['matches']:
    print(i['metadata']['level'])
    print(i['metadata']['text'])

3.0
Delegate, share, trade off tasks with other team members or negotiate tasks with line manager. Consolidate, postpone, mechanise, simplify tasks. Importance of saying No. 2. Be able to develop a sales call plan 2.1 Prioritise customer and prospects list based on sales revenue and sales potential Personal sales targets appropriate to customer base. List customers and prospects in terms of potential value and probability of a sale. 2.2 Calculate the time to spend on high, medium and low priority customers and prospects Determine the time available to spend on each customer and prospect. 2.3 Select the most appropriate method of contacting each customer or prospect Contact methods: phone, in person, email. Awareness of when a call will suffice and when a visit is required. Making an appointment ahead of a sales call. 2.4 Describe the purpose of each call, ensuring the call gives value to the customer or prospect and to the salesperson Know what you want to achieve from each contact. Kn

In [49]:
matching_ofqal_docs_levelwise = {}
for i in response['matches']:
    level = i['metadata']['level']
    if level in matching_ofqal_docs_levelwise.keys():
        continue
    else:
        matching_ofqal_docs_levelwise[level] = i['metadata']['text']


In [50]:
matching_ofqal_docs_levelwise

{3.0: 'Delegate, share, trade off tasks with other team members or negotiate tasks with line manager. Consolidate, postpone, mechanise, simplify tasks. Importance of saying No. 2. Be able to develop a sales call plan 2.1 Prioritise customer and prospects list based on sales revenue and sales potential Personal sales targets appropriate to customer base. List customers and prospects in terms of potential value and probability of a sale. 2.2 Calculate the time to spend on high, medium and low priority customers and prospects Determine the time available to spend on each customer and prospect. 2.3 Select the most appropriate method of contacting each customer or prospect Contact methods: phone, in person, email. Awareness of when a call will suffice and when a visit is required. Making an appointment ahead of a sales call. 2.4 Describe the purpose of each call, ensuring the call gives value to the customer or prospect and to the salesperson Know what you want to achieve from each contact.

In [51]:
len(response['matches'])


100

In [26]:
from IPython.display import Markdown
Markdown("".join(list(response['matches'][0]['metadata']['text'])))


Unit aim The aim of this unit is to develop an understanding of sales targets and their use. This unit is about being responsible for meeting a sales target. It involves agreeing annual targets. It also involves monitoring actual performance against the agreed target and taking necessary action in response to identified variances and unforeseen developments. Level: 2 ISM Unit no.: U204 RQF unit reference no.: K/502/7485 Credit Value: 2 TQT: 20 Guided Learning Hours: 16 Mapping to National Occupational Standards: Sales NOS 3 Learning outcomes The learner will: 1. Understand sales targets Assessment criteria The learner can: 1.1 Explain the purpose of sales targets in an organisation 1.2 Explain how sales targets are set Indicative content 1.1 To ensure sufficient revenue is generated for the organisation to cover costs and generate sufficient profit to cover the needs of shareholders, investment and growth. Budget is set and sales targets agreed to ensure desired sales are achieved. 1.2 Distribution of the organization revenue budget requires knowledge of the business use of historical sales figures, expected growth, sales force distribution and size, territory size, product range, economic climate, seasonal

In [45]:
query_vector = get_embedding("Occupation relavant to Sales Executive")
search_results = pinecone_index.search_items([query_vector], top_k=100)

for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:pinecone_index:Searching the index with 1 query vectors


Found match: 30517f01-4cc5-4271-ad68-edc25c56bc04 with score: 0.496667236, metadata: {'industry': 'Sales', 'nos_id': 'INSSAL022', 'text': 'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communic

In [3]:
os.chdir(r"C:\Users\Mumtaz Rahmani\OneDrive\Documents\projects\zavmo-api\zavmo\helpers")

### Retrieving nos documents passing currentrole and filter=Sales

In [7]:
import search
results = search.fetch_nos_text(industry="Sales", current_role="Sales Manager", top_k=100)

TypeError: fetch_nos_text() got an unexpected keyword argument 'top_k'

In [46]:
query_vector = get_embedding("Occupation relevant to Sales Executive")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [69]:
## Get NOS ID
index = pinecone_client.Index('test-nos')  
nos_searched_from_relavant_occupations = index.query(
        vector=query_vector,
        top_k=1,
        include_metadata=True,
        filter={"industry": "Sales", "type":"Developed by"},

    )
nos_id = nos_searched_from_relavant_occupations['matches'][0]['metadata']['nos_id']


## Get NOS sections
nos_sections_from_nos_id = index.query(
        vector=query_vector,
        top_k=2,
        include_metadata=True,
        filter={"nos_id": nos_id,  
                "$or": [
        {"type": "Performance criteria"},
        {"type": "Knowledge and understanding"}]
        })

matching_nos_doc = "\n".join([match['metadata']['text'] for match in nos_sections_from_nos_id['matches']])
matching_nos_doc



'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communication \n10.  identify customers’ requirements through questioning and confirm these by \nsummarising their needs and interests \n11.  emph

In [53]:
response['matches'][0]['metadata']

{'industry': 'Sales',
 'nos_id': 'INSSAL022',
 'text': 'Developed by\nInstructus\nVersion Number\n1\nDate Approved\n13 Feb 2024\nIndicative Review Date\n28 Feb 2029\nValidity\nCurrent\nStatus\nOriginal\nOriginating Organisation\nInstructus \nOriginal URN\nCFASAL014\nRelevant Occupations\nBusiness Sales Executives, Marketing and Sales Managers, \nSales Accounts and Business Development Managers, Sales \nRelated Occupations, Telephone Salespersons\nSuite\nSales\nKeywords\nSelling; sales opportunities; identify targets for sales; create \nprospects; sales procedures; assess delegate lists; verbal and \nnon-verbal communication; unique selling points; points of \ndifferentiation; collect customer testimonials; gain commitment \nfor sales; evaluate sales approach; time management; \npromotional materials; cross-selling and up-selling\nINSSAL022 \nSell products and services face-to-face\nINSSAL022 \nSell products and services face-to-face\n4\n',
 'title': 'Sell products and services face-to-

In [28]:
response['matches'][0]['metadata']


{'industry': 'Sales',
 'nos_id': 'INSSAL022',
 'text': 'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communication \n10.  identify customers’ requirements through questioning and confirm these