In [13]:
import pandas as pd
import os
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm

In [14]:
load_dotenv()

True

In [10]:
data = pd.read_excel(r"C:\Users\smrit\Work\Kenpath\NOS-QUALS\ofqal_data.xlsx")

In [12]:
data.columns

Index(['uuid', 'page_no', 'level', 'industry', 'title', 'text'], dtype='object')

## Create Embeddings

In [5]:
#text-embedding-ada-002
#text-embedding-3-small


In [15]:
client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  
)

def get_embedding(text, model="text-embedding-3-small"):
    response = client.embeddings.create(
        input=text,
        model=model
    )
    return response.data[0].embedding

# data['embeddings'] = data['text'].apply(lambda x: get_embedding(x))

## Create Vectors list

In [7]:
#nos
vectors = [
    {
        'id': str(row['uuid']),  
        'values': row['embeddings'], 
        'metadata': {
            'nos_id': row['nos_id'],
            'industry':row['industry'],
            'title': row['title'],
            'type': row['type'],
            'text': row['text']
        }
    }
    for _, row in data.iterrows()
]

In [14]:
#ofqal
vectors = [
    {
        'id': str(row['uuid']),  
        'values': row['embeddings'], 
        'metadata': {
            'level': row['level'],
            'industry':row['industry'],
            'title': row['title'],
            'text': row['text']
        }
    }
    for _, row in data.iterrows()
]

In [15]:
len(vectors)

273

In [16]:
os.chdir(r'C:\Users\smrit\Work\Kenpath\zavmo-api')

## Creating index and upsert vectors

In [19]:
#Index
#NOS: test-nos
#OFQAL: test-ofqal

In [36]:
os.getcwd()

'C:\\Users\\Mumtaz Rahmani\\OneDrive\\Documents\\projects\\zavmo-api'

In [35]:
import os
os.chdir(os.path.dirname(os.getcwd()))

In [37]:
from pinecone_index import PineconeIndex

# Initialize PineconeIndex
pinecone_index = PineconeIndex(index_name='test-nos', dimension=1536)

INFO:pinecone_index:Connecting to existing index: test-nos


In [12]:
#pinecone_index.delete_all()

In [21]:
#delete index
#pinecone_index.delete_index()

In [22]:
# Upsert vectors into the index
pinecone_index.upsert_vectors(vectors)

INFO:pinecone_index:Upserting 273 vectors into the index


In [24]:
# Get the number of vectors in the index
vector_count = pinecone_index.get_vector_count()
print(f"Number of vectors in the index: {vector_count}")

INFO:pinecone_index:Total vectors in the index: 273


Number of vectors in the index: 273


### Querying 

In [None]:
# Example query
query_vector = data['embeddings'][0]  
search_results = pinecone_index.search_items([query_vector])

# Print search results
for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

In [25]:
#question = "I am a sales manager, what are the required skills I should know and learn according to NOS?"
competency = "Use sales-related information for planning and implementing sales activities"
query_vector = get_embedding(competency)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [10]:
from pinecone import Pinecone
pinecone_client = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
index = pinecone_client.Index('test-ofqal')  
response = index.query(
    vector=query_vector,
    top_k=10,
    include_metadata=True,
    filter={"level":"level 3"}
)

In [47]:
from IPython.display import Markdown
Markdown("".join(list(response['matches'][0]['metadata']['text'])))


Organisational policy on data storage. Data protection. Legal and ethical issues of use of salesrelated information. GDPR compliant 2. Be able to obtain salesrelated information about customers, markets and competitors 2.1 Identify the information needed to develop knowledge about the organisation’s markets, customers and competitors Identify relevant information, on sector, competitors, own organisation policies, customers and product knowledge. 2.2 Identify sources of information about the organisation’s markets, customers and competitors Sector bodies, Industry bodies, Sector magazines, internet, Company’s house, networking events, colleagues, Competitor websites and sales brochures 2.3 Gather information about the organisation’s markets, customers and competitors Extract relevant information, list and update as required. Add to company database/CRM 3. Be able to use analytical tools and methods to provide salesrelated information 3.1 Select and use a variety of analytical tools and methods to analyse salesrelated information Know how to use appropriate software packages for analysing and presenting salesrelated information. CRM systems and reporting 3.2 Present your analysis of salesrelated information Identify trends in salesrelated data. Identify the target audience for

In [39]:
query_vector = get_embedding("Occupation relavant to Sales Executive")
search_results = pinecone_index.search_items([query_vector], top_k=100)

for result in search_results:
    print(f"Found match: {result['id']} with score: {result['score']}, metadata: {result['metadata']}")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:pinecone_index:Searching the index with 1 query vectors


Found match: 30517f01-4cc5-4271-ad68-edc25c56bc04 with score: 0.496667236, metadata: {'industry': 'Sales', 'nos_id': 'INSSAL022', 'text': 'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communic

In [3]:
os.chdir(r"C:\Users\Mumtaz Rahmani\OneDrive\Documents\projects\zavmo-api\zavmo\helpers")

### Retrieving nos documents passing currentrole and filter=Sales

In [7]:
import search
results = search.fetch_nos_text(industry="Sales", current_role="Sales Manager", top_k=100)

TypeError: fetch_nos_text() got an unexpected keyword argument 'top_k'

In [17]:
query_vector = get_embedding("Sales Executive")

In [23]:
index = pinecone_client.Index('test-nos')  
response = index.query(
        vector=query_vector,
        top_k=10,
        include_metadata=True,
        filter={"industry": "Sales", "type": "Performance criteria"}
    )

In [29]:
response['matches']

{'industry': 'Sales',
 'nos_id': 'INSSAL022',
 'text': 'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communication \n10.  identify customers’ requirements through questioning and confirm these

In [28]:
response['matches'][0]['metadata']


{'industry': 'Sales',
 'nos_id': 'INSSAL022',
 'text': 'Performance criteria\nYou must be able to:\n1.    identify the target markets for sales and prepare for sales activities \n2.    identify customers to contact and the range of products and services \n3.    prepare and follow call plans, email campaigns and other means of communication \nwith customers \n4.    help the customer become comfortable with making an investment into your \nproducts of services, understanding the value of doing so \n5.    prepare sales materials and accompanying messages during contacts with \ncustomers \n6.    adhere to your organisation’s dress code while selling to customers \n7.    agree procedures for collecting contact details of potential customers \n8.    adhere to health, safety and security requirements appropriate to the face-to-face \nsales environment \n9.    contact customers through relevant means of communication \n10.  identify customers’ requirements through questioning and confirm these