In [1]:
import os
from openai import OpenAI
from pinecone import Pinecone
from typing import List
from dotenv import load_dotenv
load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [2]:
pinecone_client = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

openai_client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  
)

In [3]:
def get_embedding(text: str, model: str = "text-embedding-3-small") -> List[float]:
    """
    Get the embedding of a text using OpenAI's API.
    """
    response = openai_client.embeddings.create(
        input=text,
        model=model
    )
    embedding = response.data[0].embedding
    return embedding

In [13]:
query = """
Job role is Energy Compliance Consultant, 
Department is Compliance, 
Industry is Energy and Gas.

Functional/Technical Skills:
▪ Effective communicator in both written and verbal communictions, including
ability to propose practical actions and tailor communications to a variety of
levels of stakeholder (including senior management and the Board).
▪ Confident in absorbing and manipulating information/data, from multiple
sources (and multiple formats, e.g Microsoft Excel, PowerBI, operational
systems), to drive Assurance insight, review conclusions and proportionate
actions.
▪ Able to understand, identify and clearly articulate risks and controls arising in
relation to a wide range of issues, with a particular focus on conduct risk and
customer outcomes, and ability to design testing activities in line with the
department methodology.
▪ Robust and independent mindset with the confidence to raise issues
appropriately.
▪ Advanced level stakeholder skills and comfortable dealing with all grades of
colleague.
▪ Able to work effectively individually and as part of a team to deliver to tight
deadlines.

Education and Certifications:
 ▪ Demonstrable experience of compliance activity within a regulatory sector
and/or educated to degree standard (Law or Economics degree preferable)
with aspiration to work in a high performing compliance function.

Competencies:
▪ Demonstrable experience of working in a regulated environment and
understanding of the energy supply regulatory framework.
▪ Credibility to build relationships with a variety of internal stakeholders and
influence accordingly.
▪ Experience in multi-tasking, being able to navigate complex tasks & prioritise
effectively.
▪ Developing close ‘trusted adviser’ relationships with key business leaders.
▪ Wins hearts and minds across the business and inspires others to act for the
betterment of our business and its customers.
▪ Proven oral and written communication skills with the ability to explain
complex compliance requirements and prove pragmatic advice to stakeholders.
▪ Engage with stakeholders in an open, honest, and productive manner to
promote collaborative working.
▪ Effectively manage stakeholders and senior management during meetings by
adapting the style of communication to suit the situation and the individual."""

In [43]:
query = """Energy Compliance Consultant - Ethics and Compliance, Compliance"""

In [105]:

query_vector = get_embedding(query)  

    # Query the Pinecone index
index = pinecone_client.Index('nos-202501-v2')  
nos_docs = index.query(vector=query_vector,
            top_k=1,
            include_metadata=True,
            # filter={"industry": "Compliance"}
            # filter={"type":"Developed by"}
            # filter={"type":"Performance criteria"}
            # filter={"type":"Knowledge and understanding"}
            filter={"nos_id": 'UNSML009'}
        )

In [106]:
nos_docs

{'matches': [], 'namespace': '', 'usage': {'read_units': 5}}

In [79]:
required_nos_ids = """FSPAML19, FSPAML10, FSPCOMP16, FSPAML3, FSPCOMP12, FSPCOMP15, FSPAML19, FSPAML10, INSML013, FSPAML1, FSPCOMP8, FSPAML5, CFAGOR6,
SFJCCBE4.1, INSML021, PROMPR4, SFJHD2, SFJCCBE2.1, INSML017,
CLDSCCD25, INSML032, LSI CM10, INSML019, INSML019, INSML043, REC1,
INSML052, INSCS030, INSCS034, INSCS043, SKSAS22, INSCS014, INSCS039, INSCS038,
INSCS030, INSCS039, INSCS038, INSML052, INSCS010,
INSCS028, SKAOSC34, INSCS039, INSCS010, INSCS002,
FSPFCC04, FSPCOMP9, CFAGOR6, FSPCOMP2, FSPCOMP10, FSPCOMP16,
FSPCOMP3, CFARMA006, FSPCOMP6, FSPCOMP8, INSML013, CFAGOR3, ASTFM324, LANEM17, CFAGOR5, FSPCOMP4, FSPCOMP16, CFAGOR6, LANEM18,
INSML013, FSPCOMP7, FSPCOMP15, UNSML009, FSPCOMP16, CFAGOR3, ASTFM324, LANEM17"""

required_nos_ids = required_nos_ids.split(',')
required_nos_ids = [id.strip() for id in required_nos_ids]

In [80]:
query_vector = get_embedding("") 

In [119]:
from tqdm import tqdm
import pandas as pd

excel_file_path = r"/Users/mumtaz/Documents/projects/zavmo/zavmo-api/zavmo/assets/static_data/JDs_NOS_OFQUAL.xlsx"

In [120]:
existing_data = pd.read_excel(excel_file_path, sheet_name='NOS model')
existing_data

Unnamed: 0,nos_id,performance_criteria,knowledge_criteria,text,industry
0,FSPCOMP7,Performance criteria You must be able to: 1. c...,"""Knowledge and understanding You need to know ...",**NOS ID:** FSPCOMP7 \n**Title:** Assist in d...,Compliance


In [121]:

missing_nos_ids = []
for nos_id in tqdm(required_nos_ids):
    existing_data = pd.read_excel(excel_file_path, sheet_name='NOS model')
    nos_data = {}
    nos_data['nos_id'] = nos_id

    ## Get the NOS text field made for search
    index   = pinecone_client.Index('nos-202501-v2')  
    nos_doc = index.query(vector=query_vector,
            top_k=1,
            include_metadata=True,
            filter={"nos_id": nos_id}
        )
    if not len(nos_doc['matches']) == 1:
        continue

    nos_data['text'] = nos_doc['matches'][0]['metadata']['text']
    nos_data['industry'] = nos_doc['matches'][0]['metadata']['industry']

    index = pinecone_client.Index('test-nos')  
    nos_docs = index.query(vector=query_vector,
            top_k=2,
            include_metadata=True,
            filter={"nos_id": nos_id,  
                    "$or": [
            {"type": "Performance criteria"},
            {"type": "Knowledge and understanding"}]
            }
        )['matches']
   
    performance_taken = False
    knowledge_taken = False

    if len(nos_docs)>0:    
        for i in nos_docs:
            if i['metadata']['type'] == "Performance criteria":
                nos_data['performance_criteria'] = i['metadata']['text']
                performance_taken = True
            elif i['metadata']['type'] == "Knowledge and understanding":
                nos_data['knowledge_criteria'] = i['metadata']['text']
                knowledge_taken = True
                
    if performance_taken and knowledge_taken:
        existing_data = pd.concat([existing_data, pd.DataFrame([nos_data])], ignore_index=True)
        existing_data = existing_data.drop_duplicates()
        existing_data.to_excel(excel_file_path, sheet_name='NOS model', index=False)
    elif not performance_taken and not knowledge_taken:
        missing_nos_ids.append({'nos_id': nos_id, 'missing': 'Both'})
    elif not performance_taken:
        missing_nos_ids.append({'nos_id': nos_id, 'missing': 'Performance criteria'})
    elif not knowledge_taken:
        missing_nos_ids.append({'nos_id': nos_id, 'missing': 'Knowledge and understanding'})
    

100%|██████████| 71/71 [03:16<00:00,  2.77s/it]


In [122]:
existing_data

Unnamed: 0,nos_id,performance_criteria,knowledge_criteria,text,industry
0,FSPCOMP7,Performance criteria You must be able to: 1. c...,"""Knowledge and understanding You need to know ...",**NOS ID:** FSPCOMP7 \n**Title:** Assist in d...,Compliance
1,FSPAML19,Performance criteria You must be able to: 1. i...,Knowledge and understanding You need to know a...,,
2,FSPAML10,Performance criteria You must be able to: 1. a...,Knowledge and understanding You need to know a...,,
3,FSPCOMP16,Performance criteria You must be able to: 1. i...,Knowledge and understanding You need to know a...,,
4,FSPAML3,Performance criteria You must be able to: 1. i...,Knowledge and understanding You need to know a...,,
5,FSPCOMP12,Performance criteria You must be able to: 1. i...,Knowledge and understanding You need to know a...,,
6,FSPCOMP15,Performance criteria You must be able to: 1. i...,Knowledge and understanding You need to know a...,,
7,INSML013,Performance criteria You must be able to: 1. m...,effective 14. the different ways that colleagu...,,
8,FSPAML1,Performance criteria You must be able to: 1. a...,Knowledge and understanding You need to know a...,,
9,FSPCOMP8,Performance criteria You must be able to: 1. a...,Knowledge and understanding You need to know a...,,


In [53]:
nos_docs['matches'][0]['metadata']['text']

'**NOS ID:** FSPAML10  \n**Title:** Assist Senior Management in Ensuring Compliance with Anti-Money Laundering and Countering Terrorist Financing Measures  \n**Industry:** Anti-money Laundering  \n**Overview:** This NOS focuses on assisting senior management to ensure that staff within the organization understand and comply with anti-money laundering (AML) and countering terrorist financing (CTF) measures. It emphasizes the importance of support, training, and record-keeping for compliance.  \n**Relevant Roles:** Compliance Officer, Anti-Money Laundering Officer, Financial Institution Manager, Finance Manager  \n**Keywords:** Anti-money Laundering, Countering Terrorist Financing, compliance, training, monitoring, record-keeping, competencies, openness, compliance requirements'

In [46]:
nos_docs['matches'][0]['metadata']

{'industry': 'Sales (2013)',
 'nos_id': 'CFASAL021',
 'text': "**NOS ID:** CFASAL021  \n**Title:** Ensure compliance with legal, regulatory and ethical requirements  \n**Industry:** Sales (2013)  \n**Overview:** This standard focuses on managing an organization's operations to adhere to its ethical values and comply with legal and regulatory requirements, ensuring responsible behavior towards staff, customers, stakeholders, and the community. It emphasizes the importance of following laws related to health and safety, employment, and finance while aligning with industry-specific regulations.  \n**Relevant Roles:** Sales professionals, Marketing and sales managers, Business sales executives, Sales accounts and business development managers, Telephone salespersons, Sales related occupations  \n**Keywords:** Legal requirements; Regulatory requirements; Ethical requirements; Sales policies; Organizational policies; Social concerns; Compliance monitoring; Governance; Policy effectiveness; R

'Energy Compliance Consultant'

In [24]:
nos_ids = [nos['metadata']['nos_id'] for nos in nos_docs['matches']]

In [107]:
index = pinecone_client.Index('test-nos')  
nos_docs = index.query(
            vector=query_vector,
            top_k=2,
            include_metadata=True,
            filter={"nos_id": 'UNSML009',  
                    "$or": [
            {"type": "Performance criteria"},
            {"type": "Knowledge and understanding"}]
            }
        )

In [110]:
nos_docs

{'matches': [], 'namespace': '', 'usage': {'read_units': 8}}

In [108]:
nos_docs['matches'][0]['metadata']['text']

IndexError: list index out of range

In [66]:
nos_docs['matches'][1]['metadata']


{'industry': 'money laundering and countering terrorist financing measures  -Anti-money Laundering',
 'nos_id': 'FSPAML10',
 'text': 'Performance criteria You must be able to: 1. assist in developing a climate of openness about meeting or not meeting the requirements for Anti- money Laundering and Countering Terrorist Financing 2. check that relevant staff have a clear understanding of Anti-money Laundering and Countering Terrorist Financing measures, how these apply to them and their work, and the importance of complying with them 3. monitor the way staff comply with measures at regular and appropriate intervals 4. identify appropriate training where staff need support in adhering to measures 5. identify potential barriers to training and determine strategies to deal with these 6. maintain relevant records of training and competence for all employees for the required length of time 7. take prompt action to correct any failures to meet the Anti-money Laundering and Countering Terrorist

In [19]:
[(n+1,nos['metadata']['nos_id']) for n,nos in enumerate(nos_docs) if nos['metadata']['nos_id'].startswith('FSPCOMP')]

[]

In [48]:
[(nos['metadata']['industry'],nos['metadata']['nos_id'],nos['metadata']['text']) for nos in nos_docs['matches']][:11]

[('Sales (2013)',
  'CFASAL021',
  "**NOS ID:** CFASAL021  \n**Title:** Ensure compliance with legal, regulatory and ethical requirements  \n**Industry:** Sales (2013)  \n**Overview:** This standard focuses on managing an organization's operations to adhere to its ethical values and comply with legal and regulatory requirements, ensuring responsible behavior towards staff, customers, stakeholders, and the community. It emphasizes the importance of following laws related to health and safety, employment, and finance while aligning with industry-specific regulations.  \n**Relevant Roles:** Sales professionals, Marketing and sales managers, Business sales executives, Sales accounts and business development managers, Telephone salespersons, Sales related occupations  \n**Keywords:** Legal requirements; Regulatory requirements; Ethical requirements; Sales policies; Organizational policies; Social concerns; Compliance monitoring; Governance; Policy effectiveness; Reporting procedures"),
 ('M

## To Format NOS text

#### > Generate Skills and Competencies
#### > Include Industry in text (to make it more relevant)

In [35]:
import pandas as pd

excel_file = r"/Users/mumtaz/Documents/projects/zavmo/zavmo-api/zavmo/assets/static_data/JDs_NOS_OFQUAL.xlsx"

# Read each sheet into separate dataframes
jds_df = pd.read_excel(excel_file, sheet_name='JD model')
nos_df = pd.read_excel(excel_file, sheet_name='NOS model') 
ofqual_df = pd.read_excel(excel_file, sheet_name='OFQUAL model')


In [36]:
jds_df.head()

Unnamed: 0,job_role,main_purpose,work_experience,responsibilities
0,Energy Compliance Consultant,▪ The Ethics & Compliance function provides as...,▪ Effective communicator in both written and v...,▪ Assisting in the establishment of a complian...


In [39]:
for index,item in jds_df.iterrows():
    print(item)

job_role                                 Energy Compliance Consultant
main_purpose        ▪ The Ethics & Compliance function provides as...
work_experience     ▪ Effective communicator in both written and v...
responsibilities    ▪ Assisting in the establishment of a complian...
Name: 0, dtype: object


In [None]:
FSPAML19, FSPAML10, FSPCOM16