In [8]:
import os
import json

from ai21 import AI21Client
from ai21.models.chat import ChatMessage

In [10]:
client = AI21Client()

In [11]:
with open('./../data/job_data.json', 'r') as json_file:
    job_data = json.load(json_file)

In [12]:
job_data[0].keys()

dict_keys(['id', 'title', 'company', 'locations', 'skills', 'posted_at', 'is_remote', 'snippet_fragments', 'description'])

In [46]:
# TODO: does not have to be questions, can be tasks

prompt_template = """
You emulate a user of a job search application which enables the retrival and analysis of relevant job postings.
Formulate 5 questions this user might ask based on the provided job section.
Make the questions specific to the job section so that it is clear from the question that the mentioned job is meant.
The job section should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the job section. DO NOT ASK GENERIC QUESTIONS THAT FIT TO EVERY JOB.
Make use of the job description to ask interesting questions concerning what the job, the company, etc, are about.

The job section:

job_title: {title}
company_name: {company}
work locations: {locations}
highlighted skills: {skills}
date of posting: {posted_at}
short job summary: {snippet_fragments}
detailed job description: {description}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [30]:
prompt = prompt_template.format(**job_data[0])

In [43]:
def llm(prompt, model='jamba-1.5-mini'):
    response = client.chat.completions.create(
    model=model,  
    messages=[ChatMessage(   
        role="user",
        content=prompt
    )],
        temperature=0.8,
        max_tokens=200
    )    
    return response.choices[0].message.content, response

In [39]:
eval_list = []

In [47]:
for job in job_data[22:]:
    prompt = prompt_template.format(**job)
    llm_resp, resp = llm(prompt, model='jamba-1.5-large')
    try:
        tmp_dict = json.loads(llm_resp)
        for q in tmp_dict['questions']: 
            eval_list.append({"id": job["id"], "question": q})
    except Exception:
        print(job["id"], llm_resp)
        print()

151 {
  "questions": [
    "What are the working locations for the Data Engineer - Business Intelligence / IT Beratung / SAP ERP / ABAP (m/w/d) position at cimt ag?",
    "What skills are highlighted for the Data Engineer - Business Intelligence / IT Beratung / SAP ERP / ABAP (m/w/d) position at cimt ag?",
    "What is the date of posting for the Data Engineer - Business Intelligence / IT Beratung / SAP ERP / ABAP (m/w/d) position at cimt ag?",
    "What does the short job summary mention for the Data Engineer - Business Intelligence / IT Beratung / SAP ERP / ABAP (m/w/d) position at cimt ag?",
    "What are the educational requirements for the Data Engineer - Business Intelligence / IT Beratung / SAP ERP / ABAP (



In [48]:
filename = './../data/retrival_evaluation_2.json'

with open(filename, 'w') as file:
    json.dump(eval_list, file, indent=2)

In [45]:
eval_list

[{'id': 18,
  'question': 'What are the primary responsibilities of the Lead Data Engineer at Almedia?'},
 {'id': 18,
  'question': 'Which technologies and tools does the Lead Data Engineer at Almedia primarily work with?'},
 {'id': 18,
  'question': 'What is the minimum experience required for the Lead Data Engineer position at Almedia?'},
 {'id': 18,
  'question': 'What specific skills are highlighted for the Lead Data Engineer role at Almedia?'},
 {'id': 18,
  'question': 'What benefits does Almedia offer to the Lead Data Engineer?'},
 {'id': 19,
  'question': 'What educational background is required for the (Senior) Fraud Data Scientist / Data Analyst position at Digital Charging Solutions GmbH?'},
 {'id': 19,
  'question': 'What are the highlighted skills for the (Senior) Fraud Data Scientist / Data Analyst position at Digital Charging Solutions GmbH?'},
 {'id': 19,
  'question': 'What programming languages are necessary for the (Senior) Fraud Data Scientist / Data Analyst positio

In [38]:
prompt

'You emulate a user of a job search application which enables the retrival and analysis of relevant job postings.\nFormulate 5 queries this user might have based on the provided job.\nMake the queries very specific to the provided job so that it is clear from the query that the mentioned job is meant.\nThe provided information should contain the response to the queries, and the queries should\nbe complete and not too short. DO NOT USE GENERIC QUESTIONS that can be asked about every job.\n\nExamples: \n- What skill are needed for the data engineering job at the company Zalando?\n- What education is required to become a Lead Data Engineer at the German government?\n\nThe job section:\n\njob_title: (Senior) Fraud Data Scientist / Data Analyst (f/d/m)\ncompany_name: Digital Charging Solutions GmbH\nwork locations: Berlin\nhighlighted skills: Python, SQL, Machine Learning, Data Analysis, Algorithms, Julia, R, Statistik\ndate of posting: 2024-09-26\nshort job summary:     Take care of fraud 

In [21]:
for job in job_data[17:]:
    print(prompt_template.format(**job))
    print()
    print("_______________________________")

You emulate a user of a job search application which enables the retrival and analysis of relevant job postings.
Formulate 5 questions this user might ask based on the provided job section.
Make the questions specific to the job section so that it is clear from the question that the mentioned job is meant.
The job section should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the job section.

The job section:

job_title: Mechatroniker / Elektroniker (m/w/d) Automatisierungstechnik für Spritzgussfertigung
company_name: Novapax Kunststofftechnik
work locations: Berlin
highlighted skills: Microsoft Office, Elektronik, Formenbau, Kunststofftechnik, Medizin, Massenspektrometrie
date of posting: 2024-10-02
short job summary: Abgeschlossene Ausbildung als Mechatroniker (m/w/d), Industrieelektriker (m/w/d) oder Elektroniker(m/w/d) für Automatisierungs- und Systemtechnik bzw,         Idealerweise praktische Erfahrun