In [61]:
import json

import os
import sys
import pandas as pd
from tqdm import tqdm
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from job_search_application.minsearch import Index

In [33]:
from ai21 import AI21Client
from ai21.models.chat import ChatMessage

In [35]:
client = AI21Client()

In [36]:
with open('../data/job_data.json', 'r') as json_file:
    job_data = json.load(json_file)

In [37]:
with open('../data/retrival_evaluation_data.json', 'r') as json_file:
    eval_data = json.load(json_file)

In [38]:
index = Index(
    text_fields=[
        "title",
        "company",
        "locations",
        "skills",
        "posted_at",
        "is_remote",
        "snippet_fragments",
        "description"
    ],
    keyword_fields=["id"]
)

index.fit(job_data)

<job_search_application.minsearch.Index at 0x7fe8fc742e10>

In [39]:
def search(query):
    boost = {
      'title': 2.37,
      'company': 2.19,
      'locations': 0.22,
      'skills': 1.71,
      'posted_at': 1.77,
      'is_remote': 0.29,
      'snippet_fragments': 1.96,
      'description': 0.08
    }

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=5
    )

    return results

In [40]:
prompt_template = """
You're an expert application coach. Answer the QUESTION based on the CONTEXT from the job database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
job_title: {title}
company_name: {company}
work locations: {locations}
highlighted skills: {skills}
date of posting: {posted_at}
short job summary: {snippet_fragments}
detailed job description: {description}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [41]:
def llm(prompt, model='jamba-1.5-mini'):
    response = client.chat.completions.create(
    model=model,  
    messages=[ChatMessage(   
        role="user",
        content=prompt
    )],
        temperature=0.8,
        max_tokens=200
    )    
    return response.choices[0].message.content, response

In [42]:
def rag(query, prompt_creator=build_prompt, model='jamba-1.5-mini'):
    search_results = search(query)
    prompt = prompt_creator(query, search_results)
    answer, resp = llm(prompt, model=model)
    return answer, resp

In [43]:
evaluation_prompt_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated response to the given queries.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Query: {query}
Response: {response}

Please analyze the content and context of the generated response in relation to the query
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a very brief explanation for your evaluation]"
}}
""".strip()

In [44]:
def evaluate_rag(query, prompt_creator=build_prompt):
    response_llm = rag(query, prompt_creator) 

    evaluation_prompt = evaluation_prompt_template.format(
        query=query,
        response=response_llm
    )

    evaluation_llm, a21_resp = llm(evaluation_prompt)
    try:
        evaluation_llm = json.loads(evaluation_llm)
    except Exception:
        evaluation_llm = "UNKOWN"

    return query, response_llm, evaluation_llm, a21_resp

In [45]:
# testing with one prompt
for record in eval_data[:1]:
    query = record['question']
    id_ = record['id']
    response_llm = rag(query) 
    
    evaluation_prompt = evaluation_prompt_template.format(
        query=query,
        response=response_llm
    )
    
    evaluation_llm, a21_resp = llm(evaluation_prompt)
    print(evaluation_llm)

{
  "Relevance": "RELEVANT",
  "Explanation": "The response provides a clear and detailed list of the qualifications and experience required for the Mechatroniker/Elektroniker position at Novapax Kunststofftechnik, directly addressing the query."
}


In [28]:
processed_queries = set()

In [29]:
a21_resps = []
evaluations = []

for record in eval_data:
    query = record['question']
    id_ = record['id']
    identifier = f'{id_}_{query}'

    if f'{id_}_{query}' not in processed_queries:
        _, response_llm, evaluation_llm, a21_resp = evaluate_rag(query)
    
        temp_eval_dict = {'id': id_, 'query': query, 'response': response_llm, 'relevance': evaluation_llm["Relevance"], 'explanation': evaluation_llm["Explanation"]}
        evaluations.append(temp_eval_dict)
        a21_resps.append(a21_resp)
        processed_queries.add(identifier)

KeyboardInterrupt: 

In [31]:
len(list(processed_queries))

552

In [67]:
def show_stats(evaluations):
    df = pd.DataFrame(evaluations)
    count_series = df['relevance'].value_counts()
    
    count_df = count_series.reset_index()
    count_df.columns = ['relevance', 'Count']
    count_df['Percentage'] = (count_df['Count'] / len(df)) * 100
    print(count_series)
    print(count_df)

In [69]:
# took too long!!! only use first 50
show_stats(evaluations[:50])

relevance
RELEVANT           32
PARTLY_RELEVANT    16
NON_RELEVANT        2
Name: count, dtype: int64
         relevance  Count  Percentage
0         RELEVANT     32        64.0
1  PARTLY_RELEVANT     16        32.0
2     NON_RELEVANT      2         4.0


In [70]:
show_stats(evaluations)

relevance
RELEVANT           391
PARTLY_RELEVANT    151
NON_RELEVANT        10
Name: count, dtype: int64
         relevance  Count  Percentage
0         RELEVANT    391   70.833333
1  PARTLY_RELEVANT    151   27.355072
2     NON_RELEVANT     10    1.811594


In [46]:
chatgpt_prompt_template = """
You're an expert job application coach. Answer the QUESTION based on the CONTEXT provided from the job database, using only the information available. 
Avoid making assumptions, and if necessary information is missing, acknowledge it.

Please respond concisely and focus on key details relevant to the QUESTION. Structure your response to be user-friendly and actionable.

If the CONTEXT lacks enough information, respond with: "The job posting does not provide enough information to answer your question directly."

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
job_title: {title}
company_name: {company}
work_locations: {locations}  # List all locations where the position is based
highlighted_skills: {skills}  # Key skills mentioned in the job posting
posting_date: {posted_at}  # When was this job posted
short_summary: {snippet_fragments}  # Brief summary or main highlights of the job
full_description: {description}  # Complete details and requirements for the position
""".strip()

def build_chatgpt_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [47]:
processed_queries_chatgpt_prompt = set()

In [72]:
a21_resps_chatgpt_prompt = []
evaluations_chatgpt_prompt = []

for record in tqdm(eval_data[:50]):
    query = record['question']
    id_ = record['id']
    identifier = f'{id_}_{query}'

    if f'{id_}_{query}' not in processed_queries_chatgpt_prompt:
        _, response_llm, evaluation_llm, a21_resp = evaluate_rag(query, build_chatgpt_prompt)
    
        temp_eval_dict = {'id': id_, 'query': query, 'response': response_llm, 'relevance': evaluation_llm["Relevance"], 'explanation': evaluation_llm["Explanation"]}
        evaluations_chatgpt_prompt.append(temp_eval_dict)
        a21_resps_chatgpt_prompt.append(a21_resp)
        processed_queries_chatgpt_prompt.add(identifier)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:03<00:00,  2.48s/it]


In [73]:
show_stats(evaluations_chatgpt_prompt)

relevance
RELEVANT           33
PARTLY_RELEVANT     9
NON_RELEVANT        5
Name: count, dtype: int64
         relevance  Count  Percentage
0         RELEVANT     33   70.212766
1  PARTLY_RELEVANT      9   19.148936
2     NON_RELEVANT      5   10.638298


In [74]:
claude_prompt_template = """
You are an expert application coach and career advisor with deep knowledge of various industries, job markets, 
and application processes. Your goal is to provide accurate, helpful, and actionable advice to job seekers based 
on the information provided in job postings. Always maintain a professional and supportive tone.

Key responsibilities:
1. Analyze job postings thoroughly.
2. Provide tailored advice for applicants.
3. Highlight important skills and qualifications.
4. Offer insights on company culture and work environment.
5. Suggest ways to align the applicant's experience with job requirements.
6. Provide guidance on application strategies and interview preparation.

Constraints:
- Base your responses solely on the information provided in the CONTEXT.
- If the CONTEXT doesn't contain relevant information to answer the QUESTION, state that you don't have enough information to provide a complete answer.
- Avoid making assumptions or providing information not present in the CONTEXT.
- Do not mention the CONTEXT or that you're using it in your responses.

CONTEXT:
{context}
""".strip()

entry_template = """
Job Title: {title}
Company: {company}
Work Locations: {locations}
Key Skills: {skills}
Posted On: {posted_at}
Job Summary: {snippet_fragments}
Detailed Job Description: {description}
""".strip()

def build_claude_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [75]:
processed_queries_claude_prompt = set()

In [76]:
a21_resps_claude_prompt = []
evaluations_claude_prompt = []

for record in tqdm(eval_data[:50]):
    query = record['question']
    id_ = record['id']
    identifier = f'{id_}_{query}'

    if f'{id_}_{query}' not in processed_queries_claude_prompt:
        _, response_llm, evaluation_llm, a21_resp = evaluate_rag(query, build_claude_prompt)
    
        temp_eval_dict = {'id': id_, 'query': query, 'response': response_llm, 'relevance': evaluation_llm["Relevance"], 'explanation': evaluation_llm["Explanation"]}
        evaluations_claude_prompt.append(temp_eval_dict)
        a21_resps_claude_prompt.append(a21_resp)
        processed_queries_chatgpt_prompt.add(identifier)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:10<00:00,  2.60s/it]


In [77]:
show_stats(evaluations_claude_prompt)

relevance
RELEVANT           30
PARTLY_RELEVANT    16
NON_RELEVANT        4
Name: count, dtype: int64
         relevance  Count  Percentage
0         RELEVANT     30        60.0
1  PARTLY_RELEVANT     16        32.0
2     NON_RELEVANT      4         8.0
