In [1]:
import json

import os
import sys
# Put the parent directory on sys.path so the `job_search_application`
# package imported below can be resolved when the notebook runs from this
# folder. The membership check keeps re-runs from appending duplicates.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from job_search_application.minsearch import Index

In [2]:
from ai21 import AI21Client
from ai21.models.chat import ChatMessage

In [12]:
# Shared AI21 client used by `llm` for every chat completion in this notebook.
# NOTE(review): no API key is passed here, so the SDK presumably picks it up
# from the environment (e.g. AI21_API_KEY) — confirm before a fresh-kernel run.
client = AI21Client()

In [3]:
# Load the job postings that are indexed for retrieval further down.
# The encoding is pinned to UTF-8 so that non-ASCII characters in the postings
# decode identically on every platform instead of using the locale default.
with open('../data/job_data.json', 'r', encoding='utf-8') as json_file:
    job_data = json.load(json_file)

In [4]:
# Load the retrieval evaluation data set.
# The encoding is pinned to UTF-8 so that non-ASCII characters decode
# identically on every platform instead of using the locale default.
# NOTE(review): the file name is spelled 'retrival' — presumably that matches
# the file on disk; rename both together if it is ever corrected.
with open('../data/retrival_evaluation_data.json', 'r', encoding='utf-8') as json_file:
    evaluation_data = json.load(json_file)

In [7]:
# Build the search index over the loaded job postings.
# NOTE(review): `text_fields` are presumably the fields matched as free text
# and `keyword_fields` the exact-match filter fields — confirm against
# job_search_application.minsearch.Index.
# NOTE(review): "is_remote" and "posted_at" are listed as text fields although
# their names suggest a flag and a date — verify they are stored as strings.
index = Index(
    text_fields=[
        "title",
        "company",
        "locations",
        "skills",
        "posted_at",
        "is_remote",
        "snippet_fragments",
        "description"
    ],
    keyword_fields=["id"]
)

# fit() is the last expression of the cell, so its return value (the Index
# object itself, per the cell output) is echoed below.
index.fit(job_data)

<job_search_application.minsearch.Index at 0x7fa451ff58b0>

In [8]:
def search(query):
    """Query the module-level ``index`` and return its hits for ``query``.

    The search runs with an empty filter, an empty boost mapping and
    ``num_results=3``; whatever ``index.search`` returns is passed through.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=3,
    )

In [9]:
# Outer prompt: the user's question followed by the retrieved job context.
prompt_template = """
You're an expert application coach. Answer the QUESTION based on the CONTEXT from the job database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# Rendering of a single retrieved job posting inside the CONTEXT section.
entry_template = """
job_title: {title}
company_name: {company}
work locations: {locations}
highlighted skills: {skills}
date of posting: {posted_at}
short job summary: {snippet_fragments}
detailed job description: {description}
""".strip()


def build_prompt(query, search_results):
    """Assemble the RAG prompt for ``query`` from the retrieved documents.

    Each document in ``search_results`` is a mapping that is expanded into
    ``entry_template`` (extra keys are ignored by ``str.format``). The
    rendered entries are separated by a blank line and placed in the
    CONTEXT slot of ``prompt_template``. Surrounding whitespace is stripped
    from the final prompt.
    """
    rendered_entries = [
        entry_template.format(**doc) + "\n\n" for doc in search_results
    ]
    filled = prompt_template.format(
        question=query,
        context="".join(rendered_entries),
    )
    return filled.strip()

In [10]:
def llm(prompt, model='jamba-1.5-mini', temperature=0.8, max_tokens=200):
    """Send ``prompt`` as a single user message to the AI21 chat API.

    Args:
        prompt: Text sent as the content of the only (user) message.
        model: Name of the chat model to call.
        temperature: Sampling temperature forwarded to the API. The default
            keeps the value that used to be hard-coded; callers that need
            more repeatable output (e.g. an evaluation judge) can lower it.
        max_tokens: Completion token limit forwarded to the API. The default
            keeps the value that used to be hard-coded; raise it if
            structured output gets cut off.

    Returns:
        A ``(content, response)`` tuple: the message content of the first
        choice and the full response object returned by the client.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[ChatMessage(role="user", content=prompt)],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content, response

In [11]:
def rag(query, model='jamba-1.5-mini'):
    """Run retrieval, prompt building and generation for ``query``.

    Returns the two values produced by ``llm``: the generated answer text
    and the raw response object.
    """
    prompt = build_prompt(query, search(query))
    generated_text, raw_response = llm(prompt, model=model)
    return generated_text, raw_response

In [13]:
# Prompt for the LLM-as-judge step. It is filled with str.format using the
# `query` and `response` placeholders; the doubled braces `{{` / `}}` are
# format escapes so the JSON skeleton reaches the model with literal braces.
# The model is asked to reply with bare JSON containing "Relevance" and
# "Explanation" keys.
evaluation_prompt_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated response to the given queries.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Query: {query}
Response: {response}

Please analyze the content and context of the generated response in relation to the query
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a very brief explanation for your evaluation]"
}}
""".strip()

In [22]:
def evaluate_rag(query):
    """Answer ``query`` with the RAG pipeline and have the LLM judge the answer.

    Args:
        query: The user question to answer and evaluate.

    Returns:
        A 4-tuple ``(query, response_llm, evaluation_llm, a21_resp)`` where
        ``response_llm`` is the generated answer text, ``evaluation_llm`` is
        the judge's reply parsed from JSON, and ``a21_resp`` is the raw
        response object of the judge call.

    Raises:
        json.JSONDecodeError: If the judge's reply is not valid JSON.
    """
    # rag() returns (answer_text, raw_response). Only the text belongs in the
    # judge prompt; formatting the whole tuple would embed the repr of the
    # response object in the prompt.
    response_llm, _ = rag(query)

    evaluation_prompt = evaluation_prompt_template.format(
        query=query,
        response=response_llm,
    )

    evaluation_text, a21_resp = llm(evaluation_prompt)
    evaluation_llm = json.loads(evaluation_text)

    return query, response_llm, evaluation_llm, a21_resp

In [19]:
# Smoke test: run the RAG + judge steps on the first evaluation record and
# print the judge's raw (unparsed) reply.
# The records were loaded as `evaluation_data`; `eval_data` is never defined.
for record in evaluation_data[:1]:
    query = record['question']
    # rag() returns (answer_text, raw_response); only the text goes into the
    # judge prompt.
    response_llm, _ = rag(query)

    evaluation_prompt = evaluation_prompt_template.format(
        query=query,
        response=response_llm
    )

    evaluation_llm, a21_resp = llm(evaluation_prompt)
    print(evaluation_llm)

('{\n  "Relevance": "RELEVANT",\n  "Explanation": "The response provides detailed information about the qualifications and experience required for the Mechatroniker/Elektroniker position at Novapax Kunststofftechnik, directly addressing the query."\n}', ChatCompletionResponse(id='chat-e04523b5151a4a91b166ce76ab932659', choices=[ChatCompletionResponseChoice(index=0, message=AssistantMessage(role='assistant', content='{\n  "Relevance": "RELEVANT",\n  "Explanation": "The response provides detailed information about the qualifications and experience required for the Mechatroniker/Elektroniker position at Novapax Kunststofftechnik, directly addressing the query."\n}', tool_calls=None), logprobs=None, finish_reason='stop')], usage=UsageInfo(prompt_tokens=593, completion_tokens=63, total_tokens=656)))


In [37]:
# Identifiers ("<id>_<question>") of records that have already been evaluated.
# Presumably kept in its own cell so that re-running the evaluation loop does
# not reset it and already-processed records are skipped.
processed_queries = set()

In [41]:
# TODO: compare two different approaches
# TODO: only take a sample

# NOTE: these lists are re-created on every run of this cell while
# `processed_queries` persists, so after a re-run they only contain the
# records evaluated in that run.
a21_resps = []
evaluations = []

# The records were loaded as `evaluation_data`; `eval_data` is never defined.
for record in evaluation_data[:1]:
    query = record['question']
    id_ = record['id']
    identifier = f'{id_}_{query}'

    # Skip records that were already evaluated in an earlier run.
    if identifier in processed_queries:
        continue

    _, response_llm, evaluation_llm, a21_resp = evaluate_rag(query)

    evaluations.append({
        'id': id_,
        'query': query,
        'response': response_llm,
        'relevance': evaluation_llm["Relevance"],
        'explanation': evaluation_llm["Explanation"],
    })
    a21_resps.append(a21_resp)
    # Mark as done only after the evaluation succeeded so that a failed record
    # is retried on the next run.
    processed_queries.add(identifier)