# QA Experiment - 5 levels

- Using 5 level answers with ChatGPT-4o-mini
  
Prompt available at: experiments\templates\

In [1]:
%load_ext autoreload
%autoreload 2

Loading internal package

In [2]:
import clizod_ranker as cr

In [3]:
import os
import re
import shutil
import pandas as pd
import re
import numpy as np
import json
import asyncio
import time
import datetime

from random import randrange
from os.path import join, exists

## Load Data
Loading the data cleaned from classifier-sanitize.ipynb notebook.

In [4]:
input_file_path = '../data/data_01_cleaned.csv'
df_sample = pd.read_csv(input_file_path)

print(f"There are {len(df_sample)} rows in this dataset.")
df_sample.head(3)

There are 2905 rows in this dataset.


Unnamed: 0,tag,disease,variable,target_hash,target,review,reference,id
0,0.0,cchf,rainfall,1266833a0c9bd183b54db85128f28024,Scientific Opinion on the Role of Tick Vectors...,1.0,Journal Article,0
1,0.0,cchf,rainfall,c025b214c7370b24a3776e85f6cd285b,A survey of rift valley fever and associated r...,0.0,Journal Article,1
2,0.0,cchf,rainfall,273c3107270be7343e0e43692cde82c3,Occurrence of Rickettsia spp. and Coxiella bur...,0.0,Journal Article,2


## Set experiment directory

In [5]:
exp_root_dir = "../experiments/"
templates_dir = join(exp_root_dir, "templates")
results_root_dir = join(exp_root_dir, "results")
reports_root_dir = join(exp_root_dir, "reports")
review_root_dir = join(exp_root_dir, "review")

#Regular expression to extract out the different parts of the file name. Using _ as a delimiter
reg_exp_file_template = re.compile("(?P<model>[A-za-z0-9-]+)_(?P<exp>[A-za-z0-9-]+).txt")


## Run the model

In [6]:
template_name = 'gpt-4o-mini_h-05-single-answer-5.txt'

components = reg_exp_file_template.search(template_name).groupdict()
model_alias, exp = components.values()
info = cr.MODEL_INFOS[model_alias]

print(f"Initializing to run '{model_alias}' using '{template_name}' template")

promptGenerator = cr.QAPromptGenerator(join(templates_dir, template_name))

llmClient = cr.AsyncLLMClient(info, cr.SINGLE_LEVEL_QA_RESPONSE)

runner = cr.ExperimentRunner(promptGenerator, llmClient, results_root_dir)
await runner.async_run(df_sample, model_alias, exp)

Initializing to run 'gpt-4o-mini' using 'gpt-4o-mini_h-05-single-answer-5.txt' template
Processing prompts for cchf - rainfall
Attempting to processing 454 records for cchf - rainfall
Batch completed in 0:02:36.144280.
Processing prompts for ebola - rainfall
Attempting to processing 915 records for ebola - rainfall
Batch completed in 0:06:27.428905.
Processing prompts for rvf - rainfall
Attempting to processing 537 records for rvf - rainfall
Batch completed in 0:03:13.451094.
Processing prompts for lepto - rainfall
Attempting to processing 999 records for lepto - rainfall
Batch completed in 0:05:29.906539.
All batches completed in 0:17:47.025548.


## Process results

### Read the results

In [6]:
exp_to_load = ['/h-05-single-answer-5/']

ans_map = {
    'definitely yes': 1.00,
    'probably yes': 0.75,
    'unsure': 0.5,
    'probably no': 0.25,
    'definitely no': 0.0
}

In [7]:
def parse_answers(response):
    # Parse the JSON string
    try:
        response_json = json.loads(response)
        
    except Exception as e:
        print(f"Error: An unexpected error occurred for request: {e}\n{response}")
        response_json = { "results": [] }
    
    
    # Initialize a dictionary to store answers for each question
    answer_dict = {}
    
    # Loop through the 'results' to extract answers
    for result in response_json['results']:
        question_num = result['question_number']     
        reason_text = result['reason'] 
        ans_text = result['answer'].lower()

        answer_dict[f'reason_{question_num}'] = reason_text

        # map the answer - if not 'yes' or 'no' then default to 0.5
        answer_dict[f'answer_{question_num}'] = ans_map.get(ans_text, 0.5)
        
    return pd.Series(answer_dict)

In [8]:
df_comb_results = cr.process_results(df_sample, exp_to_load, results_root_dir, parse_answers)
df_comb_results.head(5)

Reading - ../experiments/results/gpt-4o-mini/h-05-single-answer-5/cchf-rainfall
Reading - ../experiments/results/gpt-4o-mini/h-05-single-answer-5/ebola-rainfall
Reading - ../experiments/results/gpt-4o-mini/h-05-single-answer-5/lepto-rainfall
Reading - ../experiments/results/gpt-4o-mini/h-05-single-answer-5/rvf-rainfall
There are 2905 rows in this dataset.


Unnamed: 0,id,experiment,model,disease,variable,reason_1,answer_1,reason_2,answer_2,reason_3,answer_3,reason_4,answer_4,tag,reference
0,3,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,The abstract indicates that it is a review of ...,0.0,The abstract discusses vector-borne diseases a...,0.0,The abstract mentions climate change and its i...,0.5,The study is a review and does not focus on fi...,0.0,0.0,Journal Article
1,4,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,The abstract describes a study that involves t...,1.0,The study focuses on the Rift Valley fever vir...,0.0,The abstract does not discuss environmental fa...,0.0,The research is centered on immunoinformatics ...,0.0,0.0,Journal Article
2,5,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,The abstract describes original research invol...,1.0,The study focuses on Aedes albopictus and its ...,0.0,The abstract does not discuss environmental fa...,0.0,The research involves field surveys and the co...,1.0,0.0,Journal Article
3,6,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,The study investigates a specific outbreak of ...,1.0,The study focuses on Rift Valley fever virus (...,0.0,The abstract discusses the emergence of RVFV i...,0.75,The research is based on field cases of febril...,1.0,0.0,Journal Article
4,0,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,The abstract describes a report that provides ...,0.0,The study discusses the role of tick vectors a...,0.0,The abstract does not mention environmental fa...,0.0,The study appears to focus on the epidemiology...,0.75,0.0,Journal Article


### Calculate Ranking

In [9]:
selected_columns = [col for col in df_comb_results.columns if col in ['id', 'experiment', 'model', 'disease', 'variable', 'tag'] or col.startswith('answer_')]
df_rerank = df_comb_results[selected_columns].copy()

qscore_columns = [col for col in df_rerank.columns if col.startswith('answer_')]
df_rerank['answer_mean'] = df_rerank[qscore_columns].mean(axis=1)
df_rerank.head(5)

Unnamed: 0,id,experiment,model,disease,variable,answer_1,answer_2,answer_3,answer_4,tag,answer_mean
0,3,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,0.0,0.0,0.5,0.0,0.0,0.125
1,4,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,0.0,0.0,0.0,0.0,0.25
2,5,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,0.0,0.0,1.0,0.0,0.5
3,6,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,0.0,0.75,1.0,0.0,0.6875
4,0,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,0.0,0.0,0.0,0.75,0.0,0.1875


In [10]:
df_comb_results = cr.apply_len_tie_breaker(df_sample, df_rerank, 'answer_mean')
df_comb_results.head(5)

Unnamed: 0,id,experiment,model,disease,variable,answer_1,answer_2,answer_3,answer_4,tag,answer_mean,target,target_len,ranking
0,441,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,1.0,1.0,1.0,1.0,1.0,"Temporal tendency, seasonality and relationshi...",2385,1
1,19,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,1.0,1.0,1.0,1.0,1.0,Predicting CCHF incidence and its related fact...,1691,2
2,20,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,1.0,1.0,1.0,1.0,1.0,Crimean-Congo hemorrhagic fever and its relati...,1657,3
3,285,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,1.0,1.0,1.0,1.0,1.0,The effect of climate variables on the inciden...,1456,4
4,346,h-05-single-answer-5,gpt-4o-mini,cchf,rainfall,1.0,1.0,1.0,1.0,1.0,1.0,Status of Crimean-Congo haemorrhagic fever vir...,1322,5


## Review Document

Creating a CSV file for reviewing the results.

In [11]:
selected_columns = [col for col in df_comb_results.columns if col in ['id', 'experiment', 'model'] or col.startswith('reason_') ]
df_review_ref = pd.merge(
    df_comb_results[selected_columns], 
    df_sample[['id', 'target']], 
    how='left',
    left_on='id', 
    right_on='id'
)

df_review = pd.merge(
    df_comb_results,
    df_review_ref, 
    how='left',
    left_on=['id', 'experiment', 'model'], 
    right_on=['id', 'experiment', 'model']
)

Creating separate files for each experiment

In [12]:
print(df_review.shape)
groups = df_review.groupby(["model", "experiment"])
for key, data in groups:
    print(f"{key[0]} - {key[1]}")
    df_output = df_review.query(f"model == '{key[0]}' & experiment == '{key[1]}'")
    print(df_output.shape)
    df_output.to_csv(review_root_dir + f'/review_h-01_{key[0]}_{key[1]}.csv', header=True, index=False, encoding='utf-8')

    #reporting
    df_reporting = df_output[['id', 'experiment', 'model', 'disease', 'variable', 'tag', 'answer_mean', 'ranking']].copy()
    df_reporting.rename(columns={"answer_mean": "score"}, inplace=True)
    df_reporting.to_csv(reports_root_dir + f'/report_{key[0]}_{key[1]}.csv', header=True, index=False, encoding='utf-8')

(2905, 15)
gpt-4o-mini - h-05-single-answer-5
(2905, 15)
