In [309]:
import pandas as pd
import numpy as np
import re
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt


In [310]:

def get_auc(test_question):
    label_encoder = LabelEncoder()
    test_question['label_encoded'] = label_encoder.fit_transform(test_question['label'])
    test_question['extracted_answer_encoded'] = label_encoder.transform(test_question['extracted_answer'])
    auc_score = roc_auc_score(test_question['label_encoded'], test_question['extracted_answer_encoded'])
    return auc_score

def extract_answer(text):
    pattern = r"(True|False|Don't know)"
    matches = re.findall(pattern, text)
    return matches



In [336]:
RESPONSE_PATH = "../../../data/analysis_results/Llama_2_13b_chat_hf_rag_based_response.csv"


In [337]:
response_df = pd.read_csv(RESPONSE_PATH)

response_df.loc[:, 'extracted_answer'] = response_df['llm_answer'].apply(extract_answer)
response_df.loc[:, "answer_count"] = response_df.extracted_answer.apply(lambda x:len(x))

response_df_multiple_answers = response_df[response_df.answer_count > 1]
response_df_single_answer = response_df.drop(response_df_multiple_answers.index)
response_df_single_answer.drop("answer_count", axis=1, inplace=True)


response_df_multiple_answers_ = []
for index, row in response_df_multiple_answers.iterrows():
    if row["extracted_answer"][0] == row["extracted_answer"][1]:
        response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], row["extracted_answer"][0]))
    else:
        response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], "Don't know"))

response_df_multiple_answers_ = pd.DataFrame(response_df_multiple_answers_, columns=["question", "label", "llm_answer", "extracted_answer"])

response_df_final = pd.concat([response_df_single_answer, response_df_multiple_answers_], ignore_index=True)
response_df_final = response_df_final.explode("extracted_answer")
response_df_final['extracted_answer'].fillna("Don't know", inplace=True)
response_df_final.head()


Unnamed: 0,question,label,llm_answer,extracted_answer
0,enhanced S-cone syndrome is not a vitreoretina...,False,"Based on the information provided, the answe...",Don't know
1,metronidazole treats crohn's disease,True,"{\n""answer"": True\n}",True
2,KLEEFSTRA SYNDROME 1 is not associated with Ge...,False,"Based on the information provided, the answe...",Don't know
3,STARGARDT DISEASE 1 (disorder) is not associat...,False,"Based on the information provided, the answe...",False
4,Juvenile polyposis syndrome associates Gene SMAD4,True,"Based on the information provided, the answe...",True


In [338]:
response_df_uncertain_response = response_df_final[response_df_final.extracted_answer == "Don't know"]
response_df_certain_response = response_df_final[response_df_final.extracted_answer != "Don't know"]
# response_df_certain_response.loc[:, "extracted_answer"] = response_df_certain_response.extracted_answer.astype(bool)


response_transform = {
    "True" : True,
    "False" : False
}

response_df_certain_response.extracted_answer = response_df_certain_response.extracted_answer.apply(lambda x:response_transform[x])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  response_df_certain_response.extracted_answer = response_df_certain_response.extracted_answer.apply(lambda x:response_transform[x])


In [339]:
total_certain_response = response_df_certain_response.shape[0]
total_uncertain_response = response_df_uncertain_response.shape[0]
total_response = response_df_final.shape[0]

correct_response = response_df_certain_response[response_df_certain_response.label == response_df_certain_response.extracted_answer].shape[0]
incorrect_response = response_df_certain_response[response_df_certain_response.label != response_df_certain_response.extracted_answer].shape[0]

true_response = correct_response/total_certain_response
false_response = incorrect_response/total_certain_response
uncertainty = total_uncertain_response/total_response


print("True response = ",true_response)
print("False response = ",false_response)
print("Uncertainty = ",uncertainty)


True response =  0.9196940726577438
False response =  0.08030592734225621
Uncertainty =  0.1412151067323481


In [340]:
from scipy.stats import binom_test

N = response_df_certain_response.shape[0]
x = correct_response
p = response_df_certain_response[response_df_certain_response.label==True].shape[0]/N
p_value = binom_test(x, N, p=p, alternative='greater') 
p_value

H = np.divide(false_response, uncertainty)
print(p_value, H)

1.9802876738402566e-42 0.568678020365512


  p_value = binom_test(x, N, p=p, alternative='greater')


In [286]:
total_certain_response = response_df_certain_response.shape[0]
total_uncertain_response = response_df_uncertain_response.shape[0]
total_response = response_df_final.shape[0]

correct_response = response_df_certain_response[response_df_certain_response.label == response_df_certain_response.extracted_answer].shape[0]
incorrect_response = response_df_certain_response[response_df_certain_response.label != response_df_certain_response.extracted_answer].shape[0]

true_response = correct_response/total_certain_response
false_response = incorrect_response/total_certain_response
uncertainty = total_uncertain_response/total_response


print("True response = ",true_response)
print("False response = ",false_response)
print("Uncertainty = ",uncertainty)


True response =  0.8988195615514334
False response =  0.10118043844856661
Uncertainty =  0.026272577996715927


In [291]:
auc_score = get_auc(response_df_certain_response)
print("AUC score = ", auc_score)


AUC score =  0.8829975227085054


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_question['label_encoded'] = label_encoder.fit_transform(test_question['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_question['extracted_answer_encoded'] = label_encoder.transform(test_question['extracted_answer'])
