In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt


In [2]:

def get_auc(test_question):
    label_encoder = LabelEncoder()
    test_question['label_encoded'] = label_encoder.fit_transform(test_question['label'])
    test_question['extracted_answer_encoded'] = label_encoder.transform(test_question['extracted_answer'])
    auc_score = roc_auc_score(test_question['label_encoded'], test_question['extracted_answer_encoded'])
    return auc_score

def extract_answer(text):
    pattern = r"(True|False|Don't know)"
    matches = re.findall(pattern, text)
    return matches



In [39]:
PROMPT_RESPONSE_PATH = "../../../data/analysis_results/Llama_2_13b_chat_hf_prompt_based_response.csv"
RAG_RESPONSE_PATH = "../../../data/analysis_results/Llama_2_13b_chat_hf_rag_based_response.csv"


In [46]:
response_transform = {
    "True" : True,
    "False" : False,
    "Don't know" : "Don't know"
}


In [48]:
prompt_response_df = pd.read_csv(PROMPT_RESPONSE_PATH)

prompt_response_df.loc[:, 'extracted_answer'] = prompt_response_df['llm_answer'].apply(extract_answer)
prompt_response_df.loc[:, "answer_count"] = prompt_response_df.extracted_answer.apply(lambda x:len(x))

prompt_response_df_multiple_answers = prompt_response_df[prompt_response_df.answer_count > 1]
prompt_response_df_single_answer = prompt_response_df.drop(prompt_response_df_multiple_answers.index)
prompt_response_df_single_answer.drop("answer_count", axis=1, inplace=True)


prompt_response_df_multiple_answers_ = []
for index, row in prompt_response_df_multiple_answers.iterrows():
    if row["extracted_answer"][0] == row["extracted_answer"][1]:
        prompt_response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], row["extracted_answer"][0]))
    else:
        prompt_response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], "Don't know"))

prompt_response_df_multiple_answers_ = pd.DataFrame(prompt_response_df_multiple_answers_, columns=["question", "label", "llm_answer", "extracted_answer"])

prompt_response_df_final = pd.concat([prompt_response_df_single_answer, prompt_response_df_multiple_answers_], ignore_index=True)
prompt_response_df_final = prompt_response_df_final.explode("extracted_answer")
prompt_response_df_final['extracted_answer'].fillna("Don't know", inplace=True)

prompt_response_df_final.extracted_answer = prompt_response_df_final.extracted_answer.apply(lambda x:response_transform[x])
prompt_response_df_certain = prompt_response_df_final[prompt_response_df_final.extracted_answer != "Don't know"]
prompt_response_df_uncertain = prompt_response_df_final[prompt_response_df_final.extracted_answer == "Don't know"]

label_encoder = LabelEncoder()
prompt_response_df_certain['label_encoded'] = label_encoder.fit_transform(prompt_response_df_certain['label'])
prompt_response_df_certain['extracted_answer_encoded'] = label_encoder.transform(prompt_response_df_certain['extracted_answer'])
prompt_response_df_correct_response = prompt_response_df_certain[prompt_response_df_certain.label_encoded == prompt_response_df_certain.extracted_answer_encoded]
prompt_response_df_incorrect_response = prompt_response_df_certain[prompt_response_df_certain.label_encoded != prompt_response_df_certain.extracted_answer_encoded]

prompt_response_df_certain.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prompt_response_df_certain['label_encoded'] = label_encoder.fit_transform(prompt_response_df_certain['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prompt_response_df_certain['extracted_answer_encoded'] = label_encoder.transform(prompt_response_df_certain['extracted_answer'])


Unnamed: 0,question,label,llm_answer,extracted_answer,label_encoded,extracted_answer_encoded
0,enhanced S-cone syndrome is not a vitreoretina...,False,"{\n""answer"": ""False""\n}\n\nEnhanced S-cone s...",False,0,0
1,metronidazole treats crohn's disease,True,"{\n""answer"": ""False""\n}\n\nMetronidazole is ...",False,1,0
2,KLEEFSTRA SYNDROME 1 is not associated with Ge...,False,"{\n ""answer"": ""False""\n}",False,0,0
3,STARGARDT DISEASE 1 (disorder) is not associat...,False,"{\n ""answer"": ""False""\n}",False,0,0
4,Juvenile polyposis syndrome associates Gene SMAD4,True,"{\n""answer"": ""True""\n}",True,1,1


In [49]:
rag_response_df = pd.read_csv(RAG_RESPONSE_PATH)

rag_response_df.loc[:, 'extracted_answer'] = rag_response_df['llm_answer'].apply(extract_answer)
rag_response_df.loc[:, "answer_count"] = rag_response_df.extracted_answer.apply(lambda x:len(x))

rag_response_df_multiple_answers = rag_response_df[rag_response_df.answer_count > 1]
rag_response_df_single_answer = rag_response_df.drop(rag_response_df_multiple_answers.index)
rag_response_df_single_answer.drop("answer_count", axis=1, inplace=True)


rag_response_df_multiple_answers_ = []
for index, row in rag_response_df_multiple_answers.iterrows():
    if row["extracted_answer"][0] == row["extracted_answer"][1]:
        rag_response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], row["extracted_answer"][0]))
    else:
        rag_response_df_multiple_answers_.append((row["question"], row["label"], row["llm_answer"], "Don't know"))

rag_response_df_multiple_answers_ = pd.DataFrame(rag_response_df_multiple_answers_, columns=["question", "label", "llm_answer", "extracted_answer"])

rag_response_df_final = pd.concat([rag_response_df_single_answer, rag_response_df_multiple_answers_], ignore_index=True)
rag_response_df_final = rag_response_df_final.explode("extracted_answer")
rag_response_df_final['extracted_answer'].fillna("Don't know", inplace=True)

rag_response_df_final.extracted_answer = rag_response_df_final.extracted_answer.apply(lambda x:response_transform[x])
rag_response_df_certain = rag_response_df_final[rag_response_df_final.extracted_answer != "Don't know"]
rag_response_df_uncertain = rag_response_df_final[rag_response_df_final.extracted_answer == "Don't know"]

label_encoder = LabelEncoder()
rag_response_df_certain['label_encoded'] = label_encoder.fit_transform(rag_response_df_certain['label'])
rag_response_df_certain['extracted_answer_encoded'] = label_encoder.transform(rag_response_df_certain['extracted_answer'])
rag_response_df_correct_response = rag_response_df_certain[rag_response_df_certain.label_encoded == rag_response_df_certain.extracted_answer_encoded]
rag_response_df_incorrect_response = rag_response_df_certain[rag_response_df_certain.label_encoded != rag_response_df_certain.extracted_answer_encoded]

rag_response_df_certain.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rag_response_df_certain['label_encoded'] = label_encoder.fit_transform(rag_response_df_certain['label'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rag_response_df_certain['extracted_answer_encoded'] = label_encoder.transform(rag_response_df_certain['extracted_answer'])


Unnamed: 0,question,label,llm_answer,extracted_answer,label_encoded,extracted_answer_encoded
1,metronidazole treats crohn's disease,True,"{\n""answer"": True\n}",True,1,1
3,STARGARDT DISEASE 1 (disorder) is not associat...,False,"Based on the information provided, the answe...",False,0,0
4,Juvenile polyposis syndrome associates Gene SMAD4,True,"Based on the information provided, the answe...",True,1,1
5,Glycogen storage disease type II associates Ge...,True,True,True,1,1
6,"USHER SYNDROME, TYPE IIA associates Gene USH2A",True,"Based on the information provided, the answe...",True,1,1


In [55]:
rag_response_df_uncertain[rag_response_df_uncertain.question.isin(prompt_response_df_correct_response.question)]


Unnamed: 0,question,label,llm_answer,extracted_answer
0,enhanced S-cone syndrome is not a vitreoretina...,False,"Based on the information provided, the answe...",Don't know
2,KLEEFSTRA SYNDROME 1 is not associated with Ge...,False,"Based on the information provided, the answe...",Don't know
7,"Muscular Dystrophy, Duchenne associates Gene DMD",True,"Yes. Based on the information provided, Duch...",Don't know
10,Bloom Syndrome associates Gene BLM,True,"Based on the information provided, the answe...",Don't know
13,Meleda Disease is not associated with Gene SLURP1,False,"Based on the information provided, the answe...",Don't know
...,...,...,...,...
582,Argininosuccinic Aciduria associates Gene ASL,True,"Based on the information provided, the answe...",Don't know
583,very long chain acyl-CoA dehydrogenase deficie...,False,"Based on the information provided, the answe...",Don't know
589,Basal Cell Nevus Syndrome associates Gene SUFU,True,"Based on the information provided, the answe...",Don't know
607,Juvenile polyposis syndrome associates Gene BM...,True,"Based on the information provided, the answe...",Don't know


In [53]:
rag_response_df_uncertain.shape

(86, 4)

In [54]:
prompt_response_df_uncertain.shape

(16, 4)

In [56]:
rag_response_df_uncertain[rag_response_df_uncertain.question.isin(prompt_response_df_correct_response.question)].question.values


array(['enhanced S-cone syndrome is not a vitreoretinal degeneration',
       'KLEEFSTRA SYNDROME 1 is not associated with Gene EHMT1',
       'Muscular Dystrophy, Duchenne associates Gene DMD',
       'Bloom Syndrome associates Gene BLM',
       'Meleda Disease is not associated with Gene SLURP1',
       'HMN (Hereditary Motor Neuropathy) Proximal Type I is not associated with Gene SMN1',
       'Juvenile Myoclonic Epilepsy is not associated with Gene EFHC1',
       'Malignant neoplasm of prostate is not associated with Gene PTEN',
       'May-Hegglin anomaly associates Gene MYH9',
       'Pheochromocytoma is not associated with Gene RET',
       'Severe autosomal recessive muscular dystrophy of childhood - North African type (disorder) associates Gene SGCG',
       'BLEPHAROPHIMOSIS, PTOSIS, AND EPICANTHUS INVERSUS (disorder) is not associated with Gene FOXL2',
       'Glycogen Storage Disease Type IIb associates Gene LAMP2',
       'Unverricht-Lundborg syndrome is a movement disorde