# 1. ADE Classifier

## 1.1 Load Datasets

In [1]:
import os
import pandas as pd

In [2]:
# ADE Positive Dataset

file_name = 'DRUG-AE.rel'
data_path = os.path.join('..', 'data', file_name)
print("Raw data path:", data_path)

df = pd.read_csv(data_path, header=None, delimiter="|")
df = df.sample(frac=1).reset_index(drop=True)
# Drop duplicates, if any
df = df.drop_duplicates()
df[["category"]]= "POSITIVE"
df.rename(columns={1: "description"}, inplace=True)
df = df[["description", "category"]]
df_pos = df.copy()
df_pos.head(5)

Raw data path: ../data/DRUG-AE.rel


Unnamed: 0,description,category
0,"He was started on digoxin, 0.25 mg daily, beca...",POSITIVE
1,The latter form (Macrodantin) is reported to e...,POSITIVE
2,A patient with rheumatoid arthritis developed ...,POSITIVE
3,A wide variety of adverse central nervous syst...,POSITIVE
4,CONCLUSIONS: We report this case of the concom...,POSITIVE


In [3]:
# ADE Negative Dataset

file_name = 'ADE-NEG.txt'
data_path = os.path.join('..', 'data', file_name)
print("Raw data path:", data_path)

df = pd.read_csv(data_path, header=None, delimiter="\t", names=["col1"])
df = df.sample(frac=1).reset_index(drop=True)
# Drop duplicates, if any
df = df.drop_duplicates()
df['description'] =  df.col1.str.split('NEG').str[1]
df["category"] = "NEGATIVE"
df= df[["description", "category"]]
df_neg = df.copy()
print(df_neg.shape)
df_neg.head()



Raw data path: ../data/ADE-NEG.txt
(16688, 2)


Unnamed: 0,description,category
0,"After clonidine was initiated, the patient's ...",NEGATIVE
1,"Despite maximal anti-glaucomatous medication,...",NEGATIVE
2,"METHODS: We present the clinical findings, pe...",NEGATIVE
3,Plasma drug concentrations were largely kept ...,NEGATIVE
4,A 51-year-old male with a huge chondrosarcoma...,NEGATIVE


## 1.2. Prepare Dataset

In [10]:
# Select x samples from positive and negative dataset
number_of_samples = 20
df_1 = df_neg.sample(number_of_samples)
df_2 = df_pos.sample(number_of_samples)
ade_df = pd.concat([df_1, df_2])
ade_df.loc[:, 'predictions'] = ["NA" for _ in range(len(ade_df))]
ade_df.sample(4)

Unnamed: 0,description,category,predictions
3354,Visual hallucinations associated with zonisamide.,POSITIVE,
736,"These cases suggest the possibility that, in s...",POSITIVE,
2582,"In this case, discontinuing piroxicam, a nonst...",POSITIVE,
8768,Inflammatory myopathy has been associated wit...,NEGATIVE,


# 2. GPT Predictions 

## 2.1 Load Prompt and Initiate Classifier

In [11]:
from modules import GPTClassifier 

In [12]:
prompt_file = "10_01_ADE_Classifier.txt"

prompt_file_path = os.path.join('.', 'prompts', prompt_file)
print(f"Prompt for [Gender Classification] task: ({prompt_file_path})")

with open(prompt_file_path, 'r') as file:
    print("PROMPT:\n")
    for line in file:
        print(line)
        
ade_classifier = GPTClassifier.Classifier(prompt_file_path)

Prompt for [Gender Classification] task: (./prompts/10_01_ADE_Classifier.txt)
PROMPT:

You are a highly experienced, skilled and helpful medical annotator who have been working on medical and clinical texts.

An adverse drug event (ADE) refers to any harm or negative reaction that occurs due to the use of a medication, regardless of whether it is caused by a mistake or error in use.

I will provide you a clinical texts related to possible Adverse Drug Events (ADE) and you will classify them as ADE POSITIVE or ADE NEGATIVE



Example ADE NEGATIVE Sentences:



    A 42-year-old woman had uneventful bilateral laser-assisted subepithelial keratectomy (LASEK) to correct myopia.

    We present a case report of a cytomegalovirus (CMV)-seronegative, 58-year-old male who received a CMV-seropositive donor liver transplant without CMV prophylaxis.

    His dibucaine number was 21, and the Michaelis constant was 5.5 times that of normal sera.

    Medical treatment of a grossly enlarged adenomyo

## 2.2 Get Predictions

In [13]:
import datetime

# Assing auto name to save prediction data as csv and excel
now = datetime.datetime.now()
file_name = f"ADE_Classifier_preds_{now.strftime('%m%d_%H%M')}"
processed_data_path = os.path.join('.', 'processed_data', file_name)

for idx, row in ade_df.iterrows():
    sentence = row['description']
    result = ade_classifier.do_query(sentence)
    predicted_ade = result['choices'][0]['message']['content']
    ade_df.at[idx, "predictions"] = ade_classifier.ade_fix_output_typo(predicted_ade)
    print(idx, "prediction: ", predicted_ade, "gt: ", row["category"])
    
ade_df.to_csv(processed_data_path)

3417 prediction:  NEGATIVE gt:  NEGATIVE
12420 prediction:  POSITIVE gt:  NEGATIVE
14168 prediction:  NEGATIVE gt:  NEGATIVE
8131 prediction:  POSITIVE gt:  NEGATIVE
7007 prediction:  POSITIVE gt:  NEGATIVE
683 prediction:  POSITIVE gt:  NEGATIVE
8768 prediction:  NEGATIVE gt:  NEGATIVE
4013 prediction:  POSITIVE gt:  NEGATIVE
2054 prediction:  POSITIVE gt:  NEGATIVE
15809 prediction:  NEGATIVE gt:  NEGATIVE
6850 prediction:  NEGATIVE gt:  NEGATIVE
10289 prediction:  POSITIVE gt:  NEGATIVE
2302 prediction:  POSITIVE. gt:  NEGATIVE
9001 prediction:  POSITIVE gt:  NEGATIVE
11004 prediction:  NEGATIVE gt:  NEGATIVE
1252 prediction:  NEGATIVE gt:  NEGATIVE
9830 prediction:  NEGATIVE gt:  NEGATIVE
4057 prediction:  POSITIVE gt:  NEGATIVE
8774 prediction:  POSITIVE gt:  NEGATIVE
13132 prediction:  POSITIVE gt:  NEGATIVE
2582 prediction:  POSITIVE gt:  POSITIVE
2878 prediction:  POSITIVE gt:  POSITIVE
3354 prediction:  POSITIVE gt:  POSITIVE
3080 prediction:  POSITIVE gt:  POSITIVE
1603 predi

# 3. Evaluation

In [14]:
evaluation_data = list(zip(ade_df['category'],ade_df['predictions']))

In [15]:
evaluation_result = ade_classifier.ade_metrics(evaluation_data, file_name)
evaluation_result

{'Version': 'ADE_Classifier_preds_0404_2132',
 'Positive Accuracy': 1.0,
 'Negative Accuracy': 0.4,
 'Positive Not Found': 0,
 'Negative Not Found': 0,
 'Positive False Positives': 0,
 'Negative False Positives': 12}

In [17]:
import json
# create folder if it doesn't exist
if not os.path.exists('eval_results'):
    os.makedirs('eval_results')
    
# write result to JSON file
with open('eval_results/ade_eval_result.json', 'a') as f:
    json.dump(evaluation_result, f)
    print("Results appended to file: eval_results/ade_eval_result.json")

Results appended to file: eval_results/ade_eval_result.json
