In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import time
import requests
import os
import json
from tqdm import tqdm
import pickle
from sklearn.metrics import classification_report

#### Data

In [2]:
# Read the sample documents and their reference labels from CSV.
layer_data = pd.read_csv('../data/layer-sample-data.csv')
# use only the data with binary labels (the first 50 rows)
# .copy() detaches the subset from layer_data: later cells add prediction
# columns to eval_data, and writing into an .iloc slice is a chained
# assignment (SettingWithCopyWarning) with no guarantee about which frame
# ends up holding the new columns.
eval_data = layer_data.iloc[:50, :].copy()

Count the maximum number of tokens in a single input document

In [5]:
import tiktoken

# Look the tokenizer up by model name so the count matches the model that is
# queried further down; a single lookup is enough, the encoding object is
# reused for every document.
encoding = tiktoken.encoding_for_model("gpt-4-0125-preview")

# Token length of the longest document. default=0 keeps the result at 0 for
# an empty frame instead of raising on an empty sequence.
max_len = max(
    (len(encoding.encode(text)) for text in tqdm(eval_data['text'])),
    default=0,
)
print('Longest input %i'%(max_len))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 6161.75it/s]

Longest input 632





#### OpenAI

In [3]:
# Take the API key from the environment rather than from a global that no
# cell defines. A missing key fails right here with a KeyError naming the
# variable, and the secret never has to be typed into the notebook.
access_token = os.environ['OPENAI_API_KEY']

# HTTP headers sent with every request: JSON body plus bearer-token auth.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {access_token}'
}

Instruction template (system prompt)

In [3]:
# Prompt template used as the "system" message of each request.
# Every '$$$' is a placeholder: the query loop fills it in with the condition
# name via ins.replace('$$$', condition). The template asks for a single JSON
# object so the reply can be decoded with json.loads().
ins = """You are a medical doctor specialized in $$$ diagnosis.
From the provided document, assert if the patient historically  and currently has $$$.
For each condition, only pick from "YES", "NO", or "MAYBE". You must follow format without anything further. The results have to be directly parseable with python json.loads().
Sample output: {"$$$": "MAYBE"}
Never output anything beyond the format.
Provided document:"""

Query API

In [23]:
# Send one chat-completion request per (condition, document) pair and keep the
# decoded JSON replies, grouped by condition and in document order.
API_URL = "https://api.openai.com/v1/chat/completions"
REQUEST_TIMEOUT_S = 60      # abort a hung request instead of blocking the loop forever
PAUSE_BETWEEN_CALLS_S = 6   # fixed pause before each call (presumably rate-limit throttling)

response_lst = {}
for condition in ['cancer', 'diabetes']:
    response_lst[condition] = []
    # The system prompt only depends on the condition, so build it once here
    # rather than once per document.
    system_prompt = ins.replace('$$$', condition)
    for msg in tqdm(eval_data['text'].to_list()):
        data = {"model": "gpt-4-0125-preview",
                "temperature": 0.1,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": msg}]}
        time.sleep(PAUSE_BETWEEN_CALLS_S)
        response = requests.post(API_URL, headers=headers, json=data,
                                 timeout=REQUEST_TIMEOUT_S)
        # Fail on HTTP errors (bad key, rate limit, server error) at the point
        # they happen. Without this the error payload would be stored like a
        # normal reply and only blow up later when 'choices' is looked up.
        response.raise_for_status()
        response_lst[condition].append(response.json())


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [05:57<00:00,  7.16s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [06:09<00:00,  7.39s/it]


Save results

In [24]:
# Write the collected API replies to disk so they can be reloaded without
# querying the service again.
with open('../data/gpt4_response_update.pkl', 'wb') as out_file:
    pickle.dump(response_lst, out_file)

In [4]:
# Reload the stored API replies from the pickle file.
with open('../data/gpt4_response_update.pkl', 'rb') as in_file:
    gpt4_response = pickle.load(in_file)

Clean results for cancer

In [5]:
# The answer text of each reply sits in the first choice's message content and
# is itself a JSON object; decode it and keep the value under 'cancer'.
cancer_lst = [
    json.loads(reply['choices'][0]['message']['content'])['cancer']
    for reply in gpt4_response['cancer']
]

Clean results for diabetes

In [6]:
# The answer text of each reply sits in the first choice's message content and
# is itself a JSON object; decode it and keep the value under 'diabetes'.
diabetes_lst = [
    json.loads(reply['choices'][0]['message']['content'])['diabetes']
    for reply in gpt4_response['diabetes']
]

In [7]:
# Attach the raw answers and a binary label per condition. Only an exact
# "YES" maps to 1; every other answer ("NO", "MAYBE") maps to 0.
for condition, answers in (('cancer', cancer_lst), ('diabetes', diabetes_lst)):
    eval_data[f'{condition}_response'] = answers
    eval_data[f'{condition}_label'] = [int(answer == 'YES') for answer in answers]
eval_data.head()

Unnamed: 0,patient_identifier,text,has_cancer,has_diabetes,cancer_response,cancer_label,diabetes_response,diabetes_label
0,2200,DISCHARGE SUMMARY:\n\nPatient Name: [Redacted]...,1,0,YES,1,NO,0
1,645,Discharge Summary:\n\nPatient: [Name]\n\nMedic...,0,0,NO,0,NO,0
2,2563,Discharge Summary:\nPatient name: [REDACTED]\n...,0,0,NO,0,NO,0
3,2275,Discharge Summary:\n\nPatient: 59-year-old Ita...,1,0,YES,1,NO,0
4,1828,Hospital Course:\n\nThe 80-year-old male prese...,0,1,NO,0,YES,1


#### Evaluation

Performance for cancer

In [8]:
# Reference labels vs. model-derived labels for cancer, both as plain int lists.
cancer_truth = eval_data['has_cancer'].astype('int').to_list()
cancer_pred = eval_data['cancer_label'].astype('int').to_list()
print(classification_report(cancer_truth, cancer_pred))

              precision    recall  f1-score   support

           0       0.88      1.00      0.94        30
           1       1.00      0.80      0.89        20

    accuracy                           0.92        50
   macro avg       0.94      0.90      0.91        50
weighted avg       0.93      0.92      0.92        50



Performance for diabetes

In [9]:
# Reference labels vs. model-derived labels for diabetes.
diabetes_truth = eval_data['has_diabetes'].astype('int').to_list()
diabetes_pred = eval_data['diabetes_label'].to_list()
print(classification_report(diabetes_truth, diabetes_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        45
           1       1.00      1.00      1.00         5

    accuracy                           1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50



In [10]:
# all false negative cases for cancer diagnosis
# A false negative is a row whose reference label is positive (has_cancer == 1)
# while the derived prediction is negative (cancer_label == 0). A plain
# "truth != prediction" filter would also return false positives, which does
# not match what this frame is named for.
is_false_negative = (eval_data['has_cancer'].astype('int') == 1) & (eval_data['cancer_label'] == 0)
fn_cancer = eval_data[is_false_negative]
fn_cancer

Unnamed: 0,patient_identifier,text,has_cancer,has_diabetes,cancer_response,cancer_label,diabetes_response,diabetes_label
26,1814,Discharge Summary:\n\nPatient Name: [REDACTED]...,1,0,MAYBE,0,NO,0
27,3146,Hospital Course Summary:\n\nAdmission Date: [D...,1,0,MAYBE,0,NO,0
41,2117,Hospital Course:\n\nThe patient is a 10-year-o...,1,0,NO,0,NO,0
46,2840,Patient Information:\n\nName: [Redacted]\nGend...,1,0,NO,0,NO,0
