In [1]:
import pandas as pd
import os
import sys


# Absolute path of the directory two levels above the current working directory.
# It is put on the import path right before the `src.*` imports that follow
# (presumably the project root when the notebook is launched from its own folder — verify).
root_path = os.path.abspath(os.path.join(os.curdir, os.pardir, os.pardir))
# Guard the append so that re-running this cell does not stack duplicate entries on sys.path.
if root_path not in sys.path:
    sys.path.append(root_path)

from src.utils.preprocessing import load_data, save_data
from src.inference.infer_openai import GPTWrapper


%load_ext autoreload
%autoreload 2

In [2]:
# File name pattern of the prepared inputs: first slot is the task, second the shot count.
path_template = 'data/openai_inputs/openai_input_{}_{}_shots.jsonl'

# Every (task, n_shots) combination, tasks in the outer position so the key order is
# ('di', 1), ('di', 2), ('di', 3), ('bhc', 1), ...
input_keys = [(task_name, shot_count) for task_name in ('di', 'bhc') for shot_count in (1, 2, 3)]

# One loaded dataset per combination, keyed by the (task, n_shots) tuple.
dfs = {
    input_key: load_data(path_template.format(*input_key), type='jsonl')
    for input_key in input_keys
}

# Number of entries in the ('di', 1) dataset, kept for later use.
len_ = len(dfs['di', 1])

gpt = GPTWrapper()

In [3]:
# Run the wrapper's cost estimation on every loaded dataset, keeping the same
# (task, n_shots) keys as in dfs.
costs_dict = {
    input_key: gpt.estimate_all_cost(input_df)
    for input_key, input_df in dfs.items()
}

100%|██████████| 250/250 [00:00<00:00, 64730.91it/s]
100%|██████████| 250/250 [00:02<00:00, 92.91it/s] 
100%|██████████| 250/250 [00:00<00:00, 2630.75it/s]
100%|██████████| 250/250 [00:00<00:00, 114410.91it/s]
100%|██████████| 250/250 [00:03<00:00, 78.98it/s]
100%|██████████| 250/250 [00:00<00:00, 2615.89it/s]
100%|██████████| 250/250 [00:00<00:00, 197769.90it/s]
100%|██████████| 250/250 [00:04<00:00, 58.23it/s]
100%|██████████| 250/250 [00:00<00:00, 2757.10it/s]
100%|██████████| 250/250 [00:00<00:00, 226229.99it/s]
100%|██████████| 250/250 [00:01<00:00, 137.63it/s]
100%|██████████| 250/250 [00:00<00:00, 1184.27it/s]
100%|██████████| 250/250 [00:00<00:00, 137356.04it/s]
100%|██████████| 250/250 [00:02<00:00, 95.06it/s] 
100%|██████████| 250/250 [00:00<00:00, 1212.04it/s]
100%|██████████| 250/250 [00:00<00:00, 169837.38it/s]
100%|██████████| 250/250 [00:03<00:00, 67.75it/s]
100%|██████████| 250/250 [00:00<00:00, 1161.60it/s]


In [4]:
# Dump the raw cost estimates, one entry per (task, n_shots) key.
print(costs_dict)

{('di', 1): {'input_token_count': 1695119, 'input_token_cost': 16.95119, 'output_token_count': 68793, 'output_token_cost': 2.06379}, ('di', 2): {'input_token_count': 2522254, 'input_token_cost': 25.222540000000002, 'output_token_count': 68793, 'output_token_cost': 2.06379}, ('di', 3): {'input_token_count': 3368426, 'input_token_cost': 33.68426, 'output_token_count': 68793, 'output_token_cost': 2.06379}, ('bhc', 1): {'input_token_count': 1458893, 'input_token_cost': 14.588930000000001, 'output_token_count': 167875, 'output_token_cost': 5.03625}, ('bhc', 2): {'input_token_count': 2228715, 'input_token_cost': 22.28715, 'output_token_count': 167875, 'output_token_cost': 5.03625}, ('bhc', 3): {'input_token_count': 3065210, 'input_token_cost': 30.6521, 'output_token_count': 167875, 'output_token_cost': 5.03625}}


In [6]:
shot_counts = [1, 2, 3]


def _costs_for(task_name, cost_field):
    """Return the `cost_field` entry of `task_name` for each shot count, in order."""
    return [costs_dict[(task_name, n_shots)][cost_field] for n_shots in shot_counts]


# One row per shot count; one column per task and cost direction.
costs_df = pd.DataFrame({
    'BHC_input': _costs_for('bhc', 'input_token_cost'),
    'BHC_output (estimated with gold)': _costs_for('bhc', 'output_token_cost'),
    'DI_input': _costs_for('di', 'input_token_cost'),
    'DI_output (estimated with gold)': _costs_for('di', 'output_token_cost'),
})

# Row-wise sum of the four cost columns, added in the same left-to-right order as before.
bhc_subtotal = costs_df['BHC_input'] + costs_df['BHC_output (estimated with gold)']
costs_df['total'] = bhc_subtotal + costs_df['DI_input'] + costs_df['DI_output (estimated with gold)']
display(costs_df)

Unnamed: 0,BHC_input,BHC_output (estimated with gold),DI_input,DI_output (estimated with gold),total
0,14.58893,5.03625,16.95119,2.06379,38.64016
1,22.28715,5.03625,25.22254,2.06379,54.60973
2,30.6521,5.03625,33.68426,2.06379,71.4364


In [7]:
# Rescale every cost to a per-100-entries figure: divide by len_ and multiply by 100,
# then label each row with its shot count.
# The 'n_shots' column doubles as a marker that the rescaling already happened, so
# re-running this cell neither rescales the table a second time nor fails on inserting
# a column that already exists.
if 'n_shots' not in costs_df.columns:
    costs_df = (costs_df/(len_))*100
    costs_df.insert(0, 'n_shots', [1, 2, 3])

In [8]:
# Render the cost table in its current state (after the rescaling by len_ and the
# insertion of the n_shots column in the preceding cell).
display(costs_df)

Unnamed: 0,n_shots,BHC_input,BHC_output (estimated with gold),DI_input,DI_output (estimated with gold),total
0,1,5.835572,2.0145,6.780476,0.825516,15.456064
1,2,8.91486,2.0145,10.089016,0.825516,21.843892
2,3,12.26084,2.0145,13.473704,0.825516,28.57456


In [9]:
# Inspect the loaded inputs stored under the ('di', 1) key.
display(dfs[('di', 1)])

Unnamed: 0,idx,prompt,gold
0,27645935,"""You are a medical assistant. Your task is to ...",You were admitted to ___ with abdominal pain a...
1,21507328,"""You are a medical assistant. Your task is to ...","Dear Mr. ___,\n\nIt was a pleasure taking care..."
2,28570159,"""You are a medical assistant. Your task is to ...","Dear Mr. ___,\nIt was a pleasure caring for yo..."
3,22123054,"""You are a medical assistant. Your task is to ...",* You were admitted to the hosptial after a f...
4,28857998,"""You are a medical assistant. Your task is to ...","Dear Ms. ___,\n\nIt was a pleasure taking care..."
...,...,...,...
245,22419624,"""You are a medical assistant. Your task is to ...","Dear Ms. ___,\nIt was our pleasure participati..."
246,22092234,"""You are a medical assistant. Your task is to ...","Dear Ms. ___,\n\nIt was a pleasure to particip..."
247,23044177,"""You are a medical assistant. Your task is to ...","Mr. ___,\nYou were admitted to us because of p..."
248,23899546,"""You are a medical assistant. Your task is to ...","Ms. ___,\nYou were admitted due to a gallstone..."


In [10]:
# Print the full text of entry 5 of the 'prompt' column for the ('bhc', 2) inputs.
print(dfs[('bhc', 2)]['prompt'][5])

"You are a medical assistant. Your task is to write the brief hospital course corresponding to a given hospital discharge, as shown in the following examples:

Example 1:
START OF DISCHARGE:
 
Name:  ___                Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   F
 
Service: MEDICINE
 
Allergies: 
lisinopril
 
Attending: ___.
 
Chief Complaint:
AMS / UTI
 
Major Surgical or Invasive Procedure:
None
 
History of Present Illness:
This is a ___ year old woman with a history of fetal alcohol 
syndrome/mental retardation, hearing-impaired, schizophrenia, 
NIDDM, who is presenting with fever and AMS. The patient lives 
in a group home but went to her ___ house today for 
___ dinner and was noted to be febrile and more altered 
from baseline. The patient is usually conversant and able to 
follow instructions at baseline and walks with a walker.  

In the ED, initial vitals were: 100.9 94 140/90 20 94% RA  
 - Labs were sign

In [2]:
# Location of the stored output file. The default is the path originally used here;
# set the OPENAI_OUTPUT_PATH environment variable to point at the file on another
# machine instead of editing this user-specific absolute path.
output_path = os.environ.get(
    "OPENAI_OUTPUT_PATH",
    "/Users/paul/Desktop/MAKE/make-discharge-me/data/infered_openai/gpt4/openai_output_di_1_shots.jsonl",
)
output = load_data(output_path, type = 'jsonl')

In [3]:
# Render the loaded output table.
display(output)

Unnamed: 0,idx,prompt,answer,gold
0,27645935,"""You are a medical assistant. Your task is to ...",START OF EXPECTED Discharge Instructions OUTPU...,You were admitted to ___ with abdominal pain a...
1,21507328,"""You are a medical assistant. Your task is to ...","Dear Mr. ___,\n\nIt was a pleasure taking care...","Dear Mr. ___,\n\nIt was a pleasure taking care..."
2,28570159,"""You are a medical assistant. Your task is to ...",You have been admitted to the hospital due to ...,"Dear Mr. ___,\nIt was a pleasure caring for yo..."
3,22123054,"""You are a medical assistant. Your task is to ...",START OF DISCHARGE INSTRUCTIONS:\n\n- Please e...,* You were admitted to the hosptial after a f...
4,28857998,"""You are a medical assistant. Your task is to ...",INSTRUCTIONS AFTER COMPLEX CARDIOVASCULAR AND ...,"Dear Ms. ___,\n\nIt was a pleasure taking care..."
...,...,...,...,...
245,22419624,"""You are a medical assistant. Your task is to ...","Dear Ms. ___,\n\nYou were admitted to our hosp...","Dear Ms. ___,\nIt was our pleasure participati..."
246,22092234,"""You are a medical assistant. Your task is to ...",**Pacemaker Care and Monitoring**\n- Monitor t...,"Dear Ms. ___,\n\nIt was a pleasure to particip..."
247,23044177,"""You are a medical assistant. Your task is to ...","Dear ___,\n\nYou were admitted to our Neurolog...","Mr. ___,\nYou were admitted to us because of p..."
248,23899546,"""You are a medical assistant. Your task is to ...","Dear [Patient's Name],\n\nYou were admitted to...","Ms. ___,\nYou were admitted due to a gallstone..."


In [4]:
# Print the 'prompt', 'answer' and 'gold' values of entry 0 one after another,
# with a marker line between consecutive values.
marker = "=========yyyyyyyyyy========"
first_entry_fields = [output[column][0] for column in ('prompt', 'answer', 'gold')]
print(*first_entry_fields, sep="\n" + marker + "\n")

"You are a medical assistant. Your task is to write the discharge instructions corresponding to a given hospital discharge, as shown in the following example:

Example 1:
START OF DISCHARGE:
 
Name:  ___                  Unit No:   ___
 
Admission Date:  ___              Discharge Date:   ___
 
Date of Birth:  ___             Sex:   M
 
Service: SURGERY
 
Allergies: 
No Known Allergies / Adverse Drug Reactions
 
Attending: ___.
 
Chief Complaint:
Abdominal Pain/Lack of Bowel Function
 
Major Surgical or Invasive Procedure:
None

 
History of Present Illness:
Mr. ___ is a pleasant ___ y/o gentleman, previously healthy
save for some hypertension, who presented to ___
yesterday with 14h of abd pain, as well as nausea but no emesis. 

He reports a bowel movement and flatus yesterday morning but has
not had any since that time.  His pain improved with some
morphine at presentation yesterday and he has not required any
additional pain medication, but he still had some lower 
abdominal
discom