Extract admission date, discharge date and Subject ID from ADMISSIONS.csv

In [1]:
import csv
sub_id = "00000"
admt_time = ""
disch_time = ""

with open('samples/ADMISSIONS.csv') as adm:
   csv_reader = csv.reader(adm)
   for index, row in enumerate(csv_reader):
        if index == 1:
            sub_id = (row[1])
        if sub_id in row:
            admt_time = (row[3])
            disch_time = (row[4])

adm.close()

Extract ICD9 code of diagnosis for the same patient, and decode the ICD9 code from DIAGNOSES_ICD.csv

In [2]:
from icd9cms.icd9 import search
icd9_code = []
with open('samples/DIAGNOSES_ICD.csv') as icd:
   csv_reader = csv.reader(icd)
   for index, row in enumerate(csv_reader):
        if sub_id in row:
            icd9_code.append(row[4])
icd.close()
if len(icd9_code) == 0:
    print("N/A. No Diagnoses data found.")
else:
    icd = str(icd9_code[0])
    # icd = icd[:3] + '.' + icd[3:]
    icd_desc = search(icd)
    icd_desc = str(icd_desc).replace(str(icd), "")
    icd_desc = icd_desc.replace(":", ", ") + "."



Extract all Administered Medication list for the patient from PRESCRIPTIONS.csv

In [3]:
PRESCRIPTIONS = []
with open('samples/PRESCRIPTIONS.csv') as presc:
   csv_reader = csv.reader(presc)
   for index, row in enumerate(csv_reader):
        if sub_id in row:
            PRESCRIPTIONS.append(row[7])
presc.close()

PRESCRIPTIONS = list(dict.fromkeys(PRESCRIPTIONS))
if len(PRESCRIPTIONS) == 0:
    print("N/A. No prescription data found.")
else:
    print(str(PRESCRIPTIONS))
presc = str(PRESCRIPTIONS)

['Sodium Chloride 0.9%  Flush', 'Glipizide', 'Metoprolol', 'Sevelamer', 'Insulin', 'Pantoprazole', 'Acetaminophen', 'Magnesium Oxide', 'Docusate Sodium', 'Senna', 'Magnesium Sulfate', 'Linezolid', 'Potassium Chloride', 'Diphenhydramine HCl', 'Zolpidem Tartrate', 'Midazolam HCl', 'Metronidazole', 'Levofloxacin', 'Phytonadione', 'Oxycodone-Acetaminophen', 'Alteplase (Catheter Clearance)', 'Warfarin', 'Moexipril HCl', 'Aspirin', 'Atorvastatin', 'Metoclopramide', 'Pneumococcal Vac Polyvalent', 'Ceftriaxone', 'Vancomycin HCl', 'Ampicillin Sodium', 'Unasyn', 'Iso-Osmotic Dextrose', 'NS', 'D5W']


Load the GPT2 model and Tokenizer 

In [4]:
import tensorflow as tf
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer
# from transformers import OpenAIGPTTokenizer, OpenAIGPTModel


tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# add the EOS token as PAD token to avoid warnings
model = TFGPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)



2023-08-02 21:10:39.099800: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.
All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Build the PDF format and write data

In [8]:
import os
from fpdf import FPDF

import random

no_of_reports = 1

for j in range(1,int(no_of_reports+1)):
    pdf = FPDF(format = 'A4')
    pdf.add_page()
    pdf.set_font('helvetica', '', 13.0)
    pdf.set_xy(60,8)
    pdf.cell(ln=1, h=22.0, align='C', w=75.0, txt='Auto Generated Clinical Note.', border=0)
    pdf.set_x(25)

    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Subject ID', border=1)
    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=sub_id, border=1)

    pdf.set_xy(25,40)
    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Admission Date and time', border=1)
    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=admt_time, border=1)


    pdf.set_xy(25, 50)
    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Discharge Date and time', border=1)
    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=disch_time, border=1)

    pdf.dashed_line(20, 70, 180, 70, dash_length = 1, space_length = 1)
    pdf.set_xy(60, 75)
    pdf.set_font('helvetica', '', 13.0)
    pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Full Prescription Summary', border=0)

    pdf.set_font('helvetica', '', 10.0)


    pdf.set_line_width(0.0)
    pdf.rect(15.0, 15.0, 170.0, 245.0)
    pdf.set_x(25)
    pdf.multi_cell( h=4.0, align='C', w=160, txt=presc, border=0)

    pdf.dashed_line(20, 130, 180, 130, dash_length = 1, space_length = 1)

    pdf.set_xy(60, 185)
    pdf.set_font('helvetica', '', 13.0)
    pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Notes', border=0)

    pdf.set_font('helvetica', '', 10.0)

    pdf.set_line_width(0.0)
    pdf.rect(15.0, 15.0, 170.0, 245.0)

    ''' Below, we write the initial text, and later on,
        the GPT2 generator concatenates the relevant medical notes, based on the initial text '''
    
    input_ids = tokenizer.encode('The patient was diagnosed with ' + icd_desc + " and showed symptoms of ", return_tensors='tf')
    sample_outputs = model.generate(input_ids,do_sample=True, max_length=150, top_k=100, top_p=0.85, num_return_sequences=1)
    output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace("\r\n", "").replace('\n\n', '').replace('\n', '')
    #print(output)
    pdf.set_xy(25,150)
    pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)
                 
    pdf.output(f"./{j}_summary.pdf", 'F')
    print("Report Generated Successfully.")

Report Generated Successfully.


In [6]:
print("Input Data Provided from Dataset: ")
print("")
print("ADMISSIONS.csv: ")
print("Subject ID: " + sub_id)
print("Admission Time: " + str(admt_time))
print("Discharge Time: " + str(disch_time))
print("")
print("DIAGNOSIS_ICD.csv: ")
print("Diagnosis ICD9 Code: " + icd_desc)
print("")
print("PRESCRIPTIONS.csv: ")
print(presc)

Input Data Provided from Dataset: 

ADMISSIONS.csv: 
Subject ID: 10006
Admission Time: 2164-10-23 21:09:00
Discharge Time: 2164-11-01 17:15:00

DIAGNOSIS_ICD.csv: 
Diagnosis ICD9 Code: , Sepsis, Sepsis.

PRESCRIPTIONS.csv: 
['Sodium Chloride 0.9%  Flush', 'Glipizide', 'Metoprolol', 'Sevelamer', 'Insulin', 'Pantoprazole', 'Acetaminophen', 'Magnesium Oxide', 'Docusate Sodium', 'Senna', 'Magnesium Sulfate', 'Linezolid', 'Potassium Chloride', 'Diphenhydramine HCl', 'Zolpidem Tartrate', 'Midazolam HCl', 'Metronidazole', 'Levofloxacin', 'Phytonadione', 'Oxycodone-Acetaminophen', 'Alteplase (Catheter Clearance)', 'Warfarin', 'Moexipril HCl', 'Aspirin', 'Atorvastatin', 'Metoclopramide', 'Pneumococcal Vac Polyvalent', 'Ceftriaxone', 'Vancomycin HCl', 'Ampicillin Sodium', 'Unasyn', 'Iso-Osmotic Dextrose', 'NS', 'D5W']
