In [9]:
import pandas as pd
from transformers import LongformerTokenizer, LongformerForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import classification_report
from datasets import Dataset
import torch

# Load datasets
train_df = pd.read_csv("train_50.csv")
val_df = pd.read_csv("val_50.csv")
test_df = pd.read_csv("test_50.csv")

# Define ICD-9 code columns
icd9_columns = [
    '038.9', '244.9', '250.00', '272.0', '272.4', '276.1', '276.2', '285.1', '285.9',
    '287.5', '305.1', '311', '36.15', '37.22', '37.23', '38.91', '38.93', '39.61',
    '39.95', '401.9', '403.90', '410.71', '412', '414.01', '424.0', '427.31', '428.0',
    '486', '496', '507.0', '511.9', '518.81', '530.81', '584.9', '585.9', '599.0',
    '88.56', '88.72', '93.90', '96.04', '96.6', '96.71', '96.72', '99.04', '99.15',
    '995.92', 'V15.82', 'V45.81', 'V45.82', 'V58.61'
]

# Initialize tokenizer
tokenizer = LongformerTokenizer.from_pretrained("yikuan8/Clinical-Longformer")

# Preprocessing function
def preprocess_dataset(df, tokenizer, icd9_columns):
    # Extract labels and convert to float
    labels = df[icd9_columns].values.astype(float).tolist()
    
    # Tokenize text
    tokenized = tokenizer(
        df["TEXT"].tolist(),
        padding="max_length",
        truncation=True,
        max_length=512
    )
    
    # Create Dataset
    dataset_dict = {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": labels
    }
    return Dataset.from_dict(dataset_dict)

# Preprocess datasets
train_data = preprocess_dataset(train_df, tokenizer, icd9_columns)
val_data = preprocess_dataset(val_df, tokenizer, icd9_columns)
test_data = preprocess_dataset(test_df, tokenizer, icd9_columns)

# Load model for multi-label classification
num_labels = len(icd9_columns)
model = LongformerForSequenceClassification.from_pretrained(
    "yikuan8/Clinical-Longformer",
    num_labels=num_labels,
    problem_type="multi_label_classification"
)

# Adjust class weights to handle label imbalance
label_counts = train_df[icd9_columns].sum()
class_weights = torch.tensor(label_counts.max() / label_counts, dtype=torch.float32)
model.classifier.loss_fct = torch.nn.BCEWithLogitsLoss(pos_weight=class_weights)

# Define metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = (logits > 0.5).astype(int)
    report = classification_report(labels, preds, target_names=icd9_columns, output_dict=True)
    macro_f1 = report["macro avg"]["f1-score"]
    micro_f1 = report["micro avg"]["f1-score"]
    return {"macro_f1": macro_f1, "micro_f1": micro_f1}

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=20,  # Train for 20 epochs
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
    fp16=True,  # Enable mixed precision training
    lr_scheduler_type="cosine"  # Use cosine learning rate scheduler
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

# Save model and tokenizer
model.save_pretrained("./clinical_longformer_icd91")
tokenizer.save_pretrained("./clinical_longformer_icd91")

# Evaluate the model on test data
results = trainer.evaluate(eval_dataset=test_data)
print("Evaluation Results:", results)

# Predict on test data
predictions = trainer.predict(test_data)
logits = predictions.predictions
predicted_labels = (logits > 0.5).astype(int)  # Apply threshold for binary predictions

# Add predicted labels to the DataFrame
for i, column in enumerate(icd9_columns):
    test_df[f"predicted_{column}"] = predicted_labels[:, i]

# Save predictions
test_df.to_csv("test_predictions_with_labels.csv", index=False)
print("Predictions saved to 'test_predictions_with_labels.csv'")


Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at yikuan8/Clinical-Longformer and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  class_weights = torch.tensor(label_counts.max() / label_counts, dtype=torch.float32)


Epoch,Training Loss,Validation Loss,Macro F1,Micro F1
1,0.2789,0.287505,0.09935,0.210099
2,0.2418,0.267974,0.157286,0.299346
3,0.2499,0.256423,0.211826,0.366105
4,0.2441,0.249863,0.263424,0.430102
5,0.205,0.247727,0.297674,0.45307
6,0.1804,0.247478,0.319489,0.472111
7,0.1757,0.24647,0.326982,0.483356
8,0.2054,0.251085,0.345546,0.492161
9,0.1924,0.255861,0.360987,0.496922
10,0.1621,0.255664,0.363211,0.495248


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Evaluation Results: {'eval_loss': 0.27813971042633057, 'eval_macro_f1': 0.3980581950664842, 'eval_micro_f1': 0.5123201383313777, 'eval_runtime': 25.41, 'eval_samples_per_second': 67.887, 'eval_steps_per_second': 17.001, 'epoch': 20.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Predictions saved to 'test_predictions_with_labels.csv'


In [2]:
import torch
from transformers import LongformerTokenizer, LongformerForSequenceClassification

# Load the fine-tuned model and tokenizer
model_path = "./clinical_longformer_icd91"
tokenizer = LongformerTokenizer.from_pretrained(model_path)
model = LongformerForSequenceClassification.from_pretrained(model_path)
model.eval()  # Set the model to evaluation mode

# ICD-9 code columns used during training
icd9_columns = [
    '038.9', '244.9', '250.00', '272.0', '272.4', '276.1', '276.2', '285.1', '285.9',
    '287.5', '305.1', '311', '36.15', '37.22', '37.23', '38.91', '38.93', '39.61',
    '39.95', '401.9', '403.90', '410.71', '412', '414.01', '424.0', '427.31', '428.0',
    '486', '496', '507.0', '511.9', '518.81', '530.81', '584.9', '585.9', '599.0',
    '88.56', '88.72', '93.90', '96.04', '96.6', '96.71', '96.72', '99.04', '99.15',
    '995.92', 'V15.82', 'V45.81', 'V45.82', 'V58.61'
]

# Function for making predictions
def predict_icd9(texts, tokenizer, model, threshold=0.5):
    """
    Predict ICD-9 codes for the given input texts.
    
    Args:
        texts (list): A list of clinical texts to predict ICD-9 codes for.
        tokenizer: The tokenizer for the model.
        model: The fine-tuned Longformer model.
        threshold (float): Threshold to apply for binary classification.
    
    Returns:
        list: Predicted ICD-9 codes for each input text.
    """
    inputs = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    
    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"]
        )
        logits = outputs.logits
        probabilities = torch.sigmoid(logits)  # Apply sigmoid to get probabilities
        predictions = (probabilities > threshold).int()  # Apply threshold to get binary predictions
    
    # Map predictions to ICD-9 codes
    predicted_icd9 = []
    for pred in predictions:
        codes = [icd9_columns[i] for i, val in enumerate(pred) if val == 1]
        predicted_icd9.append(codes)
    
    return predicted_icd9

# Example input texts
input_texts = [
    "Patient has .",
    "Patient diagnosed with pneumonia and acute kidney injury. Requires immediate intervention."
]

# Make predictions
predictions = predict_icd9(input_texts, tokenizer, model)
for i, (text, pred) in enumerate(zip(input_texts, predictions)):
    print(f"Text {i + 1}:\n{text}\nPredicted ICD-9 Codes: {pred}\n")


Some weights of the model checkpoint at ./clinical_longformer_icd91 were not used when initializing LongformerForSequenceClassification: ['classifier.loss_fct.pos_weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Text 1:
Patient has headache.
Predicted ICD-9 Codes: ['38.93', '401.9']

Text 2:
Patient diagnosed with pneumonia and acute kidney injury. Requires immediate intervention.
Predicted ICD-9 Codes: ['584.9']



In [5]:
# Example discharge summaries as input
discharge_summaries = [
    """
    date of birth:               sex:   m service neurology allergies nkda chief complaint multifocal pontine and cerebrellar infarcts major surgical or invasive procedure pt arrived intubated trach peg multiple bals history of present illness per admitting resident information for the note was taken from the medical records from as well as the patient s daughter hpi year old male with significant medical problems including afib noncompliant on coumadin cad s p stent placement x6 chf with diastolic dysfunction htn morbid obesity alcoholism and dm was transferred from for possible intervention following cva briefly he presented to ed by ambulance on the morning of complaining of days of general malaise and abdominal pain and one bout of emesis he was prompted to call the ambulance after an episode of near syncope during micturition as well as tinnitus he was noted to be hypotensive upon arrival to the ed which resolved following fluid boluses work up in the ed with a head ct showed likely old parietal infarct and possible area of low attenuation in the central pons infarct ct of the abdomen and chest showed no evidence of aortic dissection on presentation his inr was pt 5s and ptt 1s at 35am on he was noted to have transient slurring of his speech with normal upper and lower extremity strength tone and sensation bilaterally he also had labile blood pressures becoming hypertensive to at his speech was still slurred with decreased lue and lle strength tone and sensation were normal and intact at that time at he began to have an expressive language difficulty along with his slurred speech his tongue was noticed to deviate to the right with right facial drooping as well as drooping of the right side of his mouth he also had a poor cough and difficulty clearing his secretions lue and lle strength were decreased to tone in lue and lle was flaccid sensation was intact at on his gag reflex was absent and his lue strength was still with flaccid tone at he was unable to keep his right eye closed against resistance and his lue and lle sensation was decreased mri and mra of the head and neck on the morning of showed possible basilar artery thrombosis with multiple infarcts in the brainstem and cerebellar hemispheres he was note to have abssent gag later in morning and he was intubated prior to arrival in icu for airway protection past medical history atrial fibrillation cad s p stent placement x6 htn chf with diastolic dysfunction morbid obesity alcoholism with enlarged liver dm borderline not on medications sleep apnea r eye blindness since childhood psh s p stent placement at r cea at social history social security disability habits long standing alcoholic and smoker daughter unable to quantify family history htn physical exam on admission t afebrile bp hr irregular o2sat on ventilator gen lying in bed intubated drowsy heent nc at anicteric neck no tenderness to palpation normal rom supple no carotid or vertebral bruit cv afib with heart rate 80s no murmurs gallops rubs lung decreased air entry on left abd bs soft nontender ext no edema cyanosis neurologic examination mental status intubated drowsy however follows commands such as opening eyes moving limbs other exam diificult owing to intubated status cranial nerves pupils equally round and reactive to light to mm bilaterally extraocular movements intact bilaterally no nystagmus trapezius normal bilaterally slight facial droop on right but difficcult to characterise given intubated status motor limited owing to intubated and sedated status he was actively moving his right arm and leg was not actively moving his left ul however partially cooperated with power testing he was noted to have full power in all major muscle groups on right and was antigravity on lul in deltoids and triceps but not against resistance same was noted in lll in ip and hamstrings sensation withdraws to pain bl reflexes hyper reflexic throughout l more than r with left toe downgoing and right toe mute coordination unable to test gait deferred neurological exam at time of discharge awake alert peg and tracheostomy in place brief general exam obese l subclavian line cta b no m g r obese soft abdomen nttp edematous in yues and les non pitting pt nods and shakes head but irreproducibly to questions and requests does not follow commands reproducibly titubation after nodding his head persists for seconds eomi 2mm b l brisk symmetric face tongue to midline responds to mimicking spontaneous fingermovement in l hand noxious stim to rue leads to withdrawal flexor of the lue to noxious bilaterally l foot wiggles toes spontaneously r to noxious only responds with grimace toes extensor bilaterally pertinent results admission labs wbc rbc hgb hct mcv mch mchc rdw glucose urea n creat sodium potassium chloride total co2 anion gap calcium phosphate magnesium pt ptt inr pt hypercoagulability evaluation prothrombin mutation negative factor v leiden negative crp mini bal gram stain final per 1000x field polymorphonuclear leukocytes per 1000x field gram negative rod s per 1000x field gram positive cocci in pairs respiratory culture final ml commensal respiratory flora sputum gram stain final this is a corrected report pmns and epithelial cells 100x field gram stain indicates extensive contamination with upper respiratory secretions bacterial culture results are invalid please submit another specimen previously reported as pmns and epithelial cells 100x field per 1000x field gram negative rod s per 1000x field gram positive rod s per 1000x field gram positive cocci in pairs respiratory culture final sparse growth commensal respiratory flora catheter tips no significant growth no significant growth pending blood cultures bottle staphylococcus coagulase negative isolated from one set only gram stain gram positive cocci in clusters bottle no growth bottle no growth bottle no growth bottle no growth bottle no growth bottle pending bottle pending urine cultures no growth mrsa screens no mrsa isolated discharge labs imaging mri a head and neck impression extensive confluent and multifocal infarcts involving the inferomedial aspect of both cerebellar hemispheres with signal characteristics suggesting that these are late acute or early subacute and correspond to the infarcts demonstrated on the study performed one day earlier no associated hemorrhage herniation or evidence of obstructive hydrocephalus abnormalities involving the vertebral arteries bilaterally with possible origin stenosis on the right and distal occlusion thrombosis on the left given the clinical context the latter in particular could reflect impacted embolic material from a more proximal source the 2d tof and the cranial portion of the enhanced cervical mra both suggest very poor flow in the distal left vertebral artery as well as throughout the basilar artery and its branches including the superior cerebellar vessels bilaterally which may relate to the embolic event above there is no finding on these sequences to specifically suggest vertebral arterial dissection which in general would not account for the bilaterality of the cerebellar hemispheric findings relatively mild atherosclerotic disease involving particularly the left common and internal carotid arteries with no flow limiting stenosis there is no flow limiting stenosis in the intracranial anterior circulation focal cystic encephalomalacia and surrounding gliosis involving the left frontovertex likely related to previous embolic infarct chest x ray impression ap chest reviewed in the absence of prior chest radiographs et tube is in standard placement left pic line tip projects over the anticipated location of the mid svc mediastinum and left hemithorax are very abnormal suggesting extensive mediastinal adenopathy and pleural abnormality perhaps lingular collapse overall the findings are strongly suggestive of extensive maligcy alternatively there could be widespread hemorrhage in both mediastinum and left pleural space right lung clear no pneumothorax ct torso impression et tube approximately cm above the carina left thyroid lobe nodule clinical correlation is recommended small bilateral pleural effusions and lower lobe consolidation versus atelectasis aspiration can have a similar appearance mediastinal and hilar lymph nodes measuring up to cm in short axis diameter these may be reactive but are nonspecific coronary artery calcifications and vascular calcifications trace pericardial effusion chest x ray severe cardiomegaly is stable widened mediastinum mainly due to increase in the mediastinal fat as seen in prior ct from is unchanged et tube remains at the level of the thoracic inlet right ij catheter tip is in the upper svc left retrocardiac opacities have improved consistent with improving previously large areas of atelectasis there is no pneumothorax or large pleural effusions chest x ray indication lymphadenopathy findings endotracheal tube terminates above the thoracic inlet level and could be advanced several centimeters for standard positioning cardiomediastinal contours remain widened based upon review of recent ct torso this appears to be due to extensive mediastinal lipomatosis mild volume overload is present dense left retrocardiac opacity has developed likely a combination of atelectasis and effusion transthoracic echo the left atrium is dilated the right atrium is moderately dilated there is mild symmetric left ventricular hypertrophy the left ventricular cavity is moderately dilated due to suboptimal technical quality a focal wall motion abnormality cannot be fully excluded lv systolic function appears depressed the right ventricular cavity is dilated with depressed free wall contractility the aortic valve leaflets are mildly thickened the aortic valve is not well seen there is no aortic valve stenosis significant aortic regurgitation is present but cannot be quantified the mitral valve leaflets are mildly thickened mitral regurgitation is present but cannot be quantified there is a trivial physiologic pericardial effusion impression poor technical quality due to patient s body habitus both ventricles are dilated and hypokinetic however exact function and size cannot be determined mitral and aortic regurgitation unable to quantify pulmonary artery systolic pressure could not be determined unable to assess if lv thrombus eeg impression this is a mildly abnormal extended routine eeg in the waking state there is occasional theta and delta frequency slowing in the right temporal regions and to a lesser degree there was also mild left temporal slowing consisting of theta frequencies the cardiac rhythm was irregularly irregular with frequent pvcs there were no epileptiform features on this study ct head findings there is no intracranial hemorrhage multifocal hypodensities within the pons and the cerebellum are consistent with the prior infarcts and are better assessed on previous mri left frontal area of cystic encephalomalacia is unchanged otherwise current white matter differentiation of the hemispheres is maintained the ventricles are unchanged in size and configuration there is no uncal or transtentorial herniation there are no fractures there is opacification of scattered ethmoid air cells and mucosal thickening of the sphenoid sinus and maxillary sinuses the mastoid air cells are under pneumatized and opacified impression no intracranial hemorrhage evolving pontine and cerebellar infarcts mastoid air cell opacification and paranasal sinus disease labs at time of discharge cbc 56a pt ptt inr brief hospital course mr is a year old gentleman with a past medical history including atrial fibrillation non compliant on coumadin inr diastolic dysfunction hypertension diet controlled dm and cad s p stenting who initially presented to with tinnitus and presyncope and was transferred to the when he was discovered to have multifocal infarcts involving the pons and both cerebellar hemispheres in the setting of poor posterior circulation he was admitted to the stroke service in the icu from to neuro following his arrival to the an mri and angiography of the head and neck was performed to better characterize the lesions the studies demonstrated extensive confluent and multifocal infarcts involving the pons and inferomedial aspect of both cerebellar hemispheres the strokes appreared in the context of abnormalities in the vertebral arteries possible origin stenosis on the right and distal occlusion thrombosis on the left with poor flow in the basilar artery and its branches including the superior cerebellar vessels there was no associated hemorrhage herniation or evidence of obstructive hydrocephalus the heparin drip initiated at was continued with an initial ptt goal of and subsequent goal of following the placement of a peg coumadin was started with a target inr of two to three the patient s neurological examination remained relatively constant he appeared alert and interactive he could follow basic midline and appendicular requests eg stick out your tongue and lift your right arm there was evidence of gaze evoked nystagmus he could voluntarily shake is head yes and no in addition to moving the distal aspect of his left upper extremity and left toes occasionally clonus was noted in the left upper extremity a bilateral extensor response was observed cvs the patient was monitored by telemetry which demonstrated chronic atrial fibrillation metoprolol was given for rate control and cardioprotection however minimal doses of the beta blocker were used in an attempt to allow a target systolic blood pressure of to a statin was continued per the patient s cardiologist and pcp aspirin and plavix were not necessary for prophylaxis for his cardiac stents and thus these meds were not started as the appearance of the vessel occlusions and infarcts were suggestive of embolic phenomena from a proximal source a transthoracic echocardiogram was performed due to the technical challenges of the study the presence of an lv thrombus could not be explored there is no comment regarding the presence of asd pfo and vegetations note was made of the irregularly irregular rhythm as noted above following the placement of the peg coumadin was started for the atrial fibrillation inr goal of ptt goal while bridging is resp upon arrival mr had an endotracheal tube placed to protect the airway the ett was transitioned to trach in the course of the hospitalization he required mmv respiratory support due to having been noted to have episodes of apnea while on trach mask these occured o n only and were of 30s min duration repeat hct on was performed to evaluate for pontine hemorrhage possibly affecting the respiratory centers this was negative it was felt that etiology was most likely due to pontine infarction will need further monitoring and weaning as tolerated if apneic episodes do not recur id in the course of the hospitalization the patient developed persistent fevers the peak wbc count was the bronchealveolar lavage revealed commensal respiratory flora urine cultures were repeatedly negative one of six blood cultures grew coagulase negative staph and the result was thought to reflect contamination two mrsa screens were negative two sputum cultures were negative and gram stains were considered contaminated by upper respiratory secretions two of three catheter tip cultures were negative iv tip catheter culture grew vre no blood cultures were positive vancomycin was discontinued and he was started on linezolid for a seven day course day with last day of although daily chest x rays failed to reveal clear evidence of the condition treatment with ciprofloxacin cefuroxime and vancomycin was initiated to treat presumed ventilator associated pneumonia he completed day course of ciprofloxacin and cerfuroxime and vancomycin on heme onc there was some concern that the patient could be hypercoaguable analyses for factor v leiden and the prothrombin gene mutation were negative it might be worth conducting a more thorough study eg protein c protein s anticardiolipin ab etc for potential coagulopathy in the non acute setting because the initial chest x ray was thought to show findings concerning for maligcy a ct torso was done the study showed non specific possibly reactive mediastinal and hilar lymph nodes measuring up to cm in short axis diameter there were also small bilateral pleural effusions and lower lobe consolidation versus atelectasis no further investigatory studies were pursued abd gi ultimately on a peg was placed by the interventional radiology service to provide nutrition tube feeds were administered endo insulin was administered by sliding scale with a goal of maintaining normoglycemia renal prior to discharge the lasix was restarted for diastolic dysfunction intermittently however was eventually discontinued due to no oxygen requirement rehabilitation the physical and occupational therapy teams participated in the patient s care code full hcp medications on admission coumadin 10mg qd simvastatin 80mg qd plavix 75mg qd lasix 40mg qd doxazosin 1mg qd nitroglycerin prn chest pain discharge medications chlorhexidine gluconate mouthwash sig one ml mucous membrane times a day miconazole nitrate powder sig one appl topical times a day white petrolatum mineral oil ointment sig one appl ophthalmic prn as needed as needed for dry eyes bisacodyl mg suppository sig one suppository rectal hs at bedtime as needed for constipation famotidine mg tablet sig one tablet po bid times a day simvastatin mg tablet sig two tablet po daily daily nystatin unit ml suspension sig five ml po qid times a day as needed for thrush glucagon human recombit mg recon soln sig one recon soln injection q15min as needed for hypoglycemia protocol multivitamin tx minerals tablet sig one tablet po daily daily metoprolol tartrate mg tablet sig one tablet po tid times a day linezolid mg tablet sig one tablet po q12h every hours last day dextrose gm iv prn hypoglycemia protocol sodium chloride flush ml iv q8h prn line flush peripheral line flush with ml normal saline every hours and prn heparin flush units ml ml iv prn line flush picc heparin dependent flush with 10ml normal saline followed by heparin as above daily and prn per lumen outpatient lab work patient will need inr monitoring to goal inr of cbc monitoring should be peroformed on weekly basis until inr is stable coumadin mg tablet sig one tablet po once a day insulin nph regular human unit ml suspension sig thirty u nph subcutaneous twice a day am and hs for nph and as per sliding scale regular discharge disposition extended care facility rehab center discharge diagnosis primary embolic strokes secondary atrial fibrillation cad htn chf discharge condition hemodynamically stable on ventilator support for tracheostomy neurological exam at time of discharge remarkable for awake alert eyes open tracks past midline mimicks as permitted by motor status but does not reproducibly follow verbal commands brainstem reflexes intact eomi nystagmus on b l gaze perrl 2mm face symmetric but weak able to stick out tongue scm b l is motor reflexes r paraplegia frimaces to noxious trace l finger and l toe movement dtrs increased on l mute on r toes extensor bilaterally discharge instructions you were admitted to after having suffered severe strokes to your cerebellum and your pons parts of your brain you were left with significant neurological deficits at time of discharge you were treated with medicatoins to treat your strokes in addition you had a course complicated by a lung infection and blood infection you were treated with antibiotics because you could not breathe on your own or eat on your own you underwent placement of a breathing tube tracheostomy and feeding tube peg you were started on multiple medications please take them as prescribed you were discharged to a rehabiliation facility for further treatment of your breathing and stroke should you develop any symptoms of concern to you please call your doctor or go to emergency room followup instructions neurology provider md phone at 1pm please call the office of dr danka to set up a follow up appointment after your discharge from rehabilitation completed by 
    
    """
]

# Use the predict_icd9 function to predict ICD-9 codes
predicted_codes = predict_icd9(discharge_summaries, tokenizer, model)

# Print the results
for i, (summary, codes) in enumerate(zip(discharge_summaries, predicted_codes)):
    print(f"Discharge Summary {i + 1}:\n{summary}\nPredicted ICD-9 Codes: {codes}\n")


Discharge Summary 1:

    date of birth:               sex:   m service neurology allergies nkda chief complaint multifocal pontine and cerebrellar infarcts major surgical or invasive procedure pt arrived intubated trach peg multiple bals history of present illness per admitting resident information for the note was taken from the medical records from as well as the patient s daughter hpi year old male with significant medical problems including afib noncompliant on coumadin cad s p stent placement x6 chf with diastolic dysfunction htn morbid obesity alcoholism and dm was transferred from for possible intervention following cva briefly he presented to ed by ambulance on the morning of complaining of days of general malaise and abdominal pain and one bout of emesis he was prompted to call the ambulance after an episode of near syncope during micturition as well as tinnitus he was noted to be hypotensive upon arrival to the ed which resolved following fluid boluses work up in the ed with

In [6]:
# Another discharge summary as input
discharge_summaries.append(
    """Admission Date:                Discharge Date:     Date of Birth:               Sex:   M  Service: NEUROSURGERY  Allergies: Penicillins  Attending: Chief Complaint: SAH  Major Surgical or Invasive Procedure: : EVD Placement  TRACH  IVCF  PEG  CEREBRAL ANGIOGRAM  SUB OCCIPITAL CRANIECTOMY FOR DECOMPRESSION   History of Present Illness: 57 y/o male with history of migranes, Afib and HTN transferred here from an outside facility after a SAH was seen on CT.  Patient was in his usual state of health per his wife, he had been having a mild headache for the past few days, but today developed a  headache in the back of his head expressed that he thought he was going to pass out, his wife got him to a chair and he became unresponsive and developed respiratory distress. EMS was called, he was intubated in the field and taken to a Hospital. Upon arrival he was hypotensive with HR in the 40's. outside records indicate that he recieved one amp. of Atropine. CT revealed a SAH in aneurysmal distribution and he was transferred to  for further care.   Past Medical History: Prostate CA Afib HTn OSA- sleeps with bypap   Social History: Works as computer programer. No Tobacco, social ETOH 2x week   Family History: non-contributory  Physical Exam: Hunt and : 5        :2   GCS 3 E: 1   V:1 Motor T:  BP: 80/53         HR: 49   R 17      O2Sats100% CMV 100/450/16/10 Gen: intubated HEENT: Pupils: 5 to 4mm sluggish        EOMs  Cardiac: RRR. S1/S2. Abd: Soft, Extrem: Warm and well-perfused. No C/C/E.  Neuro: Mental status: Unresponsive to all stimuli  Cranial Nerves: I: Not tested II: Pupils equally round and reactive to light, to mm bilaterally. Visual fields are full to confrontation. III, IV, VI: Extraocular movements intact bilaterally without nystagmus. V, VII: Facial strength and sensation intact and symmetric. VIII: Hearing intact to finger rub bilaterally. IX, X: Palatal elevation symmetrical. : Sternocleidomastoid and trapezius normal bilaterally. XII: Tongue midline without fasciculations.  Motor: No response to nox stimuli No cough No gag No corneals  PHYSICAL EXAM UPON DISCHARGE:  Eyes open to voice / + tracking /pupils  bilaterally / conjugate gaze / no facial assymtety noted /  +/- commands / no attempt to verbalize / smiles on occassion / attempts to stick tongue out / right shoulder shrug noted to noxious stimuli / no other motor function noted at this time  Incisions: EVD site right frontal region with staples in place to come on   sub occipital crani site with slight fluctuance and fibrinous exudate at superior portion.  No drainage. No erythema .  He remains vent dependent at this time.  Peg site is clean and dry.   Pertinent Results:  CTA Head/Neck: 1. Fusiform 5-mm dilatation of the V4 segment of left vertebral artery is most suggestive of a fusiform aneurysm. 2. Extensive subarachnoid hemorrhage.   ECHO: The left atrium is normal in size. There is mild symmetric left ventricular hypertrophy with normal cavity size. Due to suboptimal technical quality, a focal wall motion abnormality cannot be fully excluded. Overall left ventricular systolic function is normal (LVEF>55%). Right ventricular chamber size and free wall motion are normal. The aortic valve is not well seen. There is no aortic valve stenosis. No aortic regurgitation is seen. The mitral valve leaflets are mildly thickened. No mitral regurgitation is seen. The pulmonary artery systolic pressure could not be determined. There is an anterior space which most likely represents a prominent fat pad.  IMPRESSION: Suboptimal image quality. Mild symmetric left ventricular hypertrophy with probably preserved left ventricular function. Cannot exclude regional wall motion abnormality due to technically limited windows.   Head CT: IMPRESSION: 1. Status post right frontal approach ventriculostomy catheter placement, with stable appearance of the ventricle size. 2. Extensive subarachnoid hemorrhage and intraventricular hemorrhage, stable.   Head CT: IMPRESSION: 1. Status post coiling of the left vertebral artery aneurysm, stable appearance of the extensive subarachnoid and intraventricular hemorrhage. 2. Status post right frontal ventriculostomy, with interval decompression of the right frontal  greater than the left. No significant interval change in size of the ventricles. 3.A right frontal approach ventriculostomy catheter appears to terminate in the right frontal lobe medial to the anterior clinoid process on the right.   Head CT: 1. Interval repositioning of a right frontal ventriculostomy tube, which appears to terminate within the anterior  of the right lateral ventricle.  There is a new 13 x 12 mm hyperdense focus adjacent to this, just right of midline, which appears to be a focus of hemorrhage. 2. Interval decrease in effacement of the right lateral ventricle. 3. The remainder of the examination is stable   CTA HEAD W&W/O C & RECONS Study Date of  IMPRESSION: 1. Extensive subarachnoid hemorrhage is unchanged in extent, though slightly less dense. Small subdural hemorrhage is unchanged. 2. Slightly decreased amount of intraventricular blood. Decreased size of lateral and third ventricles since . Persistent blood along the right frontal ventriculostomy catheter. 3. Unchanged diffuse cerebellar edema with sparing of a small portion of the right inferior cerebellar hemisphere. Unchanged compression of the fourth ventricles and herniation of the cerebellar tonsils. Concurrent head MRI demonstrates extensive cerebellar infarction. 4. Severe vasospasm in the posterior circulation. Mild vasospasm in the anterior circulation. 5. Questionable mild increase in mean transit time in the superior right middle cerebral artery territory, without a matched decrease in cerebral blood volume. In the setting of vasospasm, this could indicate mild ischemia.  Concurrent head MRI demonstrates no infarction in this territory.   CT PERFUSION : There is a questionable minimal increase in mean transit time in the superior right middle cerebral artery territory, without a matched decrease in cerebral blood volume.   HEAD MRI WITHOUT CONTRAST  IMPRESSION: 1. Evolving acute to early subacute infarction involving almost the entire cerebellum, with sparing of a small portion of the right inferior cerebellar hemisphere. Associated herniation of the cerebellar tonsils and compression of the fourth ventricle, similar to prior studies. 2. Unchanged extensive subarachnoid hemorrhage. Unchanged small subdural hemorrhage. Decreased intraventricular blood. 3. Decreased size of the lateral and third ventricles compared to .  Head CT : IMPRESSION: 1. Interval decrease in size of lateral ventricles, with stable effacement of the basal cisterns. The foramen magnum is poorly evaluated due to metallic streak artifact. 2. Stable appearance of external ventricular drain, as well as a focus of high density adjacent to it consistent with hemorrhage which is stable in size. 3. Hypodensity of the posterior fossa consistent with evolution of infarction. 4. Opacification of the ethmoid, sphenoid and maxillary sinuses consistent with intubated state.  Head CT : IMPRESSION: 1. Slightly decreased mass effect on the fourth ventricle status post occipital craniectomy. 2. Evolving cerebellar hemispheric infarcts. 3. Interval decrease of diffuse subarachnoid and intraventricular hemorrhage. 4. Unchanged effacement of the basal cisterns. 5. No new acute intracranial hemorrhage or new acute infarction.   LENIS BLE : IMPRESSION: Deep vein thrombosis seen within both of the right posterior tibial veins and partial thrombus seen within the right common femoral vein and superficial femoral vein. No DVT seen in the left leg.  CTA Head : IMPRESSIONS: 1. Severe diffuse vasospasm in the posterior circulation and mild diffuse vasospasm in the anterior circulation are overall little changed compared to , except to note slight worsening in the now marked narrowing of the P1 segment of the right posterior cerebral artery. The appearance on both the  and current study is new since the admission study of . 2. Progressive confluent low-attenuation throughout both cerebellar hemispheres; while this may represent widespread infarction, as suggested previously, the findings on the MR study of , including the virtual sparing of the pons, are somewhat atypical, and a component of diffuse vasogenic edema is a consideration.  EEG  - : Diffuse encephalopathy, no seizure activity.  : MRI C-spine:IMPRESSION: 1. No gross cord signal abnormality. If there is continued clinical concern for cord infarction, axial T2 and diffusion sequences can be performed which significantly increases sensitivity for subtle cord signal abnormality. 2. Areas of altered signal intensity in the CSF space likely relates to blood products from the known subarachnoid hemorrhage. Incompletely imaged postoperative changes of the suboccipital craniectomy, better evaluated on the brain MRI performed the same day.  : CT Head: IMPRESSION: 1. Expected evolution of intracranial hemorrhage without evidence of new hemorrhage. 2. Increased size of the fourth ventricle, likely related to decreased cerebellar swelling. 3. Decreased size of the lateral and third ventricles, which are now slit-like. New subdural fluid collections along the cerebral convexities and cerebellar hemispheres. These findings suggest the possibility of overshunting.  : MRI Head: IMPRESSIONS: 1. No evidence of acute infarction. 2. Decreased cerebellar swelling. Some of the persistent edema could be vasogenic due to vasospasm, and some of it could be due to subacute infarction. Recommend continued follow-up. 3. Increased size of the fourth ventricle due to decreased cerebellar swelling. New subdural fluid collections along cerebral convexities and cerebellar hemispheres, which suggest the possibility of overshunting, given the decreased size of lateral and third ventricles. 4. Decreased severity of upward transtentorial herniation as well as downward tonsillar herniation compared to MRI of .   EEG : IMPRESSION: This EEG gives evidence principally for a moderately severe to severe diffuse encephalopathy. There are no clear epileptic features to this. Additionally, the cardiac rhythm is significantly abnormal.    Portable AP chest radiograph was reviewed in comparison to .  The tracheostomy tip is at the midline being 6 cm above the carina. The right central venous line tip is at the level of superior SVC. Cardiomediastinal silhouette is stable. Multifocal opacities mostly located in the lower lobes are grossly unchanged. No interval increase in pleural effusion has been demonstrated.  IMPRESSION: Overall no significant change since the prior study including multifocal mostly basal opacity.   CHEST HISTORY: PICC line placement. One view. Comparison with . Multifocal opacities persist. There appears to be slight interval worsening at the lung bases. The tracheostomy tube and right-sided central venous line remain in place. A PICC line has been inserted on the left and terminates in the mid superior vena cava. There is no other definite change.  IMPRESSION: Multifocal opacities with evidence for slight interval worsening at the bases. PICC line as described.  Head CT : IMPRESSION: 1. Resolution of cerebellar swelling and herniation through the craniectomy defect. While small infarcts may be present in the inferior cerebellar hemispheres, the remainder of the cerebellum appears to have resumed normal architecture. 2. The lateral and third ventricles remain small s/p ventriculostomy removal. The bilateral posterior fossa subdural fluid collections remain unchanged.  The right convexity subdural fluid collection is smaller, and the left convexity subdural fluid collection is larger, with new minimal rightward shift of midline structures.    Head CT : Final read is pending at this time.  It was reviewed by Dr.  and is stable.   Brief Hospital Course: The CT scan was evaluated in the ED and it was determined that EVD placement was necessary. This was performed in the ED without complication and was set to 20cm H20 and open. The pt was then admitted to the Neurosurgical service for Q1hr Neurochecks in the ICU. He was started on nimodipine and keppra for prophylactic purposes.  Cerebral angiogram was requested and obtained on  and coils were placed and the left vertebral artery was sacrificed. Post-angio a Heparin drip was initiated and discontinued the next morning.  On  the EVD was pulled back.  eve he had new Afib and required a Dilt drip to be initiated. His ICPs were also high and required two doses of Mannitol.   AM a head CT was done to reassess the shunt which showed the ventricles to still be large but stable bleed. His drain was dropped to 10cm from 15cm.  Over the weekend of  his ventriculostomy drain was flushed as there was air in the line and likely inaccurate readings.  Following the flushing, a good waveform was obtained, and his true ICP readings were .  Mannitol was discontinued. Overnight on  the patient had  isolated episodes of asystole lasting 5-10 seconds.  he spontaneously converted to NSR, and difib pads were placed on the patient.  His code status remained full code.  He was noted to have a fever >101.3 on , and CSF/Blood/Urine cultures were obtained.  A CTA head was done which showed vasospasm in the posterior circulation and mild vasospasm in the anterior circulation.  On  he was taken to the OR for a posterior fossa craniectomy/duratomy as his ICPs remained elevated and there was concern for increased edema due to a cerebellar infarct. He remained stable on . Stroke Neurology was consulted.  On  a CTA was done which showed no change from the  CTA but there was increased vasospasm to the R P1. He was then noted to have rigors. He was brought to angiogram and verapamil was injected. On  he also underwent a trach/peg at bedside and an IVC filter in IR. Decadron was started per Stroke Neurology recommendations.  On  he was again febrile and cultures were sent. Decadron was discontinued.  On  he remained stable and his EVD was elevated to 15cm from 10cm. His blood pressure parameters were liberalized.  On  his neurochecks where liberalized to q4hours and his EVD was increased to 20cm from 15cm.  He had no ICP issues.  Later in the day he underwent a brochoscopy which showed thick secretions and he continued to be febrile.  CSF was sent overnight as well.  On  in the early morning, he was experiencing 4 second pauses which occured approximately 10 times per the SICU staff.  His diltizaem drip was discontinued at that time. On morning rounds his EVD was raised to 25cm from 20cm however he had increased ICP's and the drainw as again lowered to 20cm.  He continued to have pauses and episodes of bradycardia so the SICU consulted Electrophysiology to evaluate him.   patient was evaluated by Cardiology who recommended that patient's heart rate be moderately controlled. They believed that overly agressive management of his heart rate in Afib had lead to frequent pauses and asystolic events. He has at this time tolerated his EVD at 25cm of H2O for 24 hours, but clinically it apeared that he was not tolerating the pressure and started leaking CSF from his posterior fossa incision, therefore the EVD was dropped to 15cmH20.  A CT of his Brain showed expected evolution of intracranial hemorrhage without evidence of new hemorrhage. Overnight EEG revealed seizure activity lasting 1-2 minutes therefore Keppra was increased to 1500mg .  On  the patient remained neurologically unchanged. The wound remained dry with the EVD at 15cmH20. On  &  the patient again remained stable without evidence of leakage from the incisional site, therefore it was determined that he did not need to go to the operating room at this time.  His EVD remained clamp for 48 hours and was finally removed on .  His SOC site remained dry and his sutures were removed on .  A Ct scan was performed on  to evaluate for increase in external hydrocephalus and this scan was stable.  We would like a repeat scan in one week.  Medications on Admission: Sotalol HCTZ   Discharge Medications: 1. Heparin Flush (10 units/ml) 2 mL IV PRN line flush PICC, heparin dependent: Flush with 10mL Normal Saline followed by Heparin as above daily and PRN per lumen. 2. HydrALAzine 5 mg IV Q6H:PRN SBP > 200 3. Dextrose 50% 12.5 gm IV PRN hypoglycemia protocol 4. Sodium Chloride 0.9%  Flush 10 mL IV PRN line flush Temporary Central Access-ICU: Flush with 10mL Normal Saline daily and PRN. 5. Fentanyl Citrate 25-100 mcg IV Q2H:PRN pain 6. heparin (porcine) 5,000 unit/mL Solution Sig: One (1) Injection TID (3 times a day). 7. insulin regular human 100 unit/mL Solution Sig: One (1) Injection every six (6) hours. 8. miconazole nitrate 2 % Powder Sig: One (1) Appl Topical TID (3 times a day) as needed for rash. 9. diltiazem HCl 30 mg Tablet Sig: One (1) Tablet PO QID (4 times a day). 10. famotidine 20 mg Tablet Sig: One (1) Tablet PO BID (2 times a day). 11. bisacodyl 10 mg Suppository Sig: One (1) Suppository Rectal DAILY (Daily). 12. white petrolatum-mineral oil 56.8-42.5 % Ointment Sig: One (1) Appl Ophthalmic PRN (as needed) as needed for dry eyes. 13. ibuprofen 100 mg/5 mL Suspension Sig: Twenty (20) ml PO Q6H (every 6 hours) as needed for fever. 14. acetaminophen 650 mg/20.3 mL Solution Sig:   PO Q6H (every 6 hours) as needed for pain. 15. glucagon (human recombinant) 1 mg Recon Soln Sig: One (1) Recon Soln Injection Q15MIN () as needed for hypoglycemia protocol. 16. docusate sodium 50 mg/5 mL Liquid Sig: Ten (10) ml PO BID (2 times a day). 17. senna 8.6 mg Tablet Sig: One (1) Tablet PO BID (2 times a day) as needed for constipation. 18. aspirin 325 mg Tablet Sig: One (1) Tablet PO DAILY (Daily).    Discharge Disposition: Extended Care  Facility:  Northeast -   Discharge Diagnosis: SAH ATRIAL FIBRILATION OBSTRUCTIVE HYDROCEPHALUS QUADRAPLEGIA RFESPIRATORY FAILURE DYSPHAGIA FEVER ANEMIA REQUIRING TRANSFUSION + DEEP VENOUS THROMBOSIS IN RLE SEIZURE   Discharge Condition: Mental Status: Confused - always. Level of Consciousness: Lethargic but arousable. Activity Status: Bedbound.   Discharge Instructions: Angiogram with Embolization and/or Stent placement Medications: ??????	Take Aspirin 325mg (enteric coated) once daily. ??????	Continue all other medications you were taking before surgery, unless otherwise directed ??????	You make take Tylenol or prescribed pain medications for any post procedure pain or discomfort.   Followup Instructions: PLEASE CALL THE OFFICE OF DR   /NEUROSURGEON FOR AN APPT TO BE SEEN IN ONE MONTH WITH A CAT SCAN OF THE BRAIN.  THE OFFICE PHONE NUMBER IS     Completed by:"""

)

# Predict ICD-9 codes for all summaries
predicted_codes = predict_icd9(discharge_summaries, tokenizer, model)

# Print results for all summaries
for i, (summary, codes) in enumerate(zip(discharge_summaries, predicted_codes)):
    print(f"Discharge Summary {i + 1}:\n{summary}\nPredicted ICD-9 Codes: {codes}\n")


Discharge Summary 1:

    date of birth:               sex:   m service neurology allergies nkda chief complaint multifocal pontine and cerebrellar infarcts major surgical or invasive procedure pt arrived intubated trach peg multiple bals history of present illness per admitting resident information for the note was taken from the medical records from as well as the patient s daughter hpi year old male with significant medical problems including afib noncompliant on coumadin cad s p stent placement x6 chf with diastolic dysfunction htn morbid obesity alcoholism and dm was transferred from for possible intervention following cva briefly he presented to ed by ambulance on the morning of complaining of days of general malaise and abdominal pain and one bout of emesis he was prompted to call the ambulance after an episode of near syncope during micturition as well as tinnitus he was noted to be hypotensive upon arrival to the ed which resolved following fluid boluses work up in the ed with