In [1]:
import os
# Move the working directory up one level before the project imports below.
# NOTE(review): presumably this makes the parent directory the project root so
# that `promptehr` and the relative paths used later resolve — confirm.
# The path is relative, so re-running this cell moves up one more level each time.
os.chdir('../')

from promptehr import PromptEHR

# load pytrial demodata, supported by PyTrial package to load the demo EHR data
from pytrial.data.demo_data import load_synthetic_ehr_sequence
from pytrial.tasks.trial_simulation.data import SequencePatient

# Load the synthetic demo EHR bundle (100 samples requested).
demo = load_synthetic_ehr_sequence(n_sample=100)

# Wrap visits, labels and baseline features into the sequence dataset that
# the later cells pass to model.fit and model.predict.
seqdata = SequencePatient(
    data={
        'v': demo['visit'],
        'y': demo['y'],
        'x': demo['feature'],
    },
    metadata={
        'visit': {'mode': 'dense'},
        'label': {'mode': 'tensor'},
        'voc': demo['voc'],
        'max_visit': 20,
    },
)

# Inspect the input format, one printed line per field.
# Each row is (printed label, key in `demo`, print only the first entry?).
preview_fields = (
    ('visit', 'visit', True),        # first patient's list of visit events
    ('mortality', 'y', True),        # first patient's label
    ('feature', 'feature', True),    # first patient's baseline feature array
    ('voc', 'voc', False),           # dict keyed by code type; values print as Voc objects
    ('order', 'order', False),       # list of the code types
    ('n_num_feature', 'n_num_feature', False),          # int: number of numerical patient features
    ('cat_cardinalities', 'cat_cardinalities', False),  # list: cardinalities of categorical patient features
)
for label, key, first_only in preview_fields:
    # Look each field up right before printing so output appears in the same
    # order, and at the same point, as one print statement per field would.
    field = demo[key]
    print(label, field[0] if first_only else field)

visit [[[0, 1, 2, 3, 5, 7, 41, 313, 1], [0, 1, 82], [2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 51, 19, 26]], [[0, 1, 10, 69], [1, 4], [0, 2, 3, 6, 7, 41, 12, 13, 14, 16, 52, 54, 22, 28]]]
mortality False
feature [-1.02022052  0.          0.        ]
voc {'diag': <promptehr.data.Voc object at 0x7fb1aa085670>, 'prod': <promptehr.data.Voc object at 0x7fb1aa0857f0>, 'med': <promptehr.data.Voc object at 0x7fb1aa085b80>}
order ['diag', 'prod', 'med']
n_num_feature 1
cat_cardinalities [2, 37]


In [2]:
# Build PromptEHR from the demo bundle's metadata and train it for one epoch.
model = PromptEHR(
    # schema of the demo data
    code_type=demo['order'],
    cat_cardinalities=demo['cat_cardinalities'],
    n_num_feature=demo['n_num_feature'],
    # training schedule: a single epoch; eval_step=1 presumably triggers an
    # evaluation after every training step — TODO confirm against PromptEHR docs
    epoch=1,
    eval_step=1,
    # runtime settings
    num_worker=0,
    # NOTE(review): hard-coded GPU selection ([3] looks like a list of GPU ids);
    # adjust to a device that exists on the machine running this notebook.
    device=[3],
)

# The same dataset is passed as both training and validation data.
# NOTE(review): the run recorded with this cell ended in a CUDA out-of-memory
# error, so these settings may need tuning for smaller GPUs.
model.fit(train_data=seqdata, val_data=seqdata)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BartTokenizer'. 
The class this function is called from is 'DataTokenizer'.


Step,Training Loss,Validation Loss,Ppl Diag,Ppl Prod,Ppl Med
1,7.5612,No log,1914.010986,788.348633,117.875397
2,4.7692,No log,1717.88501,801.725342,89.296257
3,7.3762,No log,1548.198364,830.639343,85.684441
4,7.3,No log,1490.872803,813.329712,85.889137
5,4.5591,No log,1450.355347,818.956726,81.854637


evaluation for code diag.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code prod.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code med.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code diag.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code prod.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code med.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code diag.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code prod.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code med.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code diag.
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
evaluation for code prod.
***** Running Eva

Token indices sequence length is longer than the specified maximum sequence length for this model (522 > 512). Running this sequence through the model will result in indexing errors


OutOfMemoryError: CUDA out of memory. Tried to allocate 204.00 MiB (GPU 0; 10.75 GiB total capacity; 9.09 GiB already allocated; 124.50 MiB free; 9.89 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Save the model under ./simulation/promptEHR.
# The path is relative, so the location depends on the current working directory.
model.save_model('./simulation/promptEHR')

In [None]:
# Generate fake (synthetic) records from the sequence dataset and keep them in `res`.
# NOTE(review): n_per_sample and n presumably control how many records are
# generated per input patient and in total — confirm against PromptEHR.predict docs.
res = model.predict(seqdata, n_per_sample=10, n=100, verbose=True)

In [None]:
# Print the full object returned by model.predict.
print(res)

In [None]:
# NOTE(review): this repeats the `import os` / `os.chdir('../')` from the first
# cell. If both cells run in the same kernel session, the working directory
# ends up two levels above the starting directory. Presumably this cell is
# meant as the start of a fresh-kernel section — confirm, or drop it.
import os
os.chdir('../')

In [None]:
# Alternative to training: use a pretrained model instead.
# Builds a PromptEHR with its default arguments and calls from_pretrained(),
# which (per the original note here) is expected to download the pretrained model.
# This rebinds `model`, replacing whatever earlier cells bound to that name.
from promptehr import PromptEHR
model = PromptEHR()
model.from_pretrained()

In [None]:
# End-of-notebook marker: prints a fixed completion message.
print('we are done! :)')