In [1]:
%cd ..

/Users/floydluo/Library/CloudStorage/GoogleDrive-jjluo@terpmail.umd.edu/My Drive/0-Research-Project/MedStar/MS_CODE/FieldNN


In [2]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
import lightning.pytorch as pl

ModuleNotFoundError: No module named 'lightning'

## Define a dataset and a dataloader

In [3]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from recfldgrn.datapoint import convert_PID_to_PIDgroup
from recfldgrn.datapoint import RANGE_SIZE, write_df_to_folders, load_df_data_from_folder
from fieldnn.utils.layerfn import traverse, convert_relational_list_to_numpy

class PatientDataset(Dataset):
    def __init__(self, TaskTensor_folder, recfldgrn_list):
        self.recfldgrn_list = recfldgrn_list
        self.TaskTensor_folder = TaskTensor_folder
        
        data = pd.DataFrame(columns = ['PID'])
    
        for recfldgrn in recfldgrn_list:
    
            # (1) get tensor_folder
            tensor_folder = os.path.join(TaskTensor_folder, recfldgrn)

            # (2) get df_Pat and full_recfldgrn
            df_Pat = load_df_data_from_folder(tensor_folder)# .set_index('PID')
            # print(df_Pat.col
            data  = pd.merge(data, df_Pat, on = 'PID', how = 'right')

        self.data = data
            

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        x = self.data.iloc[index]# [full_recfldgrn]
        
        y = np.random.choice([0,1]) # go back to label later.
        return x, y # torch.tensor(x), torch.tensor(y)

In [4]:
def my_collate_fn(batch_input):
    batch_rfg = {}
    ##############
    # inputs: you can check the following inputs in the above cells.
    # (1): relational_list
    # (2): new_full_recfldgrn
    # (3): suffix
    ##############
    df_batch = pd.DataFrame([i[0].to_dict() for i in batch_input])
    # df_batch
    recfldgrn_list = [i for i in df_batch.columns if 'PID' not in i]
    for full_recfldgrn in recfldgrn_list:
        suffix = '_' + full_recfldgrn.split('_')[-1]
        relational_list = df_batch[full_recfldgrn].to_list()

        new_full_recfldgrn = 'B-' + full_recfldgrn # B- means Batch. 
        # output: this function will return a Dictionary to hold outcome
        D = convert_relational_list_to_numpy(relational_list, new_full_recfldgrn, suffix)
        # have a look at B-P-EC-A1C@DT-DTDftGrn_idx, the final tensor
        tensor_idx = D[new_full_recfldgrn]
        # print(new_full_recfldgrn, '<--- new_full_recfldgrn')
        # print(tensor_idx.shape, '<------- the shape of tensor_idx')
        
        batch_rfg[new_full_recfldgrn] = torch.Tensor(tensor_idx)
    ##############
    
    batch_y = torch.LongTensor([i[1] for i in batch_input])  # ignore this
    return batch_rfg, batch_y

In [5]:
from torch.utils.data import DataLoader

TaskTensor_folder = 'data/ProcData/TensorFolder/Task2YearXXX'
recfldgrn_list = [
                  'P@age-AgeNumeDftGrn',
                  'P@basicInfo-basicInfoDftGrn',
    
                  'EC@BasicInfo-BasicDftGrn',
                  'EC@DT_min-DTDftGrn',
    
                  'A1C@DT-DTDftGrn',
                  'A1C@V-A1CNumeDftGrn',
                  
                  'Diag@DT-DTDftGrn',
                  'Diag@Value-DiagDftGrn',
                  
                  'PNSectSent@Sentence-Tk@TknzLLMGrn']


train_dataset = PatientDataset(TaskTensor_folder, recfldgrn_list)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=my_collate_fn)

# add valid_dataset and valid_dataloader
# add test_dataset and test_dataloader

## Embed and Repr Block Imports

In [6]:
from fieldnn.dataflowfn.embedflowfn import get_EmbeddingBlock_SubUnit
from fieldnn.dataflowfn.baseflowfn import mapping_SubUnitName_to_SubUnitNNList
from fieldnn.dataflowfn.baseflowfn import get_SubUnit_Default_NNPara_List
from fieldnn.dataflowfn.baseflowfn import get_SubUnit_BasicNN_Config_List
from fieldnn.module.embedblock import EmbedBlockLayer


from fieldnn.dataflowfn.reprflowfn import get_Repr_dataflow_table
from fieldnn.dataflowfn.reprflowfn import update_df_Repr_dataflow
from fieldnn.dataflowfn.reprflowfn import update_df_Repr_dataflow_completename
from fieldnn.dataflowfn.reprflowfn import get_Repr_SubUnit_List

from fieldnn.dataflowfn.baseflowfn import mapping_SubUnitName_to_SubUnitNNList
from fieldnn.dataflowfn.baseflowfn import generate_BasicNN_Config
from fieldnn.dataflowfn.baseflowfn import get_SubUnit_Default_NNPara_List
from fieldnn.dataflowfn.baseflowfn import get_SubUnit_BasicNN_Config_List

from fieldnn.module.reprblock import ReprBlockLayer

## Define parameters

In [7]:
output_name = 'B-P'
embed_size = 128
output_size = 1
actn_fn_name = 'Sigmoid' # torch.nn.Sigmoid()
loss_fn_name = 'BCELoss' # torch.nn.BCELoss()
fldgrn_folder = 'data/ProcData/FldGrnInfo'

## Define Models

In [8]:
OutputBlock = torch.nn.Linear(embed_size, output_size)

if actn_fn_name == 'Sigmoid':
    actn_method = torch.nn.Sigmoid()
    actn_fn = lambda outputvecs: actn_method(outputvecs) # will return probs 
elif self.actn_fn_name == 'Softmax':
    actn_method = torch.nn.Softmax()
    actn_fn = lambda outputvecs: actn_method(outputvecs, dim = 1) # will return probs 
else:
    raise ValueError(f'Activation Function Name {actn_fn_name} is not available yet')
    

if loss_fn_name == 'BCELoss':
    assert actn_fn_name == 'Sigmoid'
    loss_method = torch.nn.BCELoss()
    loss_fn = lambda probs, targets: loss_method(probs, targets) # will return loss
elif self.loss_fn_name == 'CrossEntropyLoss':
    assert actn_fn_name == 'Softmax'
    loss_method = torch.nn.CrossEntropyLoss()
    loss_fn = lambda probs, targets: loss_method(probs, targets) # will return loss
else:
    raise ValueError(f'Loss Function Name {loss_fn_name} is not available yet')

## Define a LightningModule

- The **training_step** defines how the nn.Modules interact together.

- In the **configure_optimizers** define the optimizer(s) for your models.

In [64]:
# define the PatientLevelPredictionModel
class LitAutoModel(pl.LightningModule):
    def __init__(self, fldgrn_folder,
                 OutputBlock, actn_fn, loss_fn, 
                 output_name, embed_size, output_size):
        super().__init__()
        
        self.output_name = output_name
        
        self.OutputBlock = OutputBlock
        self.actn_fn = actn_fn
        self.loss_fn = loss_fn
        
        self.embed_size = embed_size
        self.output_size = output_size
            
        self.default_BasicNNtype_To_NNName = {
            'expander': None,
            'reducer': 'Max',
            'merger': 'Merger',
            'learner': None,
        }
        self.process = {'activator': 'gelu',
           'dropout': {'p': 0.5, 'inplace': False},
           'layernorm': {'eps': 1e-05, 'elementwise_affine': True}}
        
        self.default_E_subunit_name = 'E'
        self.fldgrn_folder = fldgrn_folder
        self.learner_default_dict = {}
        self.default_R_subunit_name = 'R'
        self.default_MR_subunit_name = 'MR'

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        
        batch_rfg, y = batch
        REPR_TENSOR = self.get_REPR_TENSOR(batch_rfg)
        info_dict = REPR_TENSOR[self.output_name]
        featvecs = info_dict['info']
        
        outputvecs = self.OutputBlock(featvecs)
        probs = self.actn_fn(outputvecs)
        
        probs = probs.squeeze(1)
        probs = probs.to(torch.float64)
        y = y.to(torch.float64)
        
        loss = self.loss_fn(probs, y)
        
        print("loss: ", loss.detach().numpy())
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        batch_rfg, y = batch
        
        REPR_TENSOR = self.get_REPR_TENSOR(batch_rfg)
        info_dict = REPR_TENSOR[self.output_name]
        featvecs = info_dict['info']
        
        outputvecs = self.OutputBlock(featvecs)
        probs = self.actn_fn(outputvecs)
        
        probs = probs.squeeze(1)
        probs = probs.to(torch.float64)
        y = y.to(torch.float64)
        
        val_loss = self.loss_fn(probs, y)
        self.log("val_loss", val_loss)
    
    def test_step(self, batch, batch_idx):
        # this is the test loop
        batch_rfg, y = batch
        
        REPR_TENSOR = self.get_REPR_TENSOR(batch_rfg)
        info_dict = REPR_TENSOR[self.output_name]
        featvecs = info_dict['info']
        
        outputvecs = self.OutputBlock(featvecs)
        probs = self.actn_fn(outputvecs)
        
        probs = probs.squeeze(1)
        probs = probs.to(torch.float64)
        y = y.to(torch.float64)
        
        
        test_loss = self.loss_fn(probs, y)
        self.log("test_loss", test_loss)

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=1e-3)
        return optimizer
    
    def get_REPR_TENSOR(self, batch_rfg):
        # get the full_recfldgrn_list
        full_recfldgrn_list = [i for i in batch_rfg]

        # prepare RECFLD_TO_TENSOR
        RECFLD_TO_TENSOR = {}
        for full_recfldgrn in full_recfldgrn_list:
            # (1) get the info_raw from batch_rfg
            info_raw = batch_rfg[full_recfldgrn]

            # (2) get the holder (input_idx) and holder_wgt (for nume embedding only)
            if '_idx' in full_recfldgrn:
                holder_wgt = 'Empty'
                # holder = torch.LongTensor(info_raw)
                holder = torch.tensor(info_raw).to(torch.int64)
            elif '_wgt' in full_recfldgrn:
                # holder_wgt = torch.FloatTensor(info_raw)
                holder_wgt = torch.FloatTensor(info_raw)
                # ATTENTION: here holder_wgt could contain zeros in some valid positions.
                holder = torch.ones_like(holder_wgt).cumsum(-1).masked_fill(holder_wgt == 0, 0).long()
            else:
                raise ValueError(f'Invalid full_recfldgrn "{full_recfldgrn}"')

            info_dict = {'holder': holder, 'holder_wgt': holder_wgt}
            RECFLD_TO_TENSOR[full_recfldgrn] = info_dict
    
        
        # get df_Embed_SubUnit 
        df_Embed_SubUnit = self.get_df_Embed_SubUnit(full_recfldgrn_list)
        
        # define EmbedBlock
        self.EmbedBlock = EmbedBlockLayer(df_Embed_SubUnit)
        
        # get RECLD_TO_EMBEDTENSOR from RECFLD_TO_TENSOR and EmbedBlock
        RECFLD_TO_EMBEDTESNOR = self.EmbedBlock(RECFLD_TO_TENSOR)
        
        # update the full_recfldgrn_list
        full_recfldgrn_list = [i for i in RECFLD_TO_EMBEDTESNOR]

        # get df_Repr_SubUnit 
        df_Repr_SubUnit = self.get_df_Repr_SubUnit(full_recfldgrn_list)
        
        # define ReprBlock
        self.ReprBlock = ReprBlockLayer(df_Repr_SubUnit)
        
        # update the names of full_recfldgrn_list
        fld_updates_dict = {}
        for i in RECFLD_TO_EMBEDTESNOR:
            layernum = len(i.split('-'))
            fld = i.split('-')[-1]
            if '@' not in fld: continue
            
            # print(fld)
            neat_i = '-'.join(i.split('-')[:-1]) + '-' + fld.split('@')[0]
            # print(neat_i)
            same_neat_list = [t for t in RECFLD_TO_EMBEDTESNOR if neat_i + '@' in t]
            # print(same_neat_list)
            if len(same_neat_list) == 1: fld_updates_dict[i] = neat_i

        for old, new in fld_updates_dict.items():
            RECFLD_TO_EMBEDTESNOR[new] = RECFLD_TO_EMBEDTESNOR.pop(old)
            
        # get the OUTPUT_TO_TENSOR data holder
        REPR_TENSOR = self.ReprBlock(RECFLD_TO_EMBEDTESNOR)
        return REPR_TENSOR
    
    def get_df_Embed_SubUnit(self, full_recfldgrn_list):
        df_Embed_SubUnit = get_EmbeddingBlock_SubUnit(full_recfldgrn_list, self.default_E_subunit_name)
        s = df_Embed_SubUnit.apply(lambda x: mapping_SubUnitName_to_SubUnitNNList(x['SubUnitName'], 
                                                                            x['input_names'],
                                                                            self.default_BasicNNtype_To_NNName), 
                                                                            axis = 1)
        df_Embed_SubUnit['SubUnit_BasicNN_List'] = s
        s = df_Embed_SubUnit.apply(lambda x: get_SubUnit_Default_NNPara_List(x['SubUnit_BasicNN_List'], 
                                                                       x['input_names'],
                                                                       self.fldgrn_folder, 
                                                                       self.learner_default_dict), axis = 1)

        df_Embed_SubUnit['SubUnit_DefaultBasicNN_List'] = s
        s = df_Embed_SubUnit.apply(lambda x: get_SubUnit_BasicNN_Config_List(x['SubUnit_BasicNN_List'], 
                                                                       x['SubUnit_DefaultBasicNN_List'], 
                                                                       x['input_names'], 
                                                                       x['output_name'], 
                                                                        self.embed_size, 
                                                                        self.process, 
                                                                       ), axis = 1)
        df_Embed_SubUnit['SubUnit_BasicNN_Config_List'] = s
        
        return df_Embed_SubUnit
    
    def get_df_Repr_SubUnit(self, full_recfldgrn_list):
        
        df_dataflow = get_Repr_dataflow_table(full_recfldgrn_list)
        
        df_dataflow_new = update_df_Repr_dataflow(df_dataflow, style = 'Reducer&Merger')
        df_dataflow = df_dataflow_new.copy()
        
        df_Repr_SubUnit = get_Repr_SubUnit_List(df_dataflow, self.default_R_subunit_name, self.default_MR_subunit_name)
        
        s = df_Repr_SubUnit.apply(lambda x: mapping_SubUnitName_to_SubUnitNNList(x['SubUnitName'], 
                                                                    x['input_names'],
                                                                    self.default_BasicNNtype_To_NNName), 
                                                                    axis = 1)

        df_Repr_SubUnit['SubUnit_BasicNN_List'] = s

        s = df_Repr_SubUnit.apply(lambda x: get_SubUnit_Default_NNPara_List(x['SubUnit_BasicNN_List'], 
                                                               x['input_names'],
                                                               self.fldgrn_folder, 
                                                               self.learner_default_dict), axis = 1)

        df_Repr_SubUnit['SubUnit_DefaultBasicNN_List'] = s
        
        s = df_Repr_SubUnit.apply(lambda x: get_SubUnit_BasicNN_Config_List(x['SubUnit_BasicNN_List'], 
                                                               x['SubUnit_DefaultBasicNN_List'], 
                                                               x['input_names'], 
                                                               x['output_name'], 
                                                                self.embed_size, 
                                                                self.process, 
                                                               ), axis = 1)

        df_Repr_SubUnit['SubUnit_BasicNN_Config_List'] = s
        
        return df_Repr_SubUnit

In [59]:
# init the LitAutoModel
PatientLevelPredictionModel = LitAutoModel(fldgrn_folder, OutputBlock, actn_fn, loss_fn,
                                    output_name, embed_size, output_size)

## Train the model

In [60]:
num_epochs = 2
num_gpus = 0
checkpoint_path = 'checkpoint/'

In [61]:
trainer = pl.Trainer(max_epochs=num_epochs, default_root_dir=checkpoint_path)
trainer.fit(model=PatientLevelPredictionModel, train_dataloaders=train_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name        | Type   | Params
---------------------------------------
0 | OutputBlock | Linear | 129   
---------------------------------------
129       Trainable params
0         Non-trainable params
129       Total params
0.001     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 0:   0%|                                            | 0/4 [00:00<?, ?it/s]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.8641, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 0:  25%|██████▊                    | 1/4 [00:18<00:54, 18.28s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.9713, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 0:  50%|█████████████▌             | 2/4 [01:18<01:18, 39.09s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.8551, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 0:  75%|████████████████████▎      | 3/4 [01:46<00:35, 35.64s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.6858, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 1:   0%|                                   | 0/4 [00:00<?, ?it/s, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.8227, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 1:  25%|██████▊                    | 1/4 [00:27<01:21, 27.17s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.9038, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 1:  50%|█████████████▌             | 2/4 [01:23<01:23, 41.72s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.7308, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 1:  75%|████████████████████▎      | 3/4 [02:08<00:42, 42.84s/it, v_num=0]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


loss:  tensor(0.6616, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)
Epoch 1: 100%|███████████████████████████| 4/4 [02:47<00:00, 41.89s/it, v_num=0]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|███████████████████████████| 4/4 [02:48<00:00, 42.10s/it, v_num=0]


In [None]:
# with validation set
# Train the model
trainer.fit(model=PatientLevelPredictionModel, train_loader, val_loader)

## Test the model

In [65]:
# test the model
# trainer.test(model, dataloaders=DataLoader(test_set))

trainer.test(model=PatientLevelPredictionModel, dataloaders=train_dataloader)

  rank_zero_warn(
  rank_zero_warn(


Testing DataLoader 0:   0%|                               | 0/4 [00:00<?, ?it/s]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing DataLoader 0:  25%|█████▊                 | 1/4 [00:17<00:53, 17.83s/it]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing DataLoader 0:  50%|███████████▌           | 2/4 [00:23<00:23, 11.53s/it]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing DataLoader 0:  75%|█████████████████▎     | 3/4 [00:36<00:12, 12.19s/it]

  holder = torch.tensor(info_raw).to(torch.int64)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing DataLoader 0: 100%|███████████████████████| 4/4 [00:48<00:00, 12.06s/it]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           0.6864618353303682
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.6864618353303682}]