In [1]:
from kmembert.utils import Config
from kmembert.models import HealthBERT

import torch

# Start from the default configuration and switch it to the "density" task
# before the model is built from it.
config = Config()
config.mode = "density"

# Run on the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Instantiate the HealthBERT wrapper on the selected device with this config.
model = HealthBERT(device, config)

[1m
Loading camembert and its tokenizer...[0m
[92mSuccessfully loaded
[0m


In [2]:
import argparse


def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so ``--freeze False`` would
    silently enable freezing.

    Args:
        value: the raw token given on the command line (or an actual bool).

    Returns:
        True for "true"/"t"/"yes"/"y"/"1", False for "false"/"f"/"no"/"n"/"0"
        and the empty string (case-insensitive).

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("boolean value expected, got %r" % (value,))


# Command-line style options for the training session, built here so the
# notebook can reuse the same argument namespace as a script would.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_folder", type=str, default="ehr",
    help="data folder name")
parser.add_argument("-m", "--mode", type=str, default="regression", choices=['regression', 'density'],
    help="name of the task")
parser.add_argument("-b", "--batch_size", type=int, default=8,
    help="dataset batch size")
parser.add_argument("-e", "--epochs", type=int, default=2,
    help="number of epochs")
parser.add_argument("-drop", "--drop_rate", type=float, default=None,
    help="dropout ratio. By default, None uses p=0.1")
parser.add_argument("-nr", "--nrows", type=int, default=None,
    help="maximum number of samples for training and validation")
parser.add_argument("-k", "--print_every_k_batch", type=int, default=1,
    help="prints training loss every k batch")
# A bare `-f` yields const=True; `-f true` / `-f false` are parsed explicitly.
parser.add_argument("-f", "--freeze", type=_str2bool, default=False, const=True, nargs="?",
    help="whether or not to freeze the Bert part")
parser.add_argument("-dt", "--days_threshold", type=int, default=365,
    help="days threshold to convert into classification task")
parser.add_argument("-lr", "--learning_rate", type=float, default=1e-4,
    help="model learning rate")
parser.add_argument("-r_lr", "--ratio_lr_embeddings", type=float, default=1,
    help="the ratio applied to lr for embeddings layer")
parser.add_argument("-wg", "--weight_decay", type=float, default=0,
    help="the weight decay for L2 regularization")
parser.add_argument("-v", "--voc_file", type=str, default=None,
    help="voc file containing camembert added vocabulary")
parser.add_argument("-r", "--resume", type=str, default=None,
    help="result folder in which the saved checkpoint will be reused")
parser.add_argument("-p", "--patience", type=int, default=4,
    help="number of decreasing accuracy epochs to stop the training")

# Parse an explicit empty argument list so that only the defaults are used
# (inside a notebook, sys.argv belongs to the kernel launcher).
args = parser.parse_args([])

In [3]:
# Summary of the model
from torchsummary import summary

# The summary table showed up twice in this cell's output: once printed by the
# call and once more as the cell's displayed result. The trailing semicolon
# stops the notebook from rendering the returned value, leaving one table.
summary(model);

Layer (type:depth-idx)                        Param #
├─CamembertForSequenceClassification: 1-1     --
|    └─RobertaModel: 2-1                      --
|    |    └─RobertaEmbeddings: 3-1            24,976,896
|    |    └─RobertaEncoder: 3-2               85,054,464
|    └─RobertaClassificationHead: 2-2         --
|    |    └─Linear: 3-3                       590,592
|    |    └─Dropout: 3-4                      --
|    |    └─Linear: 3-5                       1,538
Total params: 110,623,490
Trainable params: 110,623,490
Non-trainable params: 0


Layer (type:depth-idx)                        Param #
├─CamembertForSequenceClassification: 1-1     --
|    └─RobertaModel: 2-1                      --
|    |    └─RobertaEmbeddings: 3-1            24,976,896
|    |    └─RobertaEncoder: 3-2               85,054,464
|    └─RobertaClassificationHead: 2-2         --
|    |    └─Linear: 3-3                       590,592
|    |    └─Dropout: 3-4                      --
|    |    └─Linear: 3-5                       1,538
Total params: 110,623,490
Trainable params: 110,623,490
Non-trainable params: 0

In [4]:
# To decompose the model, list the direct children of model.camembert.
# There are 2 children: the first is the BERT encoder (RobertaModel) and the
# second is the classification head (the part to be removed).
a = list(model.camembert.children())
len(a)

2

In [5]:
# Show the second child of model.camembert: the classification head.
a[1]

RobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=2, bias=True)
)

In [6]:
# Import data to train and test the model
from kmembert.training import train_and_validate 
from kmembert.utils import get_label_threshold, create_session
from kmembert.dataset import EHRDataset
import torch
from torch.utils.data import DataLoader

# Open a session from the parsed arguments; the second returned value is unused.
# NOTE(review): this rebinds `device` and `config`, which were already used to
# build `model` in the first cell (with config.mode = "density"). The new config
# is presumably derived from `args`, whose mode defaults to "regression" —
# confirm that the model and the data pipeline are meant to use different configs.
path_dataset, _, device, config = create_session(args)

# Freezing together with an added vocabulary file is rejected up front.
assert not (args.freeze and args.voc_file), "Don't use freeze argument while adding vocabulary. It would not be learned"

# Store the label threshold computed for this dataset on the config.
config.label_threshold = get_label_threshold(config, path_dataset)

# Build the train / validation datasets from the dataset path and the config.
train_dataset, validation_dataset = EHRDataset.get_train_validation(path_dataset, config)

# Training batches use config.batch_size; validation is served one sample at a
# time. Both loaders shuffle their data.
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=1, shuffle=True)

[1m> DEVICE:  cpu[0m
[1m> ROOT:    c:\Users\DIPIAZZA\Documents\CLB Projet\Projet1\Test Load BERTS\KmemBERT[0m
[1m> SESSION: c:\Users\DIPIAZZA\Documents\CLB Projet\Projet1\Test Load BERTS\KmemBERT\results\ipykernel_launcher_22-04-20_14h19m33s[0m


In [7]:
# Feed the training batches to the model one by one and print the first output
# of each batch. When config.nrows is set, stop once the number of samples
# already consumed (batch index times batch size) reaches that limit.
for i, batch in enumerate(train_loader):
    # The last element of a batch holds the labels; everything before it is input.
    *data, labels = batch
    if config.nrows:
        if i * config.batch_size >= config.nrows:
            break
    # model.step returns a (loss, outputs) pair for the batch.
    loss, outputs = model.step(*data, labels)
    print("Outputs : ", outputs[0])

Input dans forward HB: (tensor([[    5,   124,   118,   135,     6,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1],
        [    5,   188,   607,     6,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1],
        [    5,   160,   360,   358,   361,     6,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1],
        [    5,   239,   988, 17762,  8161,   988,   188,  5354,  4801,  6046,
            67, 18008,  4791,    26,  4331,   585, 16401,  3325,     9, 14608,
          3219, 22375, 15075,   988, 22581, 22078,  6565,     6],
        [    5,    76,  1482,    17,     6,     1,     1,     1,     1,     1,
 

In [8]:
# Check the concrete class of the model wrapper.
type(model)

kmembert.models.health_bert.HealthBERT

In [9]:
import torch.nn as nn
# Keep every direct child of model.camembert except the last one, then chain
# the remaining modules in a Sequential container.
children_without_last = list(model.camembert.children())[:-1]
toto = nn.Sequential(*children_without_last)

In [10]:
# Check the class of the model held in model.camembert.
type(model.camembert)

transformers.models.camembert.modeling_camembert.CamembertForSequenceClassification

In [16]:
# Check the class of the container built from the children of model.camembert.
type(toto)

torch.nn.modules.container.Sequential

In [17]:
# Display the module tree of the container that holds every child of
# model.camembert except the last one.
toto

Sequential(
  (0): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(32005, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=