## CXRFE (Chest X-ray Fact Encoder)

In [10]:
from medvqa.models.huggingface_utils import _adapt_checkpoint_keys
from medvqa.models.checkpoint import load_model_state_dict, get_checkpoint_filepath
from transformers import AutoTokenizer, AutoModel
import torch

In [19]:
# Rebuild the CXRFE encoder: instantiate the public CXR-BERT architecture
# (pinned to a fixed commit, since trust_remote_code runs code from the hub
# repository) and then overwrite its weights with the project's checkpoint.
model_checkpoint_folder_path = '/mnt/data/pamessina/workspaces/medvqa-workspace/models/fact_embedding/20240629_084405_MIMIC-CXR(triplets+classif+entcont+nli+radgraph+autoencoder)_FactEncoder(microsoft-BiomedVLP-CXR-BERT-specialized)'
model = AutoModel.from_pretrained('microsoft/BiomedVLP-CXR-BERT-specialized', revision="6cfc310817fb7d86762d888ced1e3709c57ac578", trust_remote_code=True)
model_checkpoint_filepath = get_checkpoint_filepath(model_checkpoint_folder_path)
# map_location='cpu' makes deserialization independent of the device the
# checkpoint was saved from, so this export cell also runs on a machine
# without that GPU and does not allocate GPU memory just to copy weights.
checkpoint = torch.load(model_checkpoint_filepath, map_location='cpu')
# strict=False: the checkpoint holds keys the bare encoder does not define
# (e.g. fact_decoder.*, *_classifier.*), which are reported and skipped.
load_model_state_dict(model, _adapt_checkpoint_keys(checkpoint['model']), strict=False)

checkpoint_names = ['checkpoint_196_cacc+chf1+chf1+cscc+encc+hscc+nlcc+sass+spss+ta0)+ta1)+ta2)+ta0)+ta1)+ta2)+ta3)+ta4)+ta5)+ta6)+ta7)=0.9189.pt']
[93mExamples of keys in loaded state dict but not in model:[0m
[93m  fact_decoder.start_idx[0m
[93m  spert_rel_classifier.bias[0m
[93m  fact_decoder.decoder.layers.0.linear2.weight[0m
[93m  fact_decoder.decoder.layers.0.linear2.bias[0m
[93m  fact_decoder.W_vocab.bias[0m
[93m  chest_imagenome_anatloc_classifier.bias[0m
[93m  fact_decoder.pos_encoder.pe[0m
[93m  fact_decoder.decoder.layers.0.multihead_attn.in_proj_weight[0m
[93m  comparison_status_classifier.bias[0m
[93m  category_classifier.weight[0m


In [20]:
# Display the configuration of the loaded encoder as this cell's output
# (sanity check of architecture and hyper-parameters before exporting).
model.config

CXRBertConfig {
  "_name_or_path": "microsoft/BiomedVLP-CXR-BERT-specialized",
  "architectures": [
    "CXRBertModel"
  ],
  "attention_probs_dropout_prob": 0.25,
  "auto_map": {
    "AutoConfig": "microsoft/BiomedVLP-CXR-BERT-specialized--configuration_cxrbert.CXRBertConfig",
    "AutoModel": "microsoft/BiomedVLP-CXR-BERT-specialized--modeling_cxrbert.CXRBertModel"
  },
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.25,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "cxr-bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "projection_size": 128,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [22]:
# Export the encoder (with the checkpoint weights loaded above) as a local
# Hugging Face model directory.
# The former `revision="refs/pr/5"` argument was dropped: extra keyword
# arguments of save_pretrained are only forwarded to push_to_hub, so it had no
# effect on a local save and wrongly suggested a hub revision was involved.
# NOTE(review): the tokenizer cell writes to ".../CXRFE_test/" while this cell
# writes to ".../CXRFE_debug/" - confirm whether both artifacts are meant to
# live in the same directory so it can be loaded with from_pretrained.
model.save_pretrained("/home/pamessina/huggingface_models/CXRFE_debug/")

In [13]:
# Download the CXR-BERT tokenizer from the hub and write a local copy.
# NOTE(review): this uses the mutable ref "refs/pr/5", whereas the model cell
# pins a commit hash - confirm that the two revisions are meant to differ.
tokenizer = AutoTokenizer.from_pretrained(
    'microsoft/BiomedVLP-CXR-BERT-specialized',
    revision="refs/pr/5",
    trust_remote_code=True,
)
# Kept as the final expression so the list of written files is shown as output.
tokenizer.save_pretrained(
    "/home/pamessina/huggingface_models/CXRFE_test/"
)

('/home/pamessina/huggingface_models/CXRFE/tokenizer_config.json',
 '/home/pamessina/huggingface_models/CXRFE/special_tokens_map.json',
 '/home/pamessina/huggingface_models/CXRFE/vocab.txt',
 '/home/pamessina/huggingface_models/CXRFE/added_tokens.json')

## T5 Fact Extractor

In [5]:
from transformers import T5ForConditionalGeneration
from transformers import T5TokenizerFast

In [6]:
# Rebuild the T5 fact extractor: start from the stock t5-small weights and
# overwrite them with the project's sentence-to-facts checkpoint.
# Relies on torch, get_checkpoint_filepath, load_model_state_dict and
# _adapt_checkpoint_keys from the notebook's first import cell, and rebinds
# `model`, replacing the CXRFE encoder held under that name.
model = T5ForConditionalGeneration.from_pretrained('t5-small')
model_checkpoint_folder_path = '/mnt/data/pamessina/workspaces/medvqa-workspace/models/seq2seq/20240320_195545_sentence2facts(S2F)_Seq2Seq(t5-small)/'
model_checkpoint_filepath = get_checkpoint_filepath(model_checkpoint_folder_path)
# map_location='cpu' makes deserialization independent of the device the
# checkpoint was saved from, so this export cell also runs on a machine
# without that GPU and does not allocate GPU memory just to copy weights.
checkpoint = torch.load(model_checkpoint_filepath, map_location='cpu')
# strict=False tolerates key mismatches between checkpoint and model.
load_model_state_dict(model, _adapt_checkpoint_keys(checkpoint['model']), strict=False)

checkpoint_names = ['checkpoint_25_s2s_loss=0.9127.pt']


In [7]:
# Write the T5 model (weights and config) to a local Hugging Face directory;
# the tokenizer is saved into the same directory in a later cell.
model.save_pretrained(
    save_directory="/home/pamessina/huggingface_models/T5FactExtractor/",
)

In [9]:
# Store the stock t5-small tokenizer in the same directory as the exported
# model, so the directory holds both model and tokenizer files.
tokenizer = T5TokenizerFast.from_pretrained(
    't5-small',
)
# Kept as the final expression so the list of written files is shown as output.
tokenizer.save_pretrained(
    save_directory="/home/pamessina/huggingface_models/T5FactExtractor/",
)

('/home/pamessina/huggingface_models/T5FactExtractor/tokenizer_config.json',
 '/home/pamessina/huggingface_models/T5FactExtractor/special_tokens_map.json',
 '/home/pamessina/huggingface_models/T5FactExtractor/spiece.model',
 '/home/pamessina/huggingface_models/T5FactExtractor/added_tokens.json',
 '/home/pamessina/huggingface_models/T5FactExtractor/tokenizer.json')