In [1]:
import os
import shutil
import json
import torch
import transformers
from  transformers.models.volta.convert import transfer_from
from volta.datasets._image_features_reader import ImageFeaturesH5Reader


def compile_model(config_path, weight_path, output_path, dummy_imgfeats):
    """Convert a VOLTA checkpoint into a ``save_pretrained`` directory.

    The function builds a ``VoltaConfig`` from the JSON file at ``config_path``,
    instantiates a ``VoltaModel``, copies the checkpoint weights into it with
    ``transfer_from``, installs the dummy image inputs, and finally writes the
    config, tokenizer and model to ``output_path``.

    Args:
        config_path: Path to the JSON model configuration file.
        weight_path: Path of the checkpoint handed to ``torch.load``.
        output_path: Directory to create and save into. If it already exists,
            a message is printed and nothing else happens.
        dummy_imgfeats: Mapping with the keys ``'path'`` and ``'key'`` that is
            forwarded unchanged to ``set_dummy_imgfeats``.

    Returns:
        None.

    Raises:
        FileNotFoundError: If ``config_path`` cannot be opened.
        Exception: Anything raised while loading or saving is propagated; in
            that case no output directory is left behind.
    """
    if os.path.exists(output_path):
        print('output path already exists', output_path)
        return

    with open(config_path, 'r') as f:
        config_dict = json.load(f)
    # 'clf_hidden_size' is removed before the dict is handed to VoltaConfig;
    # pop() tolerates config files that do not carry the key at all.
    config_dict.pop('clf_hidden_size', None)
    config = transformers.models.volta.VoltaConfig.from_dict(config_dict)

    tokenizer = transformers.models.volta.VoltaTokenizer.from_pretrained(
        'bert-base-uncased',
        model_max_length=config.max_position_embeddings,
    )

    model = transformers.models.volta.VoltaModel(config)
    # map_location='cpu' lets a checkpoint that was saved from GPU tensors be
    # loaded on a machine without CUDA.
    checkpoint = torch.load(weight_path, map_location='cpu')
    model = transfer_from(checkpoint, model)
    set_dummy_imgfeats(dummy_imgfeats, config, model)

    # The directory is created only once everything that can fail up front has
    # succeeded. Otherwise a failed run would leave an empty directory behind
    # and every later run would be skipped by the existence check above.
    os.makedirs(output_path)
    try:
        config.save_pretrained(output_path)
        tokenizer.save_pretrained(output_path)
        model.save_pretrained(output_path)
    except BaseException:
        # Remove the partial output so the next run starts from scratch.
        shutil.rmtree(output_path, ignore_errors=True)
        raise
    

def make_reinit_model(base_model_path, output_path, dummy_imgfeats=None):
    """Create a re-initialised counterpart of an already converted model.

    The configuration and tokenizer are taken from ``base_model_path``; the
    weights of a fresh ``VoltaModel`` are filled by
    ``transfer_from('bert-base-uncased', model)`` instead of the checkpoint
    stored at ``base_model_path``. The result is saved to ``output_path``.

    Args:
        base_model_path: Directory of a previously saved model whose config
            and tokenizer are reused.
        output_path: Directory to create and save into. If it already exists,
            a message is printed and nothing else happens.
        dummy_imgfeats: Mapping with the keys ``'path'`` and ``'key'`` that is
            forwarded to ``set_dummy_imgfeats``. When omitted, the module-level
            variable ``dummy_imgfeats`` is used, which is what this function
            relied on implicitly before the parameter existed.

    Returns:
        None.

    Raises:
        ValueError: If ``dummy_imgfeats`` is not given and no module-level
            ``dummy_imgfeats`` is defined.
        Exception: Anything raised while loading or saving is propagated; in
            that case no output directory is left behind.
    """
    if os.path.exists(output_path):
        print('output path already exists', output_path)
        return

    if dummy_imgfeats is None:
        # Backward compatibility for callers that still depend on the global
        # defined in the notebook; resolved up front so a missing value fails
        # before any expensive model construction.
        dummy_imgfeats = globals().get('dummy_imgfeats')
        if dummy_imgfeats is None:
            raise ValueError(
                'dummy_imgfeats was not passed and no module-level '
                'dummy_imgfeats is defined'
            )

    config = transformers.models.volta.VoltaConfig.from_pretrained(base_model_path)
    model = transformers.models.volta.VoltaModel(config)
    model = transfer_from('bert-base-uncased', model)
    set_dummy_imgfeats(dummy_imgfeats, config, model)
    tokenizer = transformers.models.volta.VoltaTokenizer.from_pretrained(base_model_path)

    # The directory is created only after all loading succeeded, so that a
    # failed run cannot leave an empty directory that makes later runs skip.
    os.makedirs(output_path)
    try:
        config.save_pretrained(output_path)
        tokenizer.save_pretrained(output_path)
        model.save_pretrained(output_path)
    except BaseException:
        # Remove the partial output so the next run starts from scratch.
        shutil.rmtree(output_path, ignore_errors=True)
        raise

    
def set_dummy_imgfeats(dummy_imgfeats, config, model):
    """Overwrite the model's dummy image tensors with stored image features.

    One entry is read through ``ImageFeaturesH5Reader`` (opened with
    ``dummy_imgfeats['path']`` and ``config``, indexed by
    ``dummy_imgfeats['key']``). Its features and image locations are written
    into ``model.dummy_input_imgs`` and ``model.dummy_image_loc`` respectively.

    Args:
        dummy_imgfeats: Mapping providing the ``'path'`` and ``'key'`` entries.
        config: Configuration object passed on to ``ImageFeaturesH5Reader``.
        model: Object exposing ``dummy_input_imgs`` and ``dummy_image_loc``,
            each with ``dtype`` and ``data`` attributes; modified in place.

    Raises:
        AssertionError: If a loaded array, after a leading axis is added, does
            not have the same shape as the tensor it is meant to replace.
    """
    reader = ImageFeaturesH5Reader(dummy_imgfeats['path'], config)
    # The reader yields four values; only the first and third are used here.
    features, _num_boxes, image_location, _image_location_ori = reader[dummy_imgfeats['key']]

    replacements = (
        ('dummy_input_imgs', features),
        ('dummy_image_loc', image_location),
    )
    for attr_name, array in replacements:
        slot = getattr(model, attr_name)
        # array[None] prepends an axis of length one before the conversion.
        batched = torch.tensor(array[None], dtype=slot.dtype)
        assert batched.shape == slot.data.shape
        slot.data = batched

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
03/30/2021 18:27:46 - INFO - pytorch_transformers.modeling_bert -   Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
03/30/2021 18:27:46 - INFO - pytorch_transformers.modeling_xlnet -   Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [2]:
# Image-feature entry that is written into every converted model as its
# dummy visual input.
dummy_imgfeats = dict(
    path='hf_volta_models/dummy_imgfeats.lmdb',
    key='black_224',
)

# One entry per checkpoint to convert; the keys match the keyword parameters
# of compile_model (config_path, weight_path, output_path).
targets = [
    dict(
        config_path='volta/config/ctrl_vilbert_base.json',
        weight_path='volta/checkpoints/distributed/ctrl_vilbert',
        output_path='hf_volta_models/ctrl_vilbert_base',
    ),
    # NOTE(review): the checkpoint name is spelled 'ctrl_lexmert' (not
    # 'ctrl_lxmert') — presumably this matches the file on disk; confirm.
    dict(
        config_path='volta/config/ctrl_lxmert.json',
        weight_path='volta/checkpoints/distributed/ctrl_lexmert',
        output_path='hf_volta_models/ctrl_lxmert_base',
    ),
    dict(
        config_path='volta/config/ctrl_uniter_base.json',
        weight_path='volta/checkpoints/distributed/ctrl_uniter',
        output_path='hf_volta_models/ctrl_uniter_base',
    ),
    dict(
        config_path='volta/config/ctrl_visualbert_base.json',
        weight_path='volta/checkpoints/distributed/ctrl_visual_bert',
        output_path='hf_volta_models/ctrl_visual_bert_base',
    ),
    dict(
        config_path='volta/config/ctrl_vl-bert_base.json',
        weight_path='volta/checkpoints/distributed/ctrl_vl_bert',
        output_path='hf_volta_models/ctrl_vl_bert_base',
    ),
]

In [3]:
# Convert every checkpoint listed in `targets`, then build its counterpart
# initialised from 'bert-base-uncased' in a sibling directory whose name
# carries the '_reinit' suffix.
for target in targets:
    print('compile', target)
    compile_model(**target, dummy_imgfeats=dummy_imgfeats)

    print('reinit', target)
    converted_dir = target['output_path']
    make_reinit_model(converted_dir, f'{converted_dir}_reinit')

compile {'config_path': 'volta/config/ctrl_vilbert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_vilbert', 'output_path': 'hf_volta_models/ctrl_vilbert_base'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_vilbert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_vilbert', 'output_path': 'hf_volta_models/ctrl_vilbert_base'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.34.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.34.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.34.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.34.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.weight -> encoder.layer.3

compile {'config_path': 'volta/config/ctrl_lxmert.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_lexmert', 'output_path': 'hf_volta_models/ctrl_lxmert_base'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_lxmert.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_lexmert', 'output_path': 'hf_volta_models/ctrl_lxmert_base'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.25.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.25.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.25.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.25.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.weight -> encoder.layer.25.attention_se

compile {'config_path': 'volta/config/ctrl_uniter_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_uniter', 'output_path': 'hf_volta_models/ctrl_uniter_base'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_uniter_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_uniter', 'output_path': 'hf_volta_models/ctrl_uniter_base'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.22.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.22.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.22.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.22.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.weight -> encoder.layer.22.atte

compile {'config_path': 'volta/config/ctrl_visualbert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_visual_bert', 'output_path': 'hf_volta_models/ctrl_visual_bert_base'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_visualbert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_visual_bert', 'output_path': 'hf_volta_models/ctrl_visual_bert_base'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.22.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.22.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.22.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.22.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.wei

compile {'config_path': 'volta/config/ctrl_vl-bert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_vl_bert', 'output_path': 'hf_volta_models/ctrl_vl_bert_base'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_vl-bert_base.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_vl_bert', 'output_path': 'hf_volta_models/ctrl_vl_bert_base'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.22.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.22.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.22.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.22.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.weight -> encoder.layer.2

In [4]:
# Second conversion pass for the newer LXMERT checkpoint ('ctrl_lxmert_new').
# Both `dummy_imgfeats` and `targets` are rebound here, so the conversion
# loop that follows processes only this single entry.
dummy_imgfeats = dict(
    path='volta/data/glue/glue_imgfeats.lmdb',
    key='black_224',
)
targets = [
    dict(
        config_path='volta/config/ctrl_lxmert.json',
        weight_path='volta/checkpoints/distributed/ctrl_lxmert_new',
        output_path='hf_volta_models/ctrl_lxmert_base_new',
    ),
]

In [8]:
# Run the same two-step conversion (checkpoint conversion, then the
# 'bert-base-uncased'-initialised '_reinit' variant) for the current `targets`.
for target in targets:
    print('compile', target)
    compile_model(**target, dummy_imgfeats=dummy_imgfeats)

    print('reinit', target)
    source_dir = target['output_path']
    reinit_dir = source_dir + '_reinit'
    make_reinit_model(source_dir, reinit_dir)

compile {'config_path': 'volta/config/ctrl_lxmert.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_lxmert_new', 'output_path': 'hf_volta_models/ctrl_lxmert_base_new'}
start_prefix bert. model_to_load VoltaModel
Weights of VoltaModel not initialized from pretrained model: ['bert.dummy_input_imgs', 'bert.dummy_image_loc']
reinit {'config_path': 'volta/config/ctrl_lxmert.json', 'weight_path': 'volta/checkpoints/distributed/ctrl_lxmert_new', 'output_path': 'hf_volta_models/ctrl_lxmert_base_new'}
weight mapping encoder.layer.11.attention.self.query.weight -> encoder.layer.25.attention_self.query.weight
weight mapping encoder.layer.11.attention.self.query.bias -> encoder.layer.25.attention_self.query.bias
weight mapping encoder.layer.11.attention.self.key.weight -> encoder.layer.25.attention_self.key.weight
weight mapping encoder.layer.11.attention.self.key.bias -> encoder.layer.25.attention_self.key.bias
weight mapping encoder.layer.11.attention.self.value.weight -> encoder.layer.2