In [None]:
# Choose which GPUs CUDA exposes to this kernel. This cell comes first so the
# variables are set before torch / tensorflow are imported further below.
# With ipython notebooks
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# The empty device list is meant to hide every GPU so the notebook runs on CPU.
# NOTE(review): %env appears to store the text after "=" verbatim, quotes
# included - confirm the resulting value still hides all GPUs as intended.
%env CUDA_VISIBLE_DEVICES=""

# With python (example that exposes GPUs 0 and 3 instead)
# import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ['CUDA_VISIBLE_DEVICES'] = "0,3"  # specify which GPU(s) to be used

# With bash (same example)
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export CUDA_VISIBLE_DEVICES="0,3"

In [None]:
import sys
# Add ./src/hf/ to the module search path (relative to the kernel's working directory).
# NOTE(review): presumably where the modeling_albert_act* / configuration_albert_act
# modules imported in the next cell live - confirm.
sys.path.append('./src/hf/')

In [None]:
# Imports. The %autoreload magics at the end of this cell make IPython automatically reload all changed code.
import torch

from modeling_albert_act_tf import TFAlbertActModel
from modeling_albert_act import AlbertActModel
from configuration_albert_act import AlbertActConfig
from transformers import AlbertTokenizer, TFAlbertModel
import tensorflow as tf
import os
import numpy as np
from titulus import color, print_
import json

%load_ext autoreload
%autoreload 2

In [None]:
# Display the installed PyTorch version.
torch.__version__

In [None]:
# Build the ALBERT-ACT configuration from the hyper-parameters stored in
# albert_config.json (read relative to the kernel's working directory).
with open('albert_config.json', 'r') as config_file:
  albert_config = json.load(config_file)

# Every value is cast explicitly so that numbers stored as strings in the JSON
# file are accepted as well.
albert_act_configuration = AlbertActConfig(
  # Attention dropout is pinned to 0 here rather than read from the JSON file.
  attention_probs_dropout_prob=0,
  hidden_act=str(albert_config["hidden_act"]),
  # A dropout probability is a fraction: int() would silently truncate a value
  # such as 0.1 to 0, so it has to be parsed as a float.
  hidden_dropout_prob=float(albert_config["hidden_dropout_prob"]),
  embedding_size=int(albert_config["embedding_size"]),
  hidden_size=int(albert_config["hidden_size"]),
  initializer_range=float(albert_config["initializer_range"]),
  intermediate_size=int(albert_config["intermediate_size"]),
  max_position_embeddings=int(albert_config["max_position_embeddings"]),
  num_attention_heads=int(albert_config["num_attention_heads"]),
  num_hidden_layers=int(albert_config["num_hidden_layers"]),
  net_structure_type=int(albert_config["net_structure_type"]),
  gap_size=int(albert_config["gap_size"]),
  num_memory_blocks=int(albert_config["num_memory_blocks"]),
  inner_group_num=int(albert_config["inner_group_num"]),
  down_scale_factor=int(albert_config["down_scale_factor"]),
  type_vocab_size=int(albert_config["type_vocab_size"]),
  vocab_size=int(albert_config["vocab_size"]),
)

In [None]:
# Tokenizer published under the name "albert-base-v2".
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")

In [None]:
# Build the model from the configuration only; the exported weights are copied
# into it by the loading loop further below.
model = AlbertActModel(albert_act_configuration)

In [None]:
# Tokenize a sample sentence and request PyTorch tensors ("pt").
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

In [None]:
inputs

In [None]:
# Size of the token-id tensor.
inputs.input_ids.size()

In [None]:
# Forward pass through the freshly built model.
outputs = model(**inputs)

In [None]:
# NOTE(review): `updates` is presumably the per-token ACT update count -
# confirm against AlbertActModel.
outputs.updates

In [None]:
# Parameter names of the model; the loading loop further below looks each of
# them up in pt_assignment_map.
[n for (n, p) in model.named_parameters()]

In [None]:
# Number of named parameters.
len([n for (n, p) in model.named_parameters()])

In [None]:
# Maps each PyTorch parameter name of the model to the .npy file that holds its
# exported value. Entries are grouped by sub-module; order is kept as-is.
pt_assignment_map = {
    # --- embeddings ---
    "embeddings.word_embeddings.weight": "bert-embeddings-word_embeddings.npy",
    "embeddings.token_type_embeddings.weight": "bert-embeddings-token_type_embeddings.npy",
    "embeddings.position_embeddings.weight": "bert-embeddings-position_embeddings.npy",
    "embeddings.LayerNorm.weight": "bert-embeddings-layer_normalization-gamma.npy",
    "embeddings.LayerNorm.bias": "bert-embeddings-layer_normalization-beta.npy",

    # --- projection from embedding size to hidden size ---
    "encoder.embedding_hidden_mapping_in.weight": "bert-encoder-embedding_hidden_mapping_in-kernel.npy",
    "encoder.embedding_hidden_mapping_in.bias": "bert-encoder-embedding_hidden_mapping_in-bias.npy",

    # --- self-attention ---
    "encoder.albert_layer.attention.query.weight": "bert-encoder-transformer-attention_1-self-query-kernel.npy",
    "encoder.albert_layer.attention.query.bias": "bert-encoder-transformer-attention_1-self-query-bias.npy",
    "encoder.albert_layer.attention.key.weight": "bert-encoder-transformer-attention_1-self-key-kernel.npy",
    "encoder.albert_layer.attention.key.bias": "bert-encoder-transformer-attention_1-self-key-bias.npy",
    "encoder.albert_layer.attention.value.weight": "bert-encoder-transformer-attention_1-self-value-kernel.npy",
    "encoder.albert_layer.attention.value.bias": "bert-encoder-transformer-attention_1-self-value-bias.npy",
    "encoder.albert_layer.attention.dense.weight": "bert-encoder-transformer-attention_1-output-dense-kernel.npy",
    "encoder.albert_layer.attention.dense.bias": "bert-encoder-transformer-attention_1-output-dense-bias.npy",

    # --- halting (ACT) unit ---
    "encoder.albert_layer.act.dense.weight": "bert-encoder-transformer-halting-dense-kernel.npy",
    "encoder.albert_layer.act.dense.bias": "bert-encoder-transformer-halting-dense-bias.npy",

    # --- first layer norm of the shared layer ---
    "encoder.albert_layer.LayerNorm.weight": "transformer-layer_normalization-gamma.npy",
    "encoder.albert_layer.LayerNorm.bias": "transformer-layer_normalization-beta.npy",

    # --- feed-forward network ---
    "encoder.albert_layer.ffn.weight": "bert-encoder-transformer-ffn_1-intermediate-dense-kernel.npy",
    "encoder.albert_layer.ffn.bias": "bert-encoder-transformer-ffn_1-intermediate-dense-bias.npy",
    "encoder.albert_layer.ffn_output.weight": "bert-encoder-transformer-ffn_1-intermediate-output-dense-kernel.npy",
    "encoder.albert_layer.ffn_output.bias": "bert-encoder-transformer-ffn_1-intermediate-output-dense-bias.npy",

    # --- second layer norm of the shared layer ---
    "encoder.albert_layer.full_layer_layer_norm.weight": "transformer-layer_normalization_1-gamma.npy",
    "encoder.albert_layer.full_layer_layer_norm.bias": "transformer-layer_normalization_1-beta.npy",

    # --- pooler ---
    "pooler.weight": "bert-pooler-dense-kernel.npy",
    "pooler.bias": "bert-pooler-dense-bias.npy",
}

In [None]:
# Show the word-embedding weights as they are before the exported arrays are loaded.
model.embeddings.word_embeddings.weight

In [None]:
# State dict whose entries the loading loop in the next cell overwrites.
state_dict = model.state_dict()

In [None]:
# Copy every exported NumPy array into the PyTorch state dict, then load it.
#
# Files named "*-kernel.npy" hold dense-layer kernels. They are transposed
# before assignment because torch.nn.Linear keeps its weight as
# (out_features, in_features). Deciding by file suffix instead of a
# hand-maintained name list also covers 'pooler.weight', which the earlier list
# missed: its matrix is square, so the missing transpose raised no shape error
# but loaded the wrong values.
# NOTE(review): assumes the kernels were exported in (in, out) layout, as the
# other transposed entries imply - confirm against the export script.
for name, param in model.named_parameters():
  file_name = pt_assignment_map[name]  # KeyError here means an unmapped parameter
  save_param = np.load(os.path.join('./weights/', file_name))

  tensor = torch.Tensor(save_param)
  if file_name.endswith('-kernel.npy'):
    tensor = tensor.transpose(0, 1)

  # Fail early with the offending parameter name instead of a generic
  # size-mismatch error from load_state_dict.
  if tensor.shape != param.shape:
    raise ValueError(
      f"Shape mismatch for {name}: {file_name} gives {tuple(tensor.shape)}, "
      f"model expects {tuple(param.shape)}"
    )

  state_dict[name] = tensor
  print(name, state_dict[name].shape)
model.load_state_dict(state_dict)

In [None]:
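# Uncomment and run once to export the converted weights. The cells below reload both the
# model and the tokenizer from this directory, so it must exist before they are run.
# model.save_pretrained('./albert-act-base/')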

In [None]:
# Reload the model from the exported directory and switch it to evaluation mode.
model = AlbertActModel.from_pretrained('./albert-act-base/')
# Assigning to `_` keeps the return value of eval() from being displayed as the cell output.
_ = model.eval()

In [None]:
# The tokenizer is read from the same directory as the model.
tokenizer = AlbertTokenizer.from_pretrained("./albert-act-base/")

In [None]:
# Tokenize a sample sentence and request PyTorch tensors ("pt").
inputs = tokenizer("a lump in the middle of the monkeys stirred and then fell quiet .", return_tensors="pt")


In [None]:
inputs

In [None]:
# Run the reloaded model on the sample sentence and display its `updates` output.
outputs = model(**inputs)
outputs.updates