In [None]:
# GPU selection for this kernel. The variables are set here, ahead of the
# TensorFlow import that happens in a later cell.
#
# With ipython notebooks (the active variant)
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# No device indices are listed (compare "0,3" in the alternatives below),
# presumably so that no GPU is visible and everything runs on CPU -- TODO confirm.
%env CUDA_VISIBLE_DEVICES=""

# With python (kept for reference; this variant selects GPUs 0 and 3)
# import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ['CUDA_VISIBLE_DEVICES'] = "0,3"  # specify which GPU(s) to be used

# With bash (kept for reference; this variant selects GPUs 0 and 3)
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export CUDA_VISIBLE_DEVICES="0,3"

In [None]:
import sys

# Put ./src/hf/ on the module search path (presumably the directory that holds
# modeling_albert_act_tf and configuration_albert_act, imported in the next cell).
# The membership check keeps this cell idempotent: re-running it does not append
# the same entry to sys.path a second time.
if './src/hf/' not in sys.path:
  sys.path.append('./src/hf/')

In [None]:
import tensorflow as tf

from modeling_albert_act_tf import TFAlbertActModel
from configuration_albert_act import AlbertActConfig
from transformers import AlbertTokenizer, TFAlbertModel
import tensorflow as tf
import os
import numpy as np
import json

# Have IPython automatically reload modules whose source has changed
%load_ext autoreload
%autoreload 2

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Build the AlbertActConfig from the hyper-parameters stored in ./albert_config.json.
# Every value is cast explicitly, so a number that was stored as a string in the
# JSON file still reaches the config with the intended Python type.
with open('./albert_config.json', 'r', encoding='utf-8') as f:
  albert_config = json.load(f)

albert_act_configuration = AlbertActConfig(
  # Attention dropout is fixed to 0 here, independent of the JSON file.
  attention_probs_dropout_prob=0,
  hidden_act=str(albert_config["hidden_act"]),
  # A dropout probability is a fraction, so it is read as a float: int() would
  # silently truncate a value such as 0.1 down to 0.
  hidden_dropout_prob=float(albert_config["hidden_dropout_prob"]),
  embedding_size=int(albert_config["embedding_size"]),
  hidden_size=int(albert_config["hidden_size"]),
  initializer_range=float(albert_config["initializer_range"]),
  intermediate_size=int(albert_config["intermediate_size"]),
  max_position_embeddings=int(albert_config["max_position_embeddings"]),
  num_attention_heads=int(albert_config["num_attention_heads"]),
  num_hidden_layers=int(albert_config["num_hidden_layers"]),
  net_structure_type=int(albert_config["net_structure_type"]),
  gap_size=int(albert_config["gap_size"]),
  num_memory_blocks=int(albert_config["num_memory_blocks"]),
  inner_group_num=int(albert_config["inner_group_num"]),
  down_scale_factor=int(albert_config["down_scale_factor"]),
  type_vocab_size=int(albert_config["type_vocab_size"]),
  vocab_size=int(albert_config["vocab_size"]),
)

In [None]:
# Tokenizer fetched by its "albert-base-v2" identifier; it is used to encode the
# first sample sentence and is replaced by a locally built tokenizer later on.
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
# Model instantiated from the configuration only; its variables are overwritten
# with the saved .npy parameters by the loading loop further down.
model = TFAlbertActModel(albert_act_configuration)

In [None]:
# Encode a short sample sentence as TensorFlow tensors, then display the encoding.
sample_text = "Hello, my dog is cute"
inputs = tokenizer(sample_text, return_tensors="tf")

inputs

In [None]:
# Run the freshly constructed model once on the sample input.
# NOTE(review): presumably this call is also what creates the model's variables,
# so that the loading loop below has weights to assign to -- confirm.
outputs = model(inputs, return_dict=False)

In [None]:
# Maps the name of each model variable (key) to the .npy file that holds its
# saved values (value); the loading loop below resolves the file names against
# ./weights/.
# NOTE(review): the keys carry the full "tf_albert_act_model/" prefix and the
# ":0" suffix, so the lookup only succeeds while the model's variables have
# exactly these names -- confirm this still holds if the model cell is re-run
# within the same kernel.
assignment_map = {
  'tf_albert_act_model/albert/embeddings/word_embeddings/weight:0': 'bert-embeddings-word_embeddings.npy',
  'tf_albert_act_model/albert/embeddings/token_type_embeddings/embeddings:0': 'bert-embeddings-token_type_embeddings.npy',
  'tf_albert_act_model/albert/embeddings/position_embeddings/embeddings:0': 'bert-embeddings-position_embeddings.npy',
  'tf_albert_act_model/albert/embeddings/LayerNorm/gamma:0': 'bert-embeddings-layer_normalization-gamma.npy',
  'tf_albert_act_model/albert/embeddings/LayerNorm/beta:0': 'bert-embeddings-layer_normalization-beta.npy',
  'tf_albert_act_model/albert/encoder/embedding_hidden_mapping_in/kernel:0': 'bert-encoder-embedding_hidden_mapping_in-kernel.npy',
  'tf_albert_act_model/albert/encoder/embedding_hidden_mapping_in/bias:0': 'bert-encoder-embedding_hidden_mapping_in-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/query/kernel:0': 'bert-encoder-transformer-attention_1-self-query-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/query/bias:0': 'bert-encoder-transformer-attention_1-self-query-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/key/kernel:0': 'bert-encoder-transformer-attention_1-self-key-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/key/bias:0': 'bert-encoder-transformer-attention_1-self-key-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/value/kernel:0': 'bert-encoder-transformer-attention_1-self-value-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/value/bias:0': 'bert-encoder-transformer-attention_1-self-value-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/dense/kernel:0': 'bert-encoder-transformer-attention_1-output-dense-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/attention/dense/bias:0': 'bert-encoder-transformer-attention_1-output-dense-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/act/dense/kernel:0': 'bert-encoder-transformer-halting-dense-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/act/dense/bias:0': 'bert-encoder-transformer-halting-dense-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/LayerNorm/gamma:0': 'transformer-layer_normalization-gamma.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/LayerNorm/beta:0': 'transformer-layer_normalization-beta.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/ffn/kernel:0': 'bert-encoder-transformer-ffn_1-intermediate-dense-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/ffn/bias:0': 'bert-encoder-transformer-ffn_1-intermediate-dense-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/ffn_output/kernel:0': 'bert-encoder-transformer-ffn_1-intermediate-output-dense-kernel.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/ffn_output/bias:0': 'bert-encoder-transformer-ffn_1-intermediate-output-dense-bias.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/full_layer_layer_norm/gamma:0': 'transformer-layer_normalization_1-gamma.npy',
  'tf_albert_act_model/albert/encoder/albert_layer/full_layer_layer_norm/beta:0': 'transformer-layer_normalization_1-beta.npy',
  'tf_albert_act_model/albert/pooler/kernel:0': 'bert-pooler-dense-kernel.npy',
  'tf_albert_act_model/albert/pooler/bias:0': 'bert-pooler-dense-bias.npy',
}

In [None]:
# Overwrite every model variable with the values stored in its mapped .npy file.
# The variable name and the chosen file name are printed as a progress log; a
# variable without an entry in assignment_map raises KeyError.
for variable in model.weights:
  print(variable.name)

  npy_name = assignment_map[variable.name]
  print(npy_name)

  npy_path = os.path.join('./weights/', npy_name)
  with open(npy_path, 'rb') as npy_file:
    saved_values = np.load(npy_file)

  variable.assign(saved_values)

In [None]:
# Display the first model variable to eyeball the values assigned by the loop above.
model.weights[0]

In [None]:
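# Uncomment and run once to write the converted model to disk; the
# from_pretrained call further down loads from this same directory.
# model.save_pretrained('./albert-act-base/')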

In [None]:
from transformers import PreTrainedTokenizer

In [None]:
# Rebind `tokenizer` to one built from the local vocabulary file
# ./30k-clean-v2.model, replacing the "albert-base-v2" tokenizer created earlier.
tokenizer = AlbertTokenizer(vocab_file="./30k-clean-v2.model")

In [None]:
# Rebind `model` to a copy loaded from ./albert-act-base/.
# NOTE(review): the save_pretrained call that would write this directory is
# commented out earlier in the notebook, so the directory has to exist already
# for this cell to work on a fresh kernel.
model = TFAlbertActModel.from_pretrained('./albert-act-base/')

In [None]:
# Encode a longer sample sentence with the local tokenizer and display the encoding.
sentence = "a lump in the middle of the monkeys stirred and then fell quiet ."
inputs = tokenizer(sentence, return_tensors="tf")

inputs

In [None]:
# Forward pass through the reloaded model with return_dict=False; the result is
# rebound by the return_dict=True call in the next cell.
outputs = model(inputs, return_dict=False)

In [None]:
# Same forward pass with return_dict=True, so that fields of the result can be
# read by attribute name (the next cell reads `.updates`).
outputs = model(inputs, return_dict=True)

In [None]:
# Display the `updates` field of the model output.
# NOTE(review): presumably the adaptive-computation update counts -- confirm
# against the TFAlbertActModel output definition.
outputs.updates