In [28]:
from transformers import *
from transformers.modeling_tf_albert import *

In [17]:
import tensorflow as tf

## Albert fine-tuning model

In [7]:
# Hyper-parameters for the ALBERT encoder used by the fine-tuning model.
# Any option not listed here keeps the AlbertConfig default.
albert_base_configuration = AlbertConfig(
    max_position_embeddings=120,  # upper bound on input length (in tokens)
    vocab_size=8826,              # presumably the size of a custom tokenizer vocabulary -- TODO confirm
    hidden_size=768,
    intermediate_size=3072,
    num_attention_heads=12,
)

In [20]:
# Fixed token length of every input sequence fed to the model.
seq_length = 120

# The encoder can only embed positions up to `max_position_embeddings`, so a
# longer input would index past the position-embedding table. Fail fast here
# with a readable message instead of failing deep inside the model.
assert seq_length <= albert_base_configuration.max_position_embeddings, (
    f"seq_length={seq_length} exceeds "
    f"max_position_embeddings={albert_base_configuration.max_position_embeddings}"
)

In [18]:
# Build the ALBERT encoder from the configuration object alone.
# NOTE(review): constructing from a config (rather than `from_pretrained`) does
# not load any checkpoint, so the weights start out untrained -- confirm that
# pretrained weights are loaded elsewhere before fine-tuning.
base_model = TFAlbertModel(config=albert_base_configuration)

In [21]:
# Symbolic Keras input: one row of `seq_length` int32 token ids per example.
input_word_ids = tf.keras.layers.Input(
    name='input_word_ids',
    shape=(seq_length,),
    dtype=tf.int32,
)

In [22]:
# Run the encoder on the symbolic input. The result is indexable; element 0 is
# what the classification head in the next cell consumes.
albert_output = base_model(input_word_ids)

In [25]:
# Two-way softmax head applied to element 0 of the encoder output. According to
# the summary printed below, that tensor is (None, 120, 768), so the head yields
# one 2-class distribution per token position: (None, 120, 2).
# NOTE(review): if a single label per sequence is intended, a pooled
# representation would be the usual input here -- confirm the task.
output_layer = tf.keras.layers.Dense(
    units=2,
    activation='softmax',
)(albert_output[0])

In [26]:
# Tie the symbolic input and the softmax head together into a Keras model.
model = tf.keras.Model(
    inputs=input_word_ids,
    outputs=output_layer,
)

In [27]:
# Print the layer-by-layer output shapes and parameter counts as a wiring check.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_word_ids (InputLayer)  [(None, 120)]             0         
_________________________________________________________________
tf_albert_model_2 (TFAlbertM ((None, 120, 768), (None, 8923136   
_________________________________________________________________
dense (Dense)                (None, 120, 2)            1538      
Total params: 8,924,674
Trainable params: 8,924,674
Non-trainable params: 0
_________________________________________________________________


## Albert pretraining model

In [29]:
# Hyper-parameters for the ALBERT encoder used by the masked-LM (pretraining)
# model. The values are identical to the configuration declared for the
# fine-tuning model above; options not listed keep the AlbertConfig default.
albert_base_configuration = AlbertConfig(
    max_position_embeddings=120,  # upper bound on input length (in tokens)
    vocab_size=8826,              # presumably the size of a custom tokenizer vocabulary -- TODO confirm
    hidden_size=768,
    intermediate_size=3072,
    num_attention_heads=12,
)

In [30]:
# Fixed token length of every input sequence fed to the masked-LM model.
seq_length = 120

# The encoder can only embed positions up to `max_position_embeddings`, so a
# longer input would index past the position-embedding table. Fail fast here
# with a readable message instead of failing deep inside the model.
assert seq_length <= albert_base_configuration.max_position_embeddings, (
    f"seq_length={seq_length} exceeds "
    f"max_position_embeddings={albert_base_configuration.max_position_embeddings}"
)

In [33]:
# ALBERT with a masked-language-modelling head, built from the config alone
# (no checkpoint is loaded here, so the weights start out untrained).
base_masked_model = TFAlbertForMaskedLM(config=albert_base_configuration)

# Symbolic Keras input: one row of `seq_length` int32 token ids per example.
input_word_ids = tf.keras.layers.Input(
    name='input_word_ids',
    shape=(seq_length,),
    dtype=tf.int32,
)

In [34]:
# Apply the masked-LM model to the symbolic input. Per the summary printed
# below, the result is a 1-tuple holding a (None, 120, 8826) tensor; the last
# dimension matches the configured vocab_size.
output_masked_model = base_masked_model(input_word_ids)

In [41]:
# Wrap the masked-LM graph in a Keras model and print its layer/parameter table.
# NOTE(review): this rebinds `model`, replacing the fine-tuning model built in
# the previous section.
model = tf.keras.Model(
    inputs=input_word_ids,
    outputs=output_masked_model,
)
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_word_ids (InputLayer)  [(None, 120)]             0         
_________________________________________________________________
tf_albert_for_masked_lm_1 (T ((None, 120, 8826),)      9039476   
Total params: 9,039,476
Trainable params: 9,039,476
Non-trainable params: 0
_________________________________________________________________
