# Bert to Tensorflow 1 Convertion

## 1. Pytorch to ONNX

In [1]:
import torch
from transformers import BertModel, BertTokenizer

In [6]:
model_path = 'BertLarge'

model = BertModel.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

In [None]:
print(tokenizer)

In [9]:
print(model)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(29794, 1024, padding_idx=0)
    (position_embeddings): Embedding(512, 1024)
    (token_type_embeddings): Embedding(2, 1024)
    (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-23): 24 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inpl

In [10]:
# Defina o tamanho de entrada (por exemplo, tamanho do token)
# Aqui, '128' é um exemplo de tamanho de sequência. Você pode precisar ajustar isso conforme necessário

input_ids = torch.randint(0, 2000, (1, 128)) # Exemplo de tensor de IDs de token

In [11]:
# Especifique o caminho para salvar o modelo ONNX
output_onnx_path = "ONNX/BertLarge.onnx"

In [13]:
# Exporte o modelo para o formato ONNX
torch.onnx.export(model, input_ids, output_onnx_path, export_params=True, opset_version=11, 
                  do_constant_folding=True, input_names=['input_ids'], 
                  output_names=['output'], dynamic_axes={'input_ids' : {0 : 'batch_size'}, 
                  'output' : {0 : 'batch_size'}})

print("Modelo BERTimbau exportado para ONNX com sucesso.")

Modelo BERTimbau exportado para ONNX com sucesso.


<hr>

## 1.1 Teste de Inferência - Modelo ONNX

In [9]:
import onnxruntime as ort
import numpy as np
from transformers import BertTokenizer

In [10]:
# Carregar o tokenizador
tokenizer = BertTokenizer.from_pretrained("BertLarge")

# Carregar o modelo ONNX
onnx_model_path = "ONNX/BertLarge.onnx"
session = ort.InferenceSession(onnx_model_path)


In [13]:
# Preparar os dados de entrada
text = "I loved this book." 
inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
input_ids = inputs["input_ids"].numpy()

# Realizar a inferência
outputs = session.run(None, {'input_ids': input_ids})

# Usar a representação do token [CLS] (primeiro token)
cls_representation = outputs[0][0, 0, :]

print("Representação [CLS]:", cls_representation)

Representação [CLS]: [-0.7848451   0.7218873  -0.8953569  ...  0.27494764 -0.384354
 -0.25554106]


In [8]:
# Preparar os dados de entrada
text = "I loved this book."
inputs = tokenizer(text, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
input_ids = inputs["input_ids"].numpy()

# Realizar a inferência
outputs = session.run(None, {"input_ids": input_ids})

# Processar a saída
output = outputs[0]

# Função auxiliar softmax
def softmax(x, axis=None):
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Softmax sobre logits e predição
probabilities = softmax(output[0], axis=1)
predicted_class = np.argmax(probabilities, axis=1)[0]

print("Classe prevista:", predicted_class)


Classe prevista: 456


In [7]:
output = outputs[0]
print("Formato da saída:", output.shape)


Formato da saída: (1, 128, 1024)


<hr>

## 2. ONNX to Tensorflow 2

In [2]:
import onnx
from onnx_tf.backend import prepare


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.9.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [7]:
# Carregar o modelo ONNX
onnx_model = onnx.load("ONNX/BertLarge.onnx")

# Converter o modelo ONNX para TensorFlow 2.x
tf_rep = prepare(onnx_model)

# Salvar o modelo TensorFlow 2.x
tf_rep.export_graph("BertLarge_tf2")



INFO:tensorflow:Assets written to: BBBertLarge_tf2\assets


INFO:tensorflow:Assets written to: BBBertLarge_tf2\assets


<hr>

## Teste do modelo Tensorflow convertido do ONNX

In [12]:
import tensorflow as tf
from transformers import BertTokenizer

model_path = 'BertLarge'
# Carregar o tokenizer correspondente ao seu modelo BERT
tokenizer = BertTokenizer.from_pretrained(model_path)

# Texto de exemplo para inferência
texto_exemplo = "Hello, how are you?"

In [15]:
# Tokenizar o texto
inputs = tokenizer(texto_exemplo, return_tensors="tf", max_length=512, truncation=True, padding="max_length")

# Extrair input_ids e converter para int64
input_ids = tf.cast(inputs["input_ids"], tf.int64)

# Carregar o modelo TensorFlow
model = tf.saved_model.load("BertLarge_tf2")

# Preparar os dados de entrada como um dicionário
input_data = {'input_ids': input_ids}

# Realizar inferência
output = model.signatures['serving_default'](**input_data)


In [34]:
# Verificar a assinatura do modelo
for f in model.signatures:
    print('Signature:', f)
    print('Inputs:', model.signatures[f].inputs)
    print('Outputs:', model.signatures[f].outputs)

Signature: serving_default
Inputs: [<tf.Tensor 'input_ids:0' shape=(None, 128) dtype=int64>, <tf.Tensor 'unknown:0' shape=(29794, 1024) dtype=float32>, <tf.Tensor 'unknown_0:0' shape=(2, 1024) dtype=float32>, <tf.Tensor 'unknown_1:0' shape=(512, 1024) dtype=float32>, <tf.Tensor 'unknown_2:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_3:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_4:0' shape=(1024, 1024) dtype=float32>, <tf.Tensor 'unknown_5:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_6:0' shape=(1024, 1024) dtype=float32>, <tf.Tensor 'unknown_7:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_8:0' shape=(1024, 1024) dtype=float32>, <tf.Tensor 'unknown_9:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_10:0' shape=(1024, 1024) dtype=float32>, <tf.Tensor 'unknown_11:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_12:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_13:0' shape=(1024,) dtype=float32>, <tf.Tensor 'unknown_14:0' shape=(1024, 4096) dty

Saídas do Modelo: Last Hidden State e Pooled Output


In [37]:
# Saídas do Modelo: Last Hidden State e Pooled Output

print(output)

{'output': <tf.Tensor: shape=(1, 512, 1024), dtype=float32, numpy=
array([[[-1.405883  , -0.5071652 , -0.19600448, ..., -0.14475238,
          0.7742399 ,  0.60961187],
        [ 0.0534275 ,  1.2149448 ,  0.14572959, ...,  0.11436579,
         -0.3873235 ,  0.46369135],
        [-0.25096303,  0.94107556,  0.12726949, ..., -0.41458088,
         -0.02168075,  0.05212868],
        ...,
        [-1.4619552 , -0.5172862 , -0.20580854, ..., -0.15467204,
          0.78893995,  0.5971466 ],
        [-1.4630004 , -0.517497  , -0.20602077, ..., -0.15482968,
          0.7891975 ,  0.59694535],
        [-1.4503597 , -0.51505363, -0.20361568, ..., -0.1528167 ,
          0.7860867 ,  0.5994793 ]]], dtype=float32)>, '3024': <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
array([[ 0.8851668 ,  0.6478177 ,  0.73255414, ..., -0.22627208,
        -0.96188384, -0.37408417]], dtype=float32)>}


Last Hidden State (output[0]): Representa as características (ou embeddings) de cada token na frase. Este é um tensor de forma (batch_size, sequence_length, hidden_size), que no caso é (1, 512, 1024). Cada token da sequência de entrada é representado por um vetor de 1024 dimensões.

Pooled Output (output[1]): É uma representação agregada da entrada inteira, normalmente usada em tarefas de classificação. Este é um tensor de forma (batch_size, hidden_size), que no caso é (1, 1024).

In [25]:
# Usando Last Hidden State para extração de características
last_hidden_state = output['output'].numpy()
last_hidden_state

array([[[-1.405883  , -0.5071652 , -0.19600448, ..., -0.14475238,
          0.7742399 ,  0.60961187],
        [ 0.0534275 ,  1.2149448 ,  0.14572959, ...,  0.11436579,
         -0.3873235 ,  0.46369135],
        [-0.25096303,  0.94107556,  0.12726949, ..., -0.41458088,
         -0.02168075,  0.05212868],
        ...,
        [-1.4619552 , -0.5172862 , -0.20580854, ..., -0.15467204,
          0.78893995,  0.5971466 ],
        [-1.4630004 , -0.517497  , -0.20602077, ..., -0.15482968,
          0.7891975 ,  0.59694535],
        [-1.4503597 , -0.51505363, -0.20361568, ..., -0.1528167 ,
          0.7860867 ,  0.5994793 ]]], dtype=float32)

In [32]:
# Exemplo: Obtendo o embedding do primeiro token (geralmente [CLS] em BERT)
first_token_embedding = last_hidden_state[0][0]
first_token_embedding

array([-1.405883  , -0.5071652 , -0.19600448, ..., -0.14475238,
        0.7742399 ,  0.60961187], dtype=float32)

In [31]:
<hr># Acessar o Pooled Output
pooled_output = output['3024'].numpy()
pooled_output

array([[ 0.8851668 ,  0.6478177 ,  0.73255414, ..., -0.22627208,
        -0.96188384, -0.37408417]], dtype=float32)

<hr>

<hr>

## 3. Carregar o Modelo TensorFlow 2.x e Downgrade para TensorFlow 1.x


In [None]:
# Criar ambiente virtual Conda

# Criar um ambiente Conda
conda create -n tf1_env python=3.7

# Ativar o ambiente
conda activate tf1_env

# Instalar TensorFlow 1.x
pip install tensorflow==1.15.5 tensorflow-probability==0.12.1

In [None]:
# Abrir o Jupyter Notebook a partir do env de TF 1.15.5

# python -m ipykernel install --user --name=tf1.15 --display-name="Python 3.6.13 (tf1.15)"

# jupyter notebook

In [1]:
import tensorflow as tf
tf.__version__

'1.15.5'

In [2]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Carregar o modelo TensorFlow 2.x
loaded_model = tf.saved_model.load_v2("BertLarge_tf2")

# Criar uma sessão TensorFlow 1.15
with tf.Session(graph=tf.Graph()) as sess:
    # Importar o modelo para a sessão atual
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], "BertLarge_tf2")

    # Salvar o modelo no formato de checkpoints TensorFlow 1.15
    saver = tf.train.Saver()
    saver.save(sess, "BertLarge_tf1")


Instructions for updating:
non-resource variables are not supported in the long term
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from BBBertLarge_tf2\variables\variables


ValueError: No variables to save

In [5]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Carregar o modelo TensorFlow 2.x
loaded_model = tf.saved_model.load_v2("BertLarge_tf2")

# Iniciar uma sessão TensorFlow 1.15
with tf.Session(graph=tf.Graph()) as sess:
    # Importar o modelo para a sessão atual
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], "BertLarge_tf2")

    # Inspecionar variáveis
    all_variables = tf.global_variables()
    for variable in all_variables:
        print(variable)

    # Verificar se existem variáveis para salvar
    if all_variables:
        # Salvar o modelo no formato de checkpoints TensorFlow 1.15
        saver = tf.train.Saver()
        saver.save(sess, "BertLarge_tf1")
    else:
        print("Nenhuma variável para salvar")


INFO:tensorflow:Restoring parameters from BBBertLarge_tf2\variables\variables
Nenhuma variável para salvar


### Conclusão: não é possível converter TF2 para TF1 sem ter os .ckp do TF2, já que veio de um .bin do Pytorch

In [39]:
import tensorflow.compat.v1 as tf
from tensorflow.python.saved_model import tag_constants

tf.disable_v2_behavior()

# Caminho para o diretório do modelo SavedModel
model_dir = 'BertLarge_tf2'

# Carregar o modelo
with tf.Session() as sess:
    tf.saved_model.loader.load(sess, [tag_constants.SERVING], model_dir)

    # Agora você pode usar sess.run() para fazer inferências com o modelo carregado


Instructions for updating:
non-resource variables are not supported in the long term


Instructions for updating:
non-resource variables are not supported in the long term


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


INFO:tensorflow:Restoring parameters from BBBertLarge_tf2\variables\variables


INFO:tensorflow:Restoring parameters from BBBertLarge_tf2\variables\variables


In [40]:
for op in tf.get_default_graph().get_operations():
    print(op.name)


Const
Const_1
Const_2
Const_3
Const_4
Const_5
Const_6
Const_7
Const_8
Const_9
Const_10
Const_11
Const_12
Const_13
Const_14
Const_15
Const_16
Const_17
Const_18
Const_19
Const_20
Const_21
Const_22
Const_23
Const_24
Const_25
Const_26
Const_27
Const_28
Const_29
Const_30
Const_31
Const_32
Const_33
Const_34
Const_35
Const_36
Const_37
Const_38
Const_39
Const_40
Const_41
Const_42
Const_43
Const_44
Const_45
Const_46
Const_47
Const_48
Const_49
Const_50
Const_51
Const_52
Const_53
Const_54
Const_55
Const_56
Const_57
Const_58
Const_59
Const_60
Const_61
Const_62
Const_63
Const_64
Const_65
Const_66
Const_67
Const_68
Const_69
Const_70
Const_71
Const_72
Const_73
Const_74
Const_75
Const_76
Const_77
Const_78
Const_79
Const_80
Const_81
Const_82
Const_83
Const_84
Const_85
Const_86
Const_87
Const_88
Const_89
Const_90
Const_91
Const_92
Const_93
Const_94
Const_95
Const_96
Const_97
Const_98
Const_99
Const_100
Const_101
Const_102
Const_103
Const_104
Const_105
Const_106
Const_107
Const_108
Const_109
Const_110
Co

In [49]:
import tensorflow as tf
import numpy as np

# Carregar o modelo
model = tf.saved_model.load('BertLarge_tf2')

# Preparar os dados de entrada (ajustar conforme necessário)
# Aqui, estou gerando dados de entrada aleatórios como um exemplo.
input_data = np.random.randint(0, 1000, size=(1, 128))
input_data = input_data.astype(np.int64)  # Converter para int64

# Criar um dicionário de entrada com a chave 'input_ids'
input_dict = {"input_ids": input_data}

# Realizar a inferência
output = model.signatures['serving_default'](**input_dict)

# Processar e exibir os resultados
print(output)


{'output': <tf.Tensor 'StatefulPartitionedCall_5:1' shape=(1, 128, 1024) dtype=float32>, '3024': <tf.Tensor 'StatefulPartitionedCall_5:0' shape=(1, 1024) dtype=float32>}


In [6]:
tensor_3024_values = output['3024'].numpy()
print(tensor_3024_values)


[[-0.33626798 -0.98094827 -0.8146997  ... -0.05506861 -0.13888831
   0.14662988]]


In [8]:
# Configurar opções de impressão do NumPy para exibir mais elementos
np.set_printoptions(threshold=np.inf)

# Agora você pode imprimir um array sem ser cortado
print(output)


{'3024': <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
array([[-3.36267978e-01, -9.80948269e-01, -8.14699709e-01,
        -2.95986056e-01,  9.12011489e-02, -8.87155056e-01,
         1.76211283e-01,  9.83156562e-02,  1.52927414e-01,
         4.89975095e-01, -7.46832609e-01,  9.92944300e-01,
         9.98848021e-01, -7.26091444e-01,  2.66813248e-01,
         9.99982357e-01, -9.27008331e-01, -9.99804497e-01,
        -2.73892432e-01, -1.64704517e-01, -7.92609677e-02,
         2.17181191e-01, -9.94137228e-01, -5.99410355e-01,
        -5.14134705e-01,  5.16373932e-01, -3.33691478e-01,
        -7.10398927e-02,  4.79012311e-01, -8.32551360e-01,
        -1.22458450e-01,  1.83899030e-01, -1.30599797e-01,
        -5.76186180e-01, -2.60638386e-01,  2.72634737e-02,
         2.74154454e-01, -7.35573620e-02, -4.77796704e-01,
         6.39245093e-01,  3.58521193e-01, -9.99894500e-01,
         9.99187052e-01, -9.50380206e-01,  1.91886544e-01,
         4.49410230e-01, -2.33424798e-01, -7.51726091e-

In [14]:
import tensorflow as tf
import numpy as np

# Carregar o modelo
model = tf.saved_model.load('BertLarge_tf2')
tokenizer = BertTokenizer.from_pretrained('BertLarge')


# Inserir a frase de texto para inferência
texto = "O livro é extraordinário."

# Tokenizar a frase
inputs = tokenizer(texto, return_tensors="tf", truncation=True, padding=True, max_length=128)

# Converter os input_ids para int64
input_ids = tf.cast(inputs["input_ids"], tf.int64)

# Realizar a inferência
output = model.signatures['serving_default'](input_ids)

# Processar e exibir os resultados
print(output)

{'3024': <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
array([[-0.09614879, -0.71227956, -0.7714696 , -0.19271395,  0.34323257,
        -0.7332653 ,  0.6156603 , -0.14155614,  0.20977953,  0.5023877 ,
        -0.03889081,  0.97528267,  0.42031398, -0.3196186 , -0.15403397,
         0.9982634 , -0.32823274, -0.9000808 ,  0.6615581 ,  0.07415983,
        -0.4200374 , -0.11309409, -0.94325644, -0.49387875, -0.2748649 ,
         0.49467877, -0.848702  ,  0.9597428 ,  0.70885336, -0.882489  ,
        -0.14368351,  0.6262285 ,  0.56787217, -0.55577296, -0.836808  ,
         0.5837532 , -0.08329741, -0.5069609 , -0.24890509,  0.7291298 ,
        -0.03156374, -0.99966085,  0.99980044, -0.42752185,  0.8454337 ,
         0.79581594, -0.65876055, -0.5998109 , -0.19411978,  0.08658105,
        -0.44285685,  0.83862734, -0.6361888 ,  0.09335837, -0.4348568 ,
        -0.1172437 , -0.71761703,  0.71362376, -0.10231866, -0.29941216,
        -0.9915208 , -0.98153955,  0.7785739 , -0.61398274, -0.3

<hr>

<hr>

# Conversão do BertLarge: Pytorch >> TF 2.x .h5 e Checkpoints

In [23]:
from transformers import BertModel, BertConfig, TFBertModel
import torch
import tensorflow as tf

# Caminho para o seu modelo PyTorch .bin
path_to_pytorch_model = 'BertLarge'

# Carrega o modelo PyTorch
config = BertConfig.from_pretrained(path_to_pytorch_model)
pytorch_model = BertModel.from_pretrained(path_to_pytorch_model, config=config)




In [25]:
# Converte o modelo PyTorch para TensorFlow
tensorflow_model = TFBertModel.from_pretrained(path_to_pytorch_model, config=config, from_pt=True)

# Salva o modelo TensorFlow como um checkpoint .h5 e o seu config.json
tensorflow_model.save_pretrained('Modelo_Tensorflow2')

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predict

In [27]:
# Cria um objeto Checkpoint do TensorFlow
ckpt = tf.train.Checkpoint(model=tensorflow_model)

# Salva o modelo no formato .ckpt
ckpt_manager = tf.train.CheckpointManager(ckpt, 'Checkpoints_Tensorflow2', max_to_keep=1)
ckpt_manager.save()

'Checkpoints_Tensorflow\\ckpt-1'

<hr>

# Carregamento do modelo TF 2.x em formato .h5 e os Checkpoints

In [1]:
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import BertConfig


# Carrega a configuração do modelo
config = BertConfig.from_json_file('Modelo_Tensorflow2/config.json')

# Inicialize o modelo com esta configuração
# Carrega o modelo
model = TFBertForSequenceClassification.from_pretrained('Modelo_Tensorflow2/tf_model.h5', config=config)

# Carrega o Tokenizer
tokenizer = BertTokenizer.from_pretrained('BertLarge')


C:\Users\msanto22\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
C:\Users\msanto22\Anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at Modelo_Tensorflow/tf_model.h5 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Crie um objeto tf.train.Checkpoint que gerenciará os checkpoints
ckpt = tf.train.Checkpoint(model=model)

# Caminho para o diretório do checkpoint
checkpoint_path = "Checkpoints_Tensorflow2/ckpt"

# Carrega os pesos do checkpoint
ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()

<tensorflow.python.checkpoint.checkpoint.InitializationOnlyStatus at 0x1a711993700>

In [15]:
print(ckpt)

<tensorflow.python.checkpoint.checkpoint.Checkpoint object at 0x000001A711AEC070>


In [49]:
# Exemplo de texto para fazer a inferência
input_text = "Exemplo de texto para teste."

# Tokeniza o texto
inputs = tokenizer(input_text, return_tensors="tf", truncation=True, max_length=512)

# Realiza a inferência
outputs = model(inputs)

# As saídas são logits, aplicar softmax para obter probabilidades
predictions = tf.nn.softmax(outputs.logits, axis=-1)

# Obter a classe com a maior probabilidade
predicted_class_idx = tf.argmax(predictions, axis=-1).numpy()[0]

print("Classe predita:", predicted_class_idx)

Classe predita: 0


In [50]:
predictions

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.5430834 , 0.45691657]], dtype=float32)>

<hr>

<hr>

# Recriar o modelo TensorFlow 2.x em Tensorflow 1.x


## 1. Salvar os Pesos do Modelo TensorFlow 2.x


In [53]:
import pickle

weights = []
for layer in model.layers:

    w = layer.get_weights()
    if len(w)>0:
      print(layer.name)
      weights.append(w)


with open('Bert-large_weights.pkl', 'wb') as f:
  pickle.dump(weights, f)  


bert
classifier


## 2. Recriar o Modelo e Carregar os Pesos no TensorFlow 1.x


In [None]:
# conda activate tf1.15

In [None]:
# Abrir o Jupyter Notebook a partir do env de TF 1.15.5

# python -m ipykernel install --user --name=tf1.15 --display-name="Python 3.6.13 (tf1.15)"

# jupyter notebook

In [1]:
import tensorflow as tf
tf.__version__

'1.15.5'

In [4]:
import os 
import tensorflow as tf
import pickle
from transformers import BertConfig, BertModel

In [5]:
from bert import modeling

In [6]:
# Carregando os pesos do arquivo pickle
with open('Bert-large_weights.pkl', 'rb') as f:
    weights = pickle.load(f)

In [7]:
# No env com TensorFlow 1.x instalado
# Recriando a arquitetura do modelo BERT Large no TensorFlow 1.x

# Caminho para a pasta do BERT
bert_dir = "bert_cased_L-24_H-1024_A-16"

# Carregar a configuração do BERT
bert_config = modeling.BertConfig.from_json_file(os.path.join(bert_dir, "bert_config.json"))

In [5]:
# Criar a arquitetura do modelo BERT
input_ids = tf.placeholder(tf.int32, [None, None])
input_mask = tf.placeholder(tf.int32, [None, None])
token_type_ids = tf.placeholder(tf.int32, [None, None])

model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=token_type_ids
)

# Inicialize uma sessão TensorFlow
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Obter todas as variáveis treináveis do modelo
tvars = tf.trainable_variables()

# Mapear e carregar os pesos
for var, layer_weights in zip(tvars, weights):
    # Verificar se o peso é uma lista com dois elementos (pesos e vieses) ou um único tensor de pesos
    if isinstance(layer_weights, list) and len(layer_weights) == 2:
        # Atribuir pesos e vieses, se ambos estiverem presentes e as formas corresponderem
        if var.shape.as_list() == layer_weights[0].shape and var.shape.as_list() == layer_weights[1].shape:
            sess.run(var.assign(layer_weights[0]))
    elif isinstance(layer_weights, list) and len(layer_weights) == 1:
        # Atribuir apenas pesos, se apenas um tensor de pesos estiver presente e as formas corresponderem
        if var.shape.as_list() == layer_weights[0].shape:
            sess.run(var.assign(layer_weights[0]))

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [6]:
# Imprimir os valores de algumas variáveis
for var in tvars[:5]:  # Ajuste este número para ver mais variáveis
    print(var.name, sess.run(var))


bert/embeddings/word_embeddings:0 [[-0.00908107  0.01854071 -0.03444754 ... -0.01280272 -0.00804984
   0.01674145]
 [-0.01491163 -0.00931816  0.02172526 ...  0.00909788 -0.02419336
   0.02402307]
 [ 0.01435304  0.00655961  0.01152861 ...  0.01686356  0.01133896
  -0.02278557]
 ...
 [-0.01476307 -0.01534108 -0.0136062  ...  0.01461376 -0.02968218
  -0.01352432]
 [ 0.01957903 -0.01744184 -0.01083578 ...  0.00355751  0.00789495
   0.01774943]
 [-0.00472241  0.00728642 -0.02555941 ... -0.02155046  0.01164685
   0.02980229]]
bert/embeddings/token_type_embeddings:0 [[ 0.01508123 -0.01289781 -0.00329514 ... -0.02437756  0.0068311
   0.03512117]
 [-0.01824484  0.03870416  0.02496896 ...  0.02759647  0.02609741
   0.00990113]]
bert/embeddings/position_embeddings:0 [[-0.01041236 -0.02646815  0.00281126 ... -0.01940365 -0.00284836
   0.0181841 ]
 [ 0.01152825  0.00357219  0.02551587 ... -0.0245837  -0.0064114
   0.00420156]
 [-0.01283735  0.01320586  0.02625499 ...  0.01980915 -0.00878631
  -0.02

In [52]:
# Caminho onde o modelo será salvo
save_path = "BertLarge_tf1/model.ckpt"

# Criar um objeto Saver
saver = tf.train.Saver()

# Salvar o modelo
saver.save(sess, save_path)


'BBBertLarge_tf1/model.ckpt'

In [7]:
# Caminho onde o modelo será salvo
save_path = "BertLarge_tf1_v2/model.ckpt"

# Criar um objeto Saver
saver = tf.train.Saver()

# Salvar o modelo
saved_path = saver.save(sess, save_path)

print("Modelo salvo em: {}".format(saved_path))


Modelo salvo em: BBBertLarge_tf1_v2/model.ckpt


In [8]:
import tensorflow as tf

# Supondo que 'tvars' contém as variáveis treináveis do modelo
tvars = tf.trainable_variables()

# Criar um objeto Saver para salvar apenas as variáveis treináveis
saver = tf.train.Saver(tvars)

# Caminho para salvar os pesos do modelo
save_path = "BertLarge_tf1_v3/bert.ckpt"

# Salvar os pesos do modelo
saved_path = saver.save(sess, save_path)

print("Pesos do modelo salvos em: {}".format(saved_path))


Pesos do modelo salvos em: BBBertLarge_tf1_v3/bert.ckpt


In [7]:
# Get the variables of the model
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variables

[<tf.Variable 'bert/embeddings/word_embeddings:0' shape=(28996, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/token_type_embeddings:0' shape=(2, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/position_embeddings:0' shape=(512, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/beta:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/gamma:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/value/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/s

In [10]:
import tensorflow as tf

# Create a saver object
saver = tf.train.Saver()

# Restore the checkpoint
with tf.Session() as sess:
    saver.restore(sess, "BertLarge_tf1_v2/model.ckpt")
    
    # Print the shapes and embeddings of the variables
    for var in variables:
        print(var.name, var.shape)
    if "embedding" in var.name:
        print(sess.run(var))

INFO:tensorflow:Restoring parameters from BBBertLarge_tf1_v2/model.ckpt
bert/embeddings/word_embeddings:0 (28996, 1024)
bert/embeddings/token_type_embeddings:0 (2, 1024)
bert/embeddings/position_embeddings:0 (512, 1024)
bert/embeddings/LayerNorm/beta:0 (1024,)
bert/embeddings/LayerNorm/gamma:0 (1024,)
bert/encoder/layer_0/attention/self/query/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/query/bias:0 (1024,)
bert/encoder/layer_0/attention/self/key/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/key/bias:0 (1024,)
bert/encoder/layer_0/attention/self/value/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/value/bias:0 (1024,)
bert/encoder/layer_0/attention/output/dense/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/output/dense/bias:0 (1024,)
bert/encoder/layer_0/attention/output/LayerNorm/beta:0 (1024,)
bert/encoder/layer_0/attention/output/LayerNorm/gamma:0 (1024,)
bert/encoder/layer_0/intermediate/dense/kernel:0 (1024, 4096)
bert/encoder/layer_0/int

In [30]:
# Get the variables of the model
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variables

[<tf.Variable 'bert/embeddings/word_embeddings:0' shape=(28996, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/token_type_embeddings:0' shape=(2, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/position_embeddings:0' shape=(512, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/beta:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/embeddings/LayerNorm/gamma:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/query/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/key/bias:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/self/value/kernel:0' shape=(1024, 1024) dtype=float32_ref>,
 <tf.Variable 'bert/encoder/layer_0/attention/s

In [None]:
# OUTRA CONSTRUÇÃO, COM A TRANSPOSIÇÃO

In [48]:
import tensorflow as tf

# Reiniciar o grafo TensorFlow para evitar conflitos de variáveis
tf.reset_default_graph()


In [8]:
import tensorflow as tf
from tensorflow.python.training import saver as saver_lib
from bert import modeling

# Supondo que 'bert_config' e 'weights' são carregados de acordo com suas necessidades

# Criar a arquitetura do modelo BERT
input_ids = tf.placeholder(tf.int32, [None, None])
input_mask = tf.placeholder(tf.int32, [None, None])
token_type_ids = tf.placeholder(tf.int32, [None, None])

model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=token_type_ids
)

# Inicialize uma sessão TensorFlow
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Obter todas as variáveis treináveis do modelo
tvars = tf.trainable_variables()

# Mapear e carregar os pesos
for var, layer_weights in zip(tvars, weights):
    # Verificar se o peso é uma lista com dois elementos (pesos e vieses) ou um único tensor de pesos
    if isinstance(layer_weights, list):
        # Atribuir pesos e vieses
        if len(layer_weights) == 2:
            # Ajustar a forma dos pesos, se necessário
            if var.shape.as_list() == [i for i in reversed(layer_weights[0].shape)]:
                sess.run(var.assign(tf.transpose(layer_weights[0])))
            else:
                sess.run(var.assign(layer_weights[0]))
        elif len(layer_weights) == 1:
            # Atribuir apenas os pesos
            # Ajustar a forma dos pesos, se necessário
            if var.shape.as_list() == [i for i in reversed(layer_weights[0].shape)]:
                sess.run(var.assign(tf.transpose(layer_weights[0])))
            else:
                sess.run(var.assign(layer_weights[0]))


Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [9]:
all_vars = tf.global_variables()
for var in all_vars:
    print(var.name, var.shape)


bert/embeddings/word_embeddings:0 (29794, 1024)
bert/embeddings/token_type_embeddings:0 (2, 1024)
bert/embeddings/position_embeddings:0 (512, 1024)
bert/embeddings/LayerNorm/beta:0 (1024,)
bert/embeddings/LayerNorm/gamma:0 (1024,)
bert/encoder/layer_0/attention/self/query/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/query/bias:0 (1024,)
bert/encoder/layer_0/attention/self/key/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/key/bias:0 (1024,)
bert/encoder/layer_0/attention/self/value/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/self/value/bias:0 (1024,)
bert/encoder/layer_0/attention/output/dense/kernel:0 (1024, 1024)
bert/encoder/layer_0/attention/output/dense/bias:0 (1024,)
bert/encoder/layer_0/attention/output/LayerNorm/beta:0 (1024,)
bert/encoder/layer_0/attention/output/LayerNorm/gamma:0 (1024,)
bert/encoder/layer_0/intermediate/dense/kernel:0 (1024, 4096)
bert/encoder/layer_0/intermediate/dense/bias:0 (4096,)
bert/encoder/layer_0/output/dense/kernel:

In [10]:
saver = tf.train.Saver()
saver.save(sess, 'BertLarge_tf1_v4/model.ckpt')


'BBBertLarge_tf1_v4/model.ckpt'

<hr>

<hr>

### SUCESSO AO IMPRIMIR A SAÍDA DO MODELO

In [64]:
import tensorflow as tf

# Reiniciar o grafo TensorFlow para evitar conflitos de variáveis
tf.reset_default_graph()

In [66]:
import tensorflow as tf
from bert import modeling, tokenization
import os

In [67]:
# No env com TensorFlow 1.x instalado
# Recriando a arquitetura do modelo BERT Large no TensorFlow 1.x

# Caminho para a pasta do BERT
bert_dir = "BertLarge_tf1_v4"

# Carregar a configuração do BERT
bert_config = modeling.BertConfig.from_json_file(os.path.join(bert_dir, "config.json"))

In [68]:
# Reconstruir o grafo do modelo BERT
input_ids = tf.placeholder(tf.int32, [None, None])
input_mask = tf.placeholder(tf.int32, [None, None])
token_type_ids = tf.placeholder(tf.int32, [None, None])

model = modeling.BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask,
    token_type_ids=token_type_ids
)

# Restaurar os pesos do modelo
saver = tf.train.Saver()
checkpoint_path = "BertLarge_tf1_v4/model.ckpt"

with tf.Session() as sess:
    # Restaurar os pesos
    saver.restore(sess, checkpoint_path)
    
    
    bert_output = sess.run(model.get_pooled_output(), feed_dict={
        input_ids: input_ids_np,   # Chaves são os objetos tf.placeholder
        input_mask: input_mask_np,
        token_type_ids: segment_ids_np
    })

# Imprimir a saída do modelo
print("BERT output:", bert_output)

INFO:tensorflow:Restoring parameters from BBBertLarge_tf1_v4/model.ckpt
BERT output: [[-0.59119755 -0.8866554  -0.02629808 ...  0.30711493  0.4135409
  -0.7138794 ]]


Reconstrução do Grafo do Modelo BERT: Primeiro, foi feita reconstrução da arquitetura do modelo BERT utilizando TensorFlow. Isso é feito definindo placeholders para input_ids, input_mask e token_type_ids, que são estruturas de dados padrão do TensorFlow usadas para alimentar dados para um modelo. Depois, criação de uma instância do modelo BERT (model) com esses placeholders como entrada.

Restauração dos Pesos do Modelo: Uso de um objeto Saver do TensorFlow para carregar os pesos do modelo a partir de um checkpoint (checkpoint_path). Isso significa que o estado treinado do modelo BERT, incluindo todos os pesos e bias aprendidos durante o treinamento, foram carregados na sessão TensorFlow atual.

Execução do Modelo e Obtenção da Saída: Com a sessão TensorFlow ativa, foram fornecidos os dados de entrada (convertidos para arrays NumPy) ao modelo e executado o método get_pooled_output(). Este método retorna a saída da última camada do modelo BERT, que é um vetor de características condensadas (ou embeddings) para a entrada fornecida.

>>>>
Saída do Modelo BERT: O resultado impresso, que parece ser um array de números reais, é a representação vetorial da entrada fornecida no espaço de características do modelo BERT. Cada número neste vetor é um componente da representação e é usado para tarefas de processamento de linguagem natural, como classificação de texto, análise de sentimento, reconhecimento de entidades nomeadas, etc.

Essencialmente, a saída é a transformação de um texto de entrada em um vetor denso de recursos que encapsula informações semânticas e contextuais aprendidas pelo modelo BERT durante seu treinamento. Este vetor pode então ser utilizado em várias tarefas de NLP para realizar inferências baseadas no conteúdo e no contexto do texto.

In [74]:
# Modelo Bert

def list_variables_in_checkpoint(checkpoint_path):
    try:
        # Carregar o checkpoint
        reader = tf.train.load_checkpoint(checkpoint_path)
        
        # Listar todas as chaves (nomes dos tensors)
        return reader.get_variable_to_dtype_map().keys()
    except Exception as e:  
        print("Erro ao carregar o checkpoint:", e)
        return []


checkpoint_path = 'BertLarge_tf1_v4'
variables = list_variables_in_checkpoint(checkpoint_path)
for var in variables:
    print(var)


bert/encoder/layer_0/output/dense/kernel
bert/encoder/layer_0/intermediate/dense/kernel
bert/embeddings/LayerNorm/beta
bert/encoder/layer_9/attention/self/key/bias
bert/encoder/layer_18/attention/self/value/bias
bert/encoder/layer_11/attention/output/dense/kernel
bert/encoder/layer_16/attention/self/key/kernel
bert/embeddings/LayerNorm/gamma
bert/embeddings/position_embeddings
bert/encoder/layer_5/intermediate/dense/bias
bert/encoder/layer_2/attention/output/LayerNorm/gamma
bert/encoder/layer_11/attention/self/query/bias
bert/encoder/layer_23/intermediate/dense/bias
bert/encoder/layer_1/attention/self/key/kernel
bert/encoder/layer_5/output/dense/bias
bert/embeddings/token_type_embeddings
bert/encoder/layer_10/intermediate/dense/kernel
bert/embeddings/word_embeddings
bert/encoder/layer_17/attention/self/value/bias
bert/encoder/layer_0/attention/output/LayerNorm/beta
bert/encoder/layer_0/attention/output/dense/bias
bert/encoder/layer_13/attention/output/LayerNorm/gamma
bert/encoder/layer

In [75]:
# Modelo original bert_cased_L-24_H-1024_A-16 (pré-BERTimbau)

def list_variables_in_checkpoint(checkpoint_path):
    try:
        # Carregar o checkpoint
        reader = tf.train.load_checkpoint(checkpoint_path)
        
        # Listar todas as chaves (nomes dos tensors)
        return reader.get_variable_to_dtype_map().keys()
    except Exception as e:  
        print("Erro ao carregar o checkpoint:", e)
        return []


checkpoint_path = 'bert_cased_L-24_H-1024_A-16/bert_model.ckpt'
variables = list_variables_in_checkpoint(checkpoint_path)
for var in variables:
    print(var)


bert/encoder/layer_0/output/dense/kernel
bert/encoder/layer_0/intermediate/dense/kernel
bert/embeddings/LayerNorm/beta
bert/encoder/layer_9/attention/self/key/bias
bert/encoder/layer_18/attention/self/value/bias
bert/encoder/layer_11/attention/output/dense/kernel
bert/encoder/layer_16/attention/self/key/kernel
bert/embeddings/LayerNorm/gamma
bert/embeddings/position_embeddings
bert/encoder/layer_5/intermediate/dense/bias
bert/encoder/layer_2/attention/output/LayerNorm/gamma
bert/encoder/layer_11/attention/self/query/bias
bert/encoder/layer_23/intermediate/dense/bias
bert/encoder/layer_1/attention/self/key/kernel
bert/encoder/layer_5/output/dense/bias
bert/embeddings/token_type_embeddings
bert/encoder/layer_10/intermediate/dense/kernel
bert/embeddings/word_embeddings
bert/encoder/layer_17/attention/self/value/bias
bert/encoder/layer_0/attention/output/LayerNorm/beta
bert/encoder/layer_0/attention/output/dense/bias
bert/encoder/layer_13/attention/output/LayerNorm/gamma
bert/encoder/layer