In [1]:
import os
# Switch the working directory to the project root so relative paths resolve against it.
# NOTE(review): absolute, machine-specific path — adjust when running on another machine.
path = 'C:/pytest/'
os.chdir(path)

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [5]:
from tqdm import tqdm
from transformers import TFBertModel, BertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import matplotlib.pyplot as plt

In [44]:
import warnings
# Suppress warnings of every category from this point on.
warnings.filterwarnings('ignore')

In [6]:
def plot_graphs(history, string):
    """Plot one training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (presumably the object returned by
            ``Model.fit`` — confirm against the caller).
        string: Name of the metric to plot; the validation series is read
            from the key ``'val_' + string``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.plot(history.history[string])
    val_key = 'val_' + string
    # Empty format string: no explicit colour/marker/line style is requested.
    plt.plot(history.history[val_key], '')
    plt.xlabel('Epochs')
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()

In [7]:
# Fix the global TensorFlow and NumPy seeds so random draws are repeatable across runs.
tf.random.set_seed(1111)
np.random.seed(1111)

In [8]:
# Hyper-parameters and data locations used by the rest of the notebook.
# Author's note: unlike a plain deep network, BERT is built on the Transformer architecture.
# The classifier holds a separate set of weights for each of the two labels (0 and 1),
# so the scores for both classes have to be produced.
CLASS_NUMBER = 2 
BATCH_SIZE = 32
NUM_EPOCHS = 2
VALID_SPLIT = 0.2
MAX_LEN = 40
# NOTE(review): absolute Windows paths — adjust when running on another machine.
BERT_CKPT = 'C:/pytest/data/KOR/BERT/bert_ckpt/'
DATA_IN_PATH = 'C:/pytest/data/KOR/naver_movie/data_in/'
DATA_OUT_PATH = 'C:/pytest/data/KOR/BERT/data_out/'

In [21]:
def listToString(listdata):
    """Serialise rows into tab-separated text with an ``id/document/label`` header.

    Args:
        listdata: Iterable of rows; each non-empty row must provide at least
            three string fields. Only the first three fields are written.

    Returns:
        A single string: the header line followed by one line per non-empty
        row, each terminated by a newline. Empty rows are skipped.
    """
    header = 'id\tdocument\tlabel\n'
    body_lines = [
        '\t'.join((row[0], row[1], row[2])) + '\n'
        for row in listdata
        if row
    ]
    return header + ''.join(body_lines)

def read_data(filename, encoding = 'cp949', start = 0):
    """Read a tab-separated text file into a list of rows.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file (default ``'cp949'``).
        start: Index of the first line to keep; earlier lines are dropped
            (e.g. ``start=1`` skips the first line).

    Returns:
        A list with one entry per kept line, each entry being the list of
        fields obtained by splitting that line on tabs.
    """
    with open(filename, 'r', encoding=encoding) as handle:
        # NOTE: str.splitlines() also breaks on other Unicode line boundaries
        # (e.g. \x0b, \x0c, \u2028), not only on '\n'.
        lines = handle.read().splitlines()
    rows = [line.split('\t') for line in lines]
    return rows[start:]

def write_data(data, filename, encoding = 'cp949'):
    """Write text to a file, replacing any existing content.

    Args:
        data: The string to write.
        filename: Destination path; the file is opened in ``'w'`` mode.
        encoding: Text encoding used when writing (default ``'cp949'``).
    """
    with open(filename, mode='w', encoding=encoding) as out_file:
        out_file.write(data)

In [22]:
# Load the ratings file as a list of tab-split rows; start=1 drops the first line
# (presumably the header row — listToString writes its own header later).
data_ratings = read_data(os.path.join(DATA_IN_PATH, 'ratings_utf8_small.txt'), encoding = 'utf-8', start = 1)

In [23]:
from sklearn.model_selection import train_test_split
# Hold out a test split (scikit-learn's default test_size of 25% applies).
# random_state pins the shuffle: without it the split depends on how much of the
# global NumPy RNG state has been consumed, so re-running this cell would silently
# change the train/test files written by the following cells.
ratings_train, ratings_test = train_test_split(data_ratings, random_state=1111)

In [24]:
# Serialise both splits to tab-separated text (header line included).
# NOTE(review): the names are rebound from lists of rows to strings, so this cell is
# not re-runnable on its own — a second run would iterate over the characters of the
# string and fail inside listToString. Re-run the split cell first.
ratings_train = listToString(ratings_train)
ratings_test = listToString(ratings_test)

In [25]:
# Write the serialised train/test splits as UTF-8 text files under DATA_IN_PATH.
write_data(ratings_train, os.path.join(DATA_IN_PATH,'ratings_train.txt'), encoding = 'utf-8')
write_data(ratings_test, os.path.join(DATA_IN_PATH,'ratings_test.txt'), encoding = 'utf-8')

In [30]:
DATA_TRAIN_PATH = os.path.join(DATA_IN_PATH, 'ratings_train.txt')
DATA_TEST_PATH = os.path.join(DATA_IN_PATH, 'ratings_test.txt')

# quoting=3 (csv.QUOTE_NONE): quote characters inside the review text are read as ordinary characters
train_data = pd.read_csv(DATA_TRAIN_PATH, header=0, delimiter= '\t', quoting= 3)
# Drop rows that have a missing value in any column.
train_data = train_data.dropna()
train_data.head()

Unnamed: 0,id,document,label
0,1364440,내 인생의 영화,1
1,7216894,각본은 인터넷 찌라시 수준 연기는 허접 배우들 얼굴은 부담,0
2,9461185,제가 좋아하는영화중 진짜찐짜좋아하는영화... :),1
3,9738357,솔직히..땡칠이는 너무 날로 먹는거 아냐?,0
4,9932738,좋은 영화. 다만 엔딩에서 주인공이 배심원들을 상대로 늘어놓는 일장연설이 너무 작위...,1


In [26]:
# Load the tokenizer of the cased multilingual BERT checkpoint; files are cached under BERT_CKPT/tokenizer.
# do_lower_case=False asks the tokenizer not to lowercase the input text.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased',
                                         cache_dir = os.path.join(BERT_CKPT, 'tokenizer'), do_lower_case = False)

Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

In [27]:
import pickle

# Serialise the tokenizer object to DATA_OUT_PATH with pickle.
# The directory is created first: open() does not create missing parent directories,
# and nothing in the cells above creates DATA_OUT_PATH.
os.makedirs(DATA_OUT_PATH, exist_ok=True)
with open(os.path.join(DATA_OUT_PATH, 'bert_tokenizer.pickle'), 'wb') as file:
    pickle.dump(tokenizer, file, protocol=pickle.HIGHEST_PROTOCOL)

In [28]:
# Encode a sample sentence to token ids, then decode every id on its own
# to see the individual word pieces the tokenizer produced.
test_sentence = '안녕하세요, 반갑습니다.'

encode = tokenizer.encode(test_sentence)
token_print = [tokenizer.decode(token) for token in encode]

print(encode)
print(token_print)

[101, 9521, 118741, 35506, 24982, 48549, 117, 9321, 118610, 119081, 48345, 119, 102]
['[ C L S ]', '안', '# # 녕', '# # 하', '# # 세', '# # 요', ',', '반', '# # 갑', '# # 습', '# # 니 다', '.', '[ S E P ]']


In [29]:
# BERT tokenizer practice 2: encode/decode round trip for a Korean and an English sentence
kor_encode = tokenizer.encode('안녕하세요, 반갑습니다')
eng_encode = tokenizer.encode('Hello world')
kor_decode = tokenizer.decode(kor_encode)
eng_decode = tokenizer.decode(eng_encode)

print(kor_encode,kor_decode, eng_encode, eng_decode,sep ='\n')

[101, 9521, 118741, 35506, 24982, 48549, 117, 9321, 118610, 119081, 48345, 102]
[CLS] 안녕하세요, 반갑습니다 [SEP]
[101, 31178, 11356, 102]
[CLS] Hello world [SEP]


In [31]:
print(tokenizer.all_special_tokens)
# [UNK] ("unknown"): the token used so that input the tokenizer does not know can still be handled

['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']


In [32]:
# User-defined BERT tokenizer helper
def bert_tokenizer(sent, MAX_LEN):
    """Encode one sentence into fixed-length BERT inputs.

    Uses the module-level ``tokenizer``. Special tokens are added, and the
    sequence is truncated or padded to exactly ``MAX_LEN`` positions.

    Args:
        sent: The sentence to encode.
        MAX_LEN: Target sequence length used for both truncation and padding.

    Returns:
        A tuple ``(input_id, attention_mask, token_type_id)``:
        - input_id: vocabulary index of every token.
        - attention_mask: 0 at padded positions so they receive no attention.
        - token_type_id: segment ids separating the two sentences of a
          sentence pair; with single-sentence input, as here, every entry is 0.
    """
    encoded = tokenizer.encode_plus(
        text=sent,
        add_special_tokens=True,
        max_length=MAX_LEN,
        padding='max_length',  # pad every sequence up to max_length
        truncation=True,
        return_attention_mask=True,
    )
    return encoded['input_ids'], encoded['attention_mask'], encoded['token_type_ids']

In [33]:
# Convert the training data into BERT inputs
input_ids = []
attention_masks = []
token_type_ids = []
train_data_labels = []

for train_sent, train_label in tqdm(zip(train_data['document'], train_data['label']), total = len(train_data)):
    try:
        input_id, attention_mask, token_type_id = bert_tokenizer(train_sent, MAX_LEN)

        # Collect the encoded pieces and the label of this sentence
        input_ids.append(input_id)
        attention_masks.append(attention_mask)
        token_type_ids.append(token_type_id)
        train_data_labels.append(train_label)
    except Exception as e:
        # Best effort: report the failing sentence and continue with the next one
        print(e)
        print(train_sent)

100%|██████████████████████████████████████████████████████████████████████████████| 375/375 [00:00<00:00, 3917.38it/s]


In [34]:
# Stack the per-sentence lists into integer arrays, one row per sentence.
train_movie_input_ids = np.array(input_ids, dtype = int)
train_movie_attention_masks = np.array(attention_masks, dtype = int)
train_movie_type_ids = np.array(token_type_ids, dtype = int)
# Bundle the three arrays in the order (input ids, attention masks, token type ids).
train_movie_inputs = (train_movie_input_ids, train_movie_attention_masks, train_movie_type_ids)

train_data_labels = np.asarray(train_data_labels, dtype = np.int32)
# Sanity check: the number of encoded sentences should equal the number of labels.
print(len(train_movie_input_ids), len(train_data_labels))

375 375


In [35]:
# Inspect the encoded form of the training sentence at index 1.
input_id = train_movie_input_ids[1] # tokens converted to vocabulary indices
attention_mask = train_movie_attention_masks[1] # which tokens should be attended to
token_type_id = train_movie_type_ids[1] # currently 0 for every position

print(input_id)
print(attention_mask)
# Only the positions holding padding (0) get a 0 in attention_mask,
# so no attention is applied to them
print(token_type_id)
print(tokenizer.decode(input_id))

[   101   8844  40419  10892   9640  21876  82881   9727  17342  14040
   9460  54867   9568  46216   9968 119205  84703  27023   9551 118654
  10892   9365 105462    102      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0]
[CLS] 각본은 인터넷 찌라시 수준 연기는 허접 배우들 얼굴은 부담 [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]


In [37]:
# User-defined BERT classifier
class TFBertClassifier(tf.keras.Model):
    '''Pretrained BERT encoder followed by dropout and a dense classification head.

    The model returns raw, unnormalised class scores (logits) of width
    ``num_class``. The loss used in this notebook is built with
    ``from_logits=True`` and therefore applies softmax itself; adding a softmax
    here as well would normalise the scores twice.

    Contents of the wrapped BERT model's outputs[0]~[3]:
        0: sequence output = hidden state of each token of the input
           sentence (last_hidden_state)
        1: pooler_output = hidden state for the whole sentence; an activation
           function has already been applied
        2: hidden_states = optional. Hidden states of every layer and of the
           initial embedding output
        3: attentions = optional. Attention weights

    Args:
        model_name: Name of the pretrained checkpoint passed to
            ``TFBertModel.from_pretrained``.
        dir_path: Cache directory passed to ``from_pretrained``.
        num_class: Number of output classes (width of the final Dense layer).
    '''
    def __init__(self, model_name, dir_path, num_class):
        super().__init__()
        self.bert = TFBertModel.from_pretrained(model_name, cache_dir=dir_path)
        # Reuse the dropout rate stored in the BERT configuration.
        self.dropout = tf.keras.layers.Dropout(self.bert.config.hidden_dropout_prob)
        # Output layer. No activation is set, so the layer emits linear scores
        # (logits); softmax is left to the from_logits=True loss.
        self.classifier = tf.keras.layers.Dense(
            num_class,
            name='classifier',
            # Initial kernel values use the initializer range from the BERT config.
            kernel_initializer=tf.keras.initializers.TruncatedNormal(self.bert.config.initializer_range),
        )

    def call(self, inputs, attention_mask = None, token_type_ids = None, training = False):
        '''Run BERT and the classification head.

        Args:
            inputs: Input passed as the first argument to the BERT model.
            attention_mask: Optional attention mask forwarded to BERT.
            token_type_ids: Optional segment ids forwarded to BERT.
            training: Whether dropout is active; forwarded to BERT and to the
                dropout layer.

        Returns:
            Logits produced by the Dense head from the pooled output.
        '''
        # outputs: sequence_output, pooler_output, (hidden_states), (attentions)
        outputs = self.bert(inputs, attention_mask=attention_mask,
                            token_type_ids=token_type_ids, training=training)
        pooler_output = outputs[1]
        # Forward the training flag explicitly so dropout is active only while training.
        pooler_output = self.dropout(pooler_output, training=training)
        logits = self.classifier(pooler_output)
        return logits

In [39]:
# Build the classifier on top of 'bert-base-multilingual-cased'; the cache directory is BERT_CKPT/model.
cls_model = TFBertClassifier(model_name = 'bert-base-multilingual-cased', dir_path = os.path.join(BERT_CKPT, 'model'), num_class = CLASS_NUMBER)

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/1.08G [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [40]:
# Adam optimizer with a learning rate of 3e-5.
optimizer = tf.keras.optimizers.Adam(3e-5)
# from_logits=True: the loss applies softmax itself, so it expects the model to
# output raw, unnormalised scores. Labels are integer class ids (sparse form).
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
# Reported under the name 'accuracy'; the callbacks monitor its validation value 'val_accuracy'.
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
cls_model.compile(optimizer=optimizer, loss= loss, metrics = [metric])

In [42]:
model_name = 'tf2_bert'

# Stop training when val_accuracy has not improved by at least 0.0001 for 2 epochs.
# NOTE(review): with NUM_EPOCHS = 2 this patience presumably never triggers — confirm intent.
earlystop_callback = EarlyStopping(monitor= 'val_accuracy', min_delta = 0.0001, patience = 2)

# Checkpoint file: DATA_OUT_PATH/<model_name>/weights.h5
checkpoint_path = os.path.join(DATA_OUT_PATH, model_name, 'weights.h5')
checkpoint_dir = os.path.dirname(checkpoint_path)

# Make sure the checkpoint directory exists, and report whether it had to be created.
if os.path.exists(checkpoint_dir):
    print('exists')
else:
    os.makedirs(checkpoint_dir, exist_ok= True)
    print('create')

# Save weights only, and only when val_accuracy improves on the best value seen so far.
cp_callback = ModelCheckpoint(checkpoint_path, monitor= 'val_accuracy', verbose =1, save_best_only= True, save_weights_only= True)

create


In [46]:
# Fine-tune the classifier; the last VALID_SPLIT fraction of the data is held out for validation.
# NOTE(review): the recorded run of this cell aborted in epoch 1 with ResourceExhaustedError
# ("failed to allocate memory") inside the Adam update. Lowering BATCH_SIZE and/or MAX_LEN is
# the usual first thing to try — TODO confirm on the target machine.
history = cls_model.fit(train_movie_inputs, train_data_labels, epochs= NUM_EPOCHS, batch_size = BATCH_SIZE,
                       validation_split = VALID_SPLIT, callbacks= [earlystop_callback, cp_callback])

Epoch 1/2


ResourceExhaustedError: Graph execution error:

Detected at node 'Adam/Adam/update/mul_1' defined at (most recent call last):
    File "C:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "C:\Anaconda3\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "C:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 978, in launch_instance
      app.start()
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Anaconda3\lib\asyncio\base_events.py", line 534, in run_forever
      self._run_once()
    File "C:\Anaconda3\lib\asyncio\base_events.py", line 1771, in _run_once
      handle._run()
    File "C:\Anaconda3\lib\asyncio\events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 387, in do_execute
      cell_id=cell_id,
    File "C:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2976, in run_cell
      raw_cell, store_history, silent, shell_futures, cell_id
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3030, in _run_cell
      return runner(coro)
    File "C:\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3258, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3473, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\광주인공지능사관학교\AppData\Local\Temp\ipykernel_27272\1895341038.py", line 2, in <module>
      validation_split = VALID_SPLIT, callbacks= [earlystop_callback, cp_callback])
    File "C:\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 997, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 579, in minimize
      return self.apply_gradients(grads_and_vars, name=name)
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 744, in apply_gradients
      name=name,
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 801, in _distributed_apply
      group=False,
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 771, in apply_grad_to_update_var
      grad.values, var, grad.indices, **apply_kwargs
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 1454, in _resource_apply_sparse_duplicate_indices
      summed_grad, handle, unique_indices, **kwargs
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\adam.py", line 217, in _resource_apply_sparse
      m, m * coefficients["beta_1_t"], use_locking=self._use_locking
Node: 'Adam/Adam/update/mul_1'
Detected at node 'Adam/Adam/update/mul_1' defined at (most recent call last):
    File "C:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "C:\Anaconda3\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "C:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 978, in launch_instance
      app.start()
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Anaconda3\lib\asyncio\base_events.py", line 534, in run_forever
      self._run_once()
    File "C:\Anaconda3\lib\asyncio\base_events.py", line 1771, in _run_once
      handle._run()
    File "C:\Anaconda3\lib\asyncio\events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 387, in do_execute
      cell_id=cell_id,
    File "C:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2976, in run_cell
      raw_cell, store_history, silent, shell_futures, cell_id
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3030, in _run_cell
      return runner(coro)
    File "C:\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3258, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3473, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\광주인공지능사관학교\AppData\Local\Temp\ipykernel_27272\1895341038.py", line 2, in <module>
      validation_split = VALID_SPLIT, callbacks= [earlystop_callback, cp_callback])
    File "C:\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Anaconda3\lib\site-packages\keras\engine\training.py", line 997, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 579, in minimize
      return self.apply_gradients(grads_and_vars, name=name)
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 744, in apply_gradients
      name=name,
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 801, in _distributed_apply
      group=False,
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 771, in apply_grad_to_update_var
      grad.values, var, grad.indices, **apply_kwargs
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 1454, in _resource_apply_sparse_duplicate_indices
      summed_grad, handle, unique_indices, **kwargs
    File "C:\Anaconda3\lib\site-packages\keras\optimizers\optimizer_v2\adam.py", line 217, in _resource_apply_sparse
      m, m * coefficients["beta_1_t"], use_locking=self._use_locking
Node: 'Adam/Adam/update/mul_1'
2 root error(s) found.
  (0) RESOURCE_EXHAUSTED:  failed to allocate memory
	 [[{{node Adam/Adam/update/mul_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[gradient_tape/tf_bert_classifier/tf_bert_model/bert/encoder/layer_._7/attention/output/LayerNorm/batchnorm/mul/Shape_1/_324]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) RESOURCE_EXHAUSTED:  failed to allocate memory
	 [[{{node Adam/Adam/update/mul_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_24727]