In [1]:
import os
import math
import numpy as np
import pandas as pd
from tqdm import tqdm
#from kaggle_datasets import KaggleDatasets
import tensorflow as tf
import tensorflow.keras.backend as K

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Look for a TPU. The resolver takes no arguments when the TPU_NAME
# environment variable is set, which is always the case on Kaggle.
# A ValueError here means there is no TPU to attach to.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # TensorFlow's default distribution strategy: works on CPU and on a
    # single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the
    # strategy that replicates work across its cores.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [4]:
# Global batch size: 32 samples per replica, scaled by the replica count.
BATCH_SIZE = strategy.num_replicas_in_sync * 32
# Every title is padded / truncated to this many tokens by the tokenizer.
MAX_LENGTH = 270
# Checkpoint name passed to TFBertModel.from_pretrained in the training cell.
PRE_TRAINED_NAME = "bert-base-uncased"

In [None]:
# The `kaggle_datasets` import at the top of the notebook is commented out, so
# `KaggleDatasets` is normally undefined and calling it unconditionally raises
# NameError on "Restart & Run All". Resolve the bucket only when the helper is
# actually in scope; otherwise fall back to None (the notebook reads its data
# from a local CSV).
GCS_PATH = (
    KaggleDatasets().get_gcs_path('shop-512-size-tfrecords')
    if 'KaggleDatasets' in globals()
    else None
)

In [5]:
# Load the table of postings with their fold assignment. The columns used
# later in the notebook are `title`, `label_group` and `gfold`.
df = pd.read_csv(filepath_or_buffer="processed_data/fold_data.csv")
df.head(n=5)

Unnamed: 0,posting_id,image,image_phash,title,label_group,image_path,gfold
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,666,train_images\0000a68812bc7e98c42888dfb1c07da0.jpg,0
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",7572,train_images\00039780dfc94d01db8676fe789ecd05.jpg,2
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,6172,train_images\000a190fdd715a2a36faed16e2c65df7.jpg,0
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,10509,train_images\00117e4fc239b1b641ff08340b429633.jpg,1
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,9425,train_images\00136d1cf4edede0203f32f05f660588.jpg,3


In [6]:
# Per-fold sample counts. Despite the names these are numbers of rows, not
# numbers of steps; the training cell divides them by BATCH_SIZE.
train_steps, valid_steps = {}, {}
for fold in range(5):
    # Rows whose `gfold` equals the current fold form its validation split.
    valid_data = df.loc[df['gfold'] == fold].reset_index(drop=True)
    valid_steps[fold] = valid_data.shape[0]
    # Everything else is that fold's training split.
    train_steps[fold] = df.shape[0] - valid_steps[fold]

In [7]:
from transformers import TFBertModel,TFRobertaModel,TFAlbertModel,TFXLNetModel
from transformers import BertTokenizer,RobertaTokenizer,AlbertTokenizer,XLNetTokenizer

In [8]:
# Load the tokenizer from the same checkpoint name the model is built from
# (PRE_TRAINED_NAME), so the vocabulary and the weights cannot drift apart
# when the constant is changed.
TOKENIZER=BertTokenizer.from_pretrained(PRE_TRAINED_NAME)

In [9]:
def MAKE_MODEL_INPUTS(data):
    """Tokenize the `title` column of `data` and pair it with `label_group`.

    Titles are encoded with the module-level TOKENIZER in chunks of
    BATCH_SIZE rows, each padded / truncated to MAX_LENGTH tokens.

    Args:
        data: DataFrame providing a `title` and a `label_group` column.

    Returns:
        A pair ``(MODEL_INPUTS, MODEL_OUTPUTS)`` where ``MODEL_INPUTS`` is
        ``(features, labels)`` -- ``features`` being a dict with the int32
        arrays ``input_ids``, ``attention_mask`` and ``token_type_ids`` of
        shape ``(len(data), MAX_LENGTH)`` -- and ``MODEL_OUTPUTS`` is the same
        ``labels`` array (the labels are fed to the model as an input as well
        as being the target).
    """
    n_rows=data.shape[0]
    # One pre-allocated buffer per tokenizer output, filled chunk by chunk.
    features={
        key:np.zeros((n_rows,MAX_LENGTH),dtype=np.int32)
        for key in ("input_ids","attention_mask","token_type_ids")
    }
    labels=np.zeros((n_rows,))
    n_chunks=int(np.ceil(n_rows/BATCH_SIZE))
    for chunk_idx in tqdm(range(n_chunks)):
        lo=chunk_idx*BATCH_SIZE
        # The last chunk may be shorter than BATCH_SIZE.
        hi=min(lo+BATCH_SIZE,n_rows)
        rows=data.iloc[lo:hi]
        encoded=TOKENIZER.batch_encode_plus(rows['title'].values.tolist(),
                                            max_length=MAX_LENGTH,
                                            padding='max_length',
                                            truncation='longest_first')
        for key,buffer in features.items():
            buffer[lo:hi]=encoded[key]
        labels[lo:hi]=rows['label_group']
    return (features,labels),labels

In [11]:
# Pre-tokenize the train / held-out split of every fold once, keyed by fold
# index, so the training loop only has to look the arrays up.
FOLDS_INPUTS={}
for i in range(5):
    in_fold=df['gfold']==i
    train_data=df.loc[~in_fold].reset_index(drop=True)
    test_data=df.loc[in_fold].reset_index(drop=True)
    FOLDS_INPUTS[i]={
        'train':MAKE_MODEL_INPUTS(train_data),
        'test':MAKE_MODEL_INPUTS(test_data),
    }

100%|██████████| 857/857 [00:11<00:00, 77.89it/s]
100%|██████████| 215/215 [00:02<00:00, 76.33it/s]
100%|██████████| 857/857 [00:11<00:00, 71.46it/s]
100%|██████████| 215/215 [00:02<00:00, 75.87it/s]
100%|██████████| 857/857 [00:11<00:00, 76.54it/s]
100%|██████████| 215/215 [00:02<00:00, 76.75it/s]
100%|██████████| 857/857 [00:10<00:00, 79.48it/s]
100%|██████████| 215/215 [00:02<00:00, 72.85it/s]
100%|██████████| 857/857 [00:11<00:00, 71.66it/s]
100%|██████████| 215/215 [00:02<00:00, 82.13it/s]


In [13]:
from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Model

In [14]:
class ARCFACE_LAYER(tf.keras.layers.Layer):
    """ArcFace-style additive angular margin head.

    Called on ``[embeddings, labels]``. Returns the cosine similarities
    between the L2-normalised embeddings and the L2-normalised columns of the
    class weight matrix, multiplied by ``s``. For each sample, the entry of
    its own class uses ``cos(theta + m)`` instead of ``cos(theta)``.

    Args:
        m: angular margin in radians.
        s: scale applied to the returned similarities.
        n_classes: number of classes (columns of the weight matrix).
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``,
            ...). Needed so the dict produced by ``get_config`` can be passed
            back to the constructor.
    """
    def __init__(self,m=0.5,s=30,n_classes=11014,**kwargs):
        super(ARCFACE_LAYER,self).__init__(**kwargs)
        self.m=m
        self.s=s
        self.n_classes=n_classes
        # Margin constants are plain Python floats: no tensors are created in
        # the constructor and they broadcast against any float dtype.
        self.sin_m=math.sin(m)
        self.cos_m=math.cos(m)
        # Below this cosine, theta + m would exceed pi; `call` switches to the
        # linear penalty `cos(theta) - mm` there.
        self.threshold=math.cos(math.pi-m)
        self.mm=math.sin(math.pi-m)*m

    def build(self,input_shape):
        # input_shape is [embedding_shape, label_shape]; the feature size is
        # the last axis of the embedding.
        prev_layer_units=input_shape[0][-1]
        self.w=self.add_weight(shape=(prev_layer_units,self.n_classes),trainable=True,
                              initializer='glorot_uniform')

    def get_config(self):
        """Return the constructor arguments actually used by this instance."""
        config=super().get_config()
        config.update({"m":self.m,
                       "s":self.s,
                       "n_classes":self.n_classes})
        return config


    def call(self,inputs):
        """Compute the scaled, margin-adjusted cosine similarities.

        Args:
            inputs: ``[embeddings, labels]``; labels are cast to int32 class
                indices.

        Returns:
            Tensor of shape ``(batch, n_classes)``.
        """
        prev_layer,y=inputs
        # Flatten so that labels shaped (batch,) or (batch, 1) both give a
        # (batch, n_classes) one-hot matrix.
        y=tf.reshape(tf.cast(y,dtype=tf.int32),[-1])
        w_norm=tf.linalg.l2_normalize(self.w,axis=0)
        x_norm=tf.linalg.l2_normalize(prev_layer,axis=1)
        cos_theta=tf.linalg.matmul(x_norm,w_norm)
        y_hot=tf.one_hot(y,self.n_classes,dtype=cos_theta.dtype)
        # Rounding can push cos^2 marginally above 1, which would make the
        # square root NaN; clipping also keeps its gradient finite.
        sin_sq=tf.clip_by_value(1.0-tf.square(cos_theta),
                                tf.keras.backend.epsilon(),1.0)
        sin_theta=tf.sqrt(sin_sq)
        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        cos_theta_m=(cos_theta*self.cos_m)-(sin_theta*self.sin_m)
        cos_theta_m=tf.where(cos_theta>self.threshold,cos_theta_m,cos_theta-self.mm)
        # Margin-adjusted value for the labelled class, plain cosine elsewhere.
        final=self.s*((y_hot*cos_theta_m)+((1-y_hot)*cos_theta))
        return final

In [15]:
import gc  # imported once here instead of on every loop iteration

# Train one BERT + ArcFace model per fold and checkpoint the best weights.
# NOTE(review): the recorded run of this cell stopped with a GPU out-of-memory
# error in the first BERT encoder layer; lowering MAX_LENGTH or BATCH_SIZE are
# the obvious levers -- confirm against the actual token length of the titles.
for fold in range(5):
    # Each entry is ((feature_dict, labels), labels): the labels are fed to
    # the model as an input (for the ArcFace margin) and used as the target.
    TRAIN_INPUTS,TRAIN_OUTPUTS=FOLDS_INPUTS[fold]['train']
    TEST_INPUTS,TEST_OUTPUTS=FOLDS_INPUTS[fold]['test']
    # train_steps / valid_steps hold sample counts; convert to full batches.
    NUM_TRAIN_STEPS=train_steps[fold]//BATCH_SIZE
    NUM_VALID_STEPS=valid_steps[fold]//BATCH_SIZE
    # Drop the graph / state left over from the previous fold.
    tf.keras.backend.clear_session()
    with strategy.scope():
        input_ids=Input((MAX_LENGTH,),dtype=tf.int32)
        attention_mask=Input((MAX_LENGTH,),dtype=tf.int32)
        token_type_ids=Input((MAX_LENGTH,),dtype=tf.int32)
        ins=Input((),name="label_input")
        pre_trained=TFBertModel.from_pretrained(PRE_TRAINED_NAME,output_hidden_states=True)
        pre_outputs=pre_trained({"input_ids":input_ids,"attention_mask":attention_mask,
                        "token_type_ids":token_type_ids})
        # Take the first-token ([CLS]) vector of each of the last four hidden
        # states, last layer first, and concatenate those. Slicing before the
        # concatenation yields the same tensor as concatenating the full
        # sequences and slicing afterwards, without materialising the large
        # concatenated sequence tensor.
        last_four=pre_outputs['hidden_states'][-4:]
        hidden_layers=[state[:,0,:] for state in reversed(last_four)]
        x=tf.keras.layers.Concatenate()(hidden_layers)
        #x=pre_outputs['pooler_output']
        x=Dense(512)(x)
        arc_layer=ARCFACE_LAYER()
        x=arc_layer([x,ins])
        outs=tf.keras.layers.Softmax()(x)
        model=Model(inputs=({"input_ids":input_ids,"attention_mask":attention_mask,
                        "token_type_ids":token_type_ids},ins),outputs=outs)
        print(f"training for fold {fold}")
        if fold==0:
            # summary() prints by itself and returns None, so it is not
            # wrapped in print().
            model.summary()
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    early=tf.keras.callbacks.EarlyStopping(monitor="val_loss",mode="min",verbose=1,patience=10)
    saver=tf.keras.callbacks.ModelCheckpoint(filepath=PRE_TRAINED_NAME+f"{fold}.h5",
                                     monitor="val_loss",mode="min",save_best_only=True,
                                     save_weights_only=True,verbose=1)
    # batch_size is passed explicitly: the step counts above are derived from
    # BATCH_SIZE, whereas fit() would otherwise fall back to its default of 32
    # and cover only part of the data per epoch on multi-replica hardware.
    model.fit(TRAIN_INPUTS,TRAIN_OUTPUTS,
              batch_size=BATCH_SIZE,
              validation_data=(TEST_INPUTS,TEST_OUTPUTS),epochs=40,
              callbacks=[early,saver],steps_per_epoch=NUM_TRAIN_STEPS,
              validation_steps=NUM_VALID_STEPS)
    print(f"model training for {fold} is done")
    # Release this fold's model before the next one is built.
    del model
    gc.collect()

Downloading: 100%|██████████| 511M/511M [01:53<00:00, 4.74MB/s]   
Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


training for fold 0
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 270)]        0           []                               
                                                                                                  
 input_1 (InputLayer)           [(None, 270)]        0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, 270)]        0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  109482240   ['input_2[0][0]',                
                                thPoolingAndCrossAt               'input_1

ResourceExhaustedError: Graph execution error:

Detected at node 'model/tf_bert_model/bert/encoder/layer_._0/intermediate/dense/Tensordot/MatMul' defined at (most recent call last):
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\traitlets\config\application.py", line 976, in launch_instance
      app.start()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\IPython\core\interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\ravi1\AppData\Local\Temp\ipykernel_7140\510268190.py", line 41, in <cell line: 1>
      model.fit(TRAIN_INPUTS,TRAIN_OUTPUTS,
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 889, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\functional.py", line 458, in call
      return self._run_internal_graph(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\modeling_tf_utils.py", line 1078, in run_call_with_unpacked_inputs
      arg_names = list(dict(inspect.signature(self.call).parameters).keys())
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 1109, in call
      outputs = self.bert(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\modeling_tf_utils.py", line 1078, in run_call_with_unpacked_inputs
      arg_names = list(dict(inspect.signature(self.call).parameters).keys())
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 869, in call
      encoder_outputs = self.encoder(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 554, in call
      for i, layer_module in enumerate(self.layer):
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 560, in call
      layer_outputs = layer_module(
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 516, in call
      intermediate_output = self.intermediate(hidden_states=attention_output)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 419, in call
      hidden_states = self.dense(inputs=hidden_states)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\ravi1\anaconda3\envs\restaurant\lib\site-packages\keras\layers\core\dense.py", line 224, in call
      outputs = tf.tensordot(inputs, self.kernel, [[rank - 1], [0]])
Node: 'model/tf_bert_model/bert/encoder/layer_._0/intermediate/dense/Tensordot/MatMul'
OOM when allocating tensor with shape[8910,3072] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/tf_bert_model/bert/encoder/layer_._0/intermediate/dense/Tensordot/MatMul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_27770]