In [2]:
import os
import tensorflow as tf

# Make no GPU visible to TensorFlow for the rest of this session.
tf.config.set_visible_devices([], 'GPU')

# Report whether TensorFlow still exposes a GPU device name (truthy) or not.
print('GPU found' if tf.test.gpu_device_name() else "No GPU found")

from typing import Union

import numpy as np
import pandas as pd

import transformers
from sklearn.preprocessing import LabelEncoder

from modelling.models import TextProductMatch
from sklearn.model_selection import KFold
from transformers import BertTokenizer, TFBertModel

import os

# Initial hyper-parameter set, keyed by upper-case option name.
params = dict(
    N_CLASSES=11014,
    MAX_LEN=20,
    MODEL_NAME='bert-base-multilingual-uncased',
    POOLING="global_avg_1d",
    EPOCHS=5,
    BATCH_SIZE=16,
    METRIC="adacos",
)

# Output directory for this run; created up front, and an already existing
# directory is not treated as an error.
PATH_NAME = 'saved/arcface/v1'
os.makedirs(name=PATH_NAME, exist_ok=True)

GPU found


In [76]:
from modelling.metrics import *
from modelling.pooling import *

In [77]:
# Load the configuration of the checkpoint named in `params`, overriding
# `output_hidden_states` so the model also returns its hidden states.
config = transformers.BertConfig.from_pretrained(
    params["MODEL_NAME"],
    output_hidden_states=True,
)

# Tokenizer and TF model both come from the same checkpoint name; the model
# is built with the customised config above.
tokenizer = transformers.AutoTokenizer.from_pretrained(params["MODEL_NAME"])
word_model = transformers.TFAutoModel.from_pretrained(
    params["MODEL_NAME"],
    config=config,
)

Some layers from the model checkpoint at bert-base-multilingual-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [78]:
# Rebinds `params` for the cells below: compared with the first definition,
# MAX_LEN and BATCH_SIZE differ and LAST_HIDDEN_STATES is added.
params = dict(
    N_CLASSES=11014,
    MAX_LEN=70,
    MODEL_NAME='bert-base-multilingual-uncased',
    POOLING="global_avg_1d",
    EPOCHS=5,
    BATCH_SIZE=32,
    METRIC="adacos",
    LAST_HIDDEN_STATES=3,
)

In [116]:
# Symbolic int32 inputs of length MAX_LEN, fed to the word model below as
# token ids, attention mask and token type ids respectively.
ids = tf.keras.layers.Input((params["MAX_LEN"],), dtype=tf.int32)
att = tf.keras.layers.Input((params["MAX_LEN"],), dtype=tf.int32)
tok = tf.keras.layers.Input((params["MAX_LEN"],), dtype=tf.int32)

# Label input of width N_CLASSES. The shape must be a 1-tuple: `(n)` without
# a trailing comma is just the int `n`, which only some Keras versions
# accept as a shape.
labels_onehot = tf.keras.layers.Input(shape=(params["N_CLASSES"],), dtype=tf.int32)

# Take the last element of the model output. With `output_hidden_states`
# enabled on the config this is presumably the tuple of per-layer hidden
# states -- TODO confirm against the installed transformers version.
x = word_model(ids, attention_mask=att, token_type_ids=tok)[-1]

# Concatenate the last LAST_HIDDEN_STATES entries of `x` (final one first)
# along the last axis.
x1 = tf.concat(
    [x[-i - 1] for i in range(params["LAST_HIDDEN_STATES"])],
    axis=-1,
)



In [117]:
# Collapse axis 1 of `x1` twice -- once by mean, once by max -- and join the
# two pooled tensors side by side.
# NOTE(review): assumes axis 1 is the token axis; the attention mask `att`
# is not applied here, so padded positions take part in both reductions.
x2_mean = tf.math.reduce_mean(input_tensor=x1, axis=1)
x2_max = tf.math.reduce_max(input_tensor=x1, axis=1)
x3 = tf.concat(values=[x2_mean, x2_max], axis=1)

In [118]:
# Display the pooled tensor to check its symbolic shape and dtype.
x3

<tf.Tensor 'concat_27:0' shape=(None, 4608) dtype=float32>

In [15]:
# Toy label vector (7 items, 3 distinct classes) for the mask experiments below.
labels = [1, 2, 3, 1, 3, 2, 3]

In [16]:
# Boolean identity matrix with one row/column per label: True only on the diagonal.
tf.eye(num_rows=len(labels), dtype=tf.bool)

<tf.Tensor: shape=(7, 7), dtype=bool, numpy=
array([[ True, False, False, False, False, False, False],
       [False,  True, False, False, False, False, False],
       [False, False,  True, False, False, False, False],
       [False, False, False,  True, False, False, False],
       [False, False, False, False,  True, False, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False, False,  True]])>

In [17]:
# Pairwise mask: entry (i, j) is True when i != j and labels[i] == labels[j].
tf.logical_and(
    # Off-diagonal positions only (an item is never paired with itself).
    tf.logical_not(tf.eye(len(labels), dtype=tf.bool)),
    # Broadcast a row view against a column view to compare every label pair.
    tf.equal(
        tf.expand_dims(labels, axis=0),
        tf.expand_dims(labels, axis=1),
    ),
)

<tf.Tensor: shape=(7, 7), dtype=bool, numpy=
array([[False, False, False,  True, False, False, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False,  True, False,  True],
       [ True, False, False, False, False, False, False],
       [False, False,  True, False, False, False,  True],
       [False,  True, False, False, False, False, False],
       [False, False,  True, False,  True, False, False]])>