In [1]:
import pandas as pd
import numpy as np 
import os 
import cv2 as cv
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
def _resolve_tpu():
    """Return a TPU cluster resolver, or ``None`` when TPU detection fails."""
    try:
        # No arguments are needed when the TPU_NAME environment variable is
        # set, which is always the case on Kaggle.
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', resolver.master())
    except ValueError:
        return None
    return resolver


tpu = _resolve_tpu()

if not tpu:
    # TensorFlow's default distribution strategy; works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [3]:
# Global batch size: 8 samples per replica, multiplied by the number of
# replicas in the selected distribution strategy.
Batch_size = 8 * strategy.num_replicas_in_sync

In [4]:
# Load the saved model inside the distribution strategy's scope so that its
# variables are created under that strategy (required for TPU / multi-replica
# training). With the default strategy the scope is a no-op, so single-device
# runs behave exactly as before.
# NOTE(review): when actually running on a TPU, loading from a local path may
# additionally need tf.saved_model.LoadOptions(experimental_io_device=...) — confirm.
with strategy.scope():
    model = tf.keras.models.load_model("../input/arcface-training-images/model")

In [5]:
# Load the competition metadata table. Note that the frame is named `test`
# even though it is read from train.csv; it is later split into training and
# hold-out subsets.
test = pd.read_csv("../input/shopee-product-matching/train.csv")

In [6]:
# Directory holding the image files; used as the default `path` argument of
# Datagenerator, which joins it with each row's `image` file name.
images = "../input/shopee-product-matching/train_images"

In [7]:
# Map the raw `label_group` ids to integer class codes and store them in a new
# `encoded_label_group` column. The fitted encoder is kept in `lb`.
lb = LabelEncoder()
test["encoded_label_group"] = lb.fit_transform(test["label_group"])

In [8]:
# Preview the first rows, including the newly added `encoded_label_group` column.
test.head()

Unnamed: 0,posting_id,image,image_phash,title,label_group,encoded_label_group
0,train_129225211,0000a68812bc7e98c42888dfb1c07da0.jpg,94974f937d4c2433,Paper Bag Victoria Secret,249114794,666
1,train_3386243561,00039780dfc94d01db8676fe789ecd05.jpg,af3f9460c2838f0f,"Double Tape 3M VHB 12 mm x 4,5 m ORIGINAL / DO...",2937985045,7572
2,train_2288590299,000a190fdd715a2a36faed16e2c65df7.jpg,b94cb00ed3e50f78,Maling TTS Canned Pork Luncheon Meat 397 gr,2395904891,6172
3,train_2406599165,00117e4fc239b1b641ff08340b429633.jpg,8514fc58eafea283,Daster Batik Lengan pendek - Motif Acak / Camp...,4093212188,10509
4,train_3369186413,00136d1cf4edede0203f32f05f660588.jpg,a6f319f924ad708c,Nescafe \xc3\x89clair Latte 220ml,3648931069,9425


In [9]:
class Datagenerator(Sequence):
    """Keras ``Sequence`` that yields batches of resized images with their labels.

    Every batch has the form ``((images, labels), labels)``: the encoded labels
    are supplied both as a second model input and as the training target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``image`` (file name) and
        ``encoded_label_group`` (numeric class code).
    path : str
        Directory that contains the image files.
    img_size : int
        Images are resized to ``img_size x img_size``.
    batch_size : int
        Number of samples per batch; the last batch may be smaller.
    listID : sequence of int, optional
        Positional row numbers (passed to ``df.iloc``) selecting the rows this
        generator serves. ``None`` selects every row of ``df``.
    shuffle : bool
        When true, the rows are re-ordered at construction time and after
        every epoch.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    FileNotFoundError
        While building a batch, if an image cannot be read from disk.
    """

    def __init__(self, df, path=images, img_size=512, batch_size=Batch_size,
                 listID=None, shuffle=True):
        super().__init__()
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.batch_size = batch_size
        self.img_size = img_size
        self.path = path
        self.df = df
        # `df.iloc[None]` is not a valid selection, so treat the default
        # listID=None as "use the whole frame".
        subset = df if listID is None else df.iloc[listID]
        # Reset to a 0..n-1 index immediately: batches are looked up with
        # `iloc`, so `self.indices` must hold positions, not the labels
        # inherited from `df`. Previously this only happened when shuffling.
        self.data = subset.reset_index(drop=True)
        self.indices = self.data.index
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (ceiling division)."""
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def on_epoch_end(self):
        """Re-order the rows after each epoch when shuffling is enabled."""
        if self.shuffle:
            # The fixed seed applies the same permutation to the current
            # order each time, so the sequence of orderings is reproducible.
            self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)
            self.indices = self.data.index

    def __getitem__(self, ind):
        """Return batch number ``ind`` as ``((images, labels), labels)``."""
        start = ind * self.batch_size
        dex = self.indices[start:start + self.batch_size]
        return self.__Generator(dex)

    def __Generator(self, dex):
        """Load, resize and stack the images for the row positions in ``dex``."""
        batch_images = np.zeros((len(dex), self.img_size, self.img_size, 3))
        Y = np.zeros((len(dex)))
        batch = self.data.iloc[dex]
        rows = zip(batch["image"], batch["encoded_label_group"])
        for i, (file_name, label) in enumerate(rows):
            img_path = os.path.join(self.path, file_name)
            # cv.imread signals an unreadable/missing file by returning None;
            # fail with a clear message instead of an opaque resize error.
            img = cv.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"Could not read image: {img_path}")
            # NOTE(review): OpenCV loads images in BGR channel order and no
            # conversion is applied here — confirm this matches how the model
            # was originally trained.
            batch_images[i,] = cv.resize(img, (self.img_size, self.img_size))
            Y[i] = label
        return (batch_images, Y), Y
            

In [10]:
# Stratified hold-out split: 67% of the rows for training, 33% for validation,
# keeping the class proportions of `encoded_label_group` in both parts.
# `random_state` is pinned so that the split (and therefore the reported
# validation results) is the same on every Restart & Run All.
xtr,xts,ytr,yts = train_test_split(
    test,
    test["encoded_label_group"].values,
    stratify=test["encoded_label_group"].values,
    test_size=0.33,
    random_state=42,
)

In [11]:
# Index labels of the rows that ended up in each split. They are handed to
# Datagenerator as `listID`, which selects rows with `df.iloc`.
# NOTE(review): this relies on `test` having a default 0..n-1 index so that
# labels and positions coincide — confirm if the frame is ever re-indexed.
indices_train = xtr.index
indices_test = xts.index

In [12]:
# Build one generator per split from the full frame, restricted to the split's
# rows via `listID`. Both use the constructor defaults (img_size=512,
# batch_size=Batch_size, shuffle=True), so the validation generator is
# shuffled as well.
train_data = Datagenerator(test,listID = indices_train)
test_data = Datagenerator(test,listID=indices_test)

In [13]:
# Print the architecture of the loaded model (layer names, output shapes,
# parameter counts).
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
efficientnetb3 (Functional)     (None, 1536)         10783535    input_image[0][0]                
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 1536)         6144        efficientnetb3[0][0]             
__________________________________________________________________________________________________
labels (InputLayer)             [(None,)]            0                                            
______________________________________________________________________________________________

In [14]:
# Continue training the loaded model for 14 epochs on the training generator,
# evaluating on the hold-out generator after each epoch.
# NOTE(review): no compile() call appears in this notebook, so this presumably
# relies on the compile state restored by load_model — confirm.
model.fit(train_data,validation_data=test_data,epochs=14)

Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14


<tensorflow.python.keras.callbacks.History at 0x7fa780f150d0>

In [15]:
def get_layer_index(model, layer_name, not_found=None):
    """Return the position of the first layer called ``layer_name``.

    Scans ``model.layers`` in order and yields the index of the first layer
    whose ``name`` equals ``layer_name``; ``not_found`` is returned when no
    layer matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(model.layers)
        if candidate.name == layer_name
    )
    return next(matching_positions, not_found)

In [16]:
# Build the embedding sub-model: it runs from the input of the first layer to
# the tensor that feeds the third layer.
# NOTE(review): according to the model.summary() output earlier in this
# notebook, layers[0] is `input_image` and layers[2] is `batch_normalization`,
# so the output would be the (None, 1536) `efficientnetb3` features taken
# before batch normalisation — confirm this is the intended embedding.
md = tf.keras.models.Model(inputs=model.layers[0].input, outputs=model.layers[2].input)

In [17]:
# Persist the embedding sub-model under the relative path "embadding_arcface".
# The spelling of the path is kept as-is because anything that loads this
# artifact must use the same name.
md.save("embadding_arcface")