In [11]:
# Attach Google Drive to this Colab runtime; its contents then appear below /content/gdrive.
from google.colab import drive

drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Copy the input files (feature CSV, train triplets, test triplets) from the mounted
# Drive folder "My Drive/Colab Notebooks" into /content/, where the later cells read them.
# Spaces in the Drive path are backslash-escaped because %cp hands the line to the shell.
%cp /content/gdrive/My\ Drive/Colab\ Notebooks/features_inception_resnet_inception_v3_finetune_COLAB.csv /content/
%cp /content/gdrive/My\ Drive/Colab\ Notebooks/data/train_triplets.txt /content/
%cp /content/gdrive/My\ Drive/Colab\ Notebooks/data/test_triplets.txt /content/

In [0]:
import itertools
import os
import pathlib
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from PIL import Image
from math import ceil, floor
from timeit import default_timer as timer

In [40]:
# Seed every source of randomness used by the notebook: NumPy drives the triplet
# shuffle, TensorFlow drives the Keras weight initialisation.
np.random.seed(470)
tf.random.set_seed(470)

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Report the library versions and the accelerators visible to TensorFlow.
print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)
print("Availables GPU:")
gpu_devices = tf.config.list_physical_devices('GPU')  # query once, reuse below
print(gpu_devices if gpu_devices else 'No GPU available')

TF version: 2.2.0
Hub version: 0.8.0
Availables GPU:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [0]:
# Number of triplets per batch, shared by the training and test pipelines.
BATCH_SIZE = 64

# Feature matrix loaded from a header-less, comma-separated CSV; rows are looked up
# later by the integer ids stored in the triplet files.
# (alternative input kept for reference: '/content/features_resnet.zip', compression='zip')
features = pd.read_csv(
    '/content/features_inception_resnet_inception_v3_finetune_COLAB.csv',
    sep=',',
    header=None,
).to_numpy()

# Triplet lists: space-separated, no header, three ids per line named A, B, C.
train_triplets_df = pd.read_csv('/content/train_triplets.txt', sep=' ', header=None)
test_triplets_df = pd.read_csv('/content/test_triplets.txt', sep=' ', header=None)
train_triplets_df.columns = test_triplets_df.columns = ['A', 'B', 'C']

N_train, N_test = len(train_triplets_df), len(test_triplets_df)

In [0]:
# Swap B and C in the first half of the training triplets so that both label
# values occur, then build the matching one-hot labels and shuffle both together.
N_train = len(train_triplets_df)
N_test = len(test_triplets_df)
n_swapped = int(N_train / 2)

# First half: exchange the roles of B and C. pd.concat(sort=True) realigns the
# columns by name, which is what actually moves the values.
swapped_train_triplets_df = train_triplets_df.iloc[:n_swapped].rename(columns={'B': 'C', 'C': 'B'})
train_triplets_df = pd.concat(
    [swapped_train_triplets_df, train_triplets_df.iloc[n_swapped:]],
    sort=True,
)

# Labels: column 0 is 1 for untouched rows (second half), column 1 is its complement.
kept_original_order = (np.arange(N_train) >= n_swapped).astype(np.float64)
Y_train_np = np.column_stack((kept_original_order, 1 - kept_original_order))

shuffle = True
if shuffle:
    # One permutation is applied to both the triplets and the labels to keep them aligned.
    rd_permutation = np.random.permutation(train_triplets_df.index)
    train_triplets_df = train_triplets_df.reindex(rd_permutation)
    train_triplets_df = train_triplets_df.set_index(np.arange(N_train))
    Y_train_np = Y_train_np[rd_permutation]
Y_train_ts = tf.constant(Y_train_np)

In [43]:
# build test and train
def _iter_triplet_features(triplets_df):
    """Yield one (features_A, features_B, features_C) tuple per row of ``triplets_df``.

    ``triplets_df`` must have integer columns 'A', 'B' and 'C'; each value is used
    as a row index into the module-level ``features`` array.
    """
    # itertuples avoids building a Series per row (as iterrows does) and keeps the
    # ids as integers, which is required for indexing into ``features``.
    for a, b, c in triplets_df[['A', 'B', 'C']].itertuples(index=False, name=None):
        yield features[a, :], features[b, :], features[c, :]


def X_train_generator():
    """Generate the feature triplets for ``train_triplets_df`` in row order."""
    yield from _iter_triplet_features(train_triplets_df)


def X_test_generator():
    """Generate the feature triplets for ``test_triplets_df`` in row order."""
    yield from _iter_triplet_features(test_triplets_df)


# Width of one feature vector, taken from the loaded matrix so the pipeline and the
# model follow whatever feature file was read instead of a hard-coded 1001.
input_shape = (features.shape[1],)

# Training inputs: one (A, B, C) feature triplet per element, produced lazily.
X_train = tf.data.Dataset.from_generator(X_train_generator,
                                         output_types=(tf.float32, tf.float32, tf.float32),
                                         output_shapes=(tf.TensorShape(input_shape),) * 3
                                         )

# Test inputs: same element structure, batched; prefetch lets the Python generator
# run ahead of the consumer.
X_test = tf.data.Dataset.from_generator(X_test_generator,
                                        output_types=(tf.float32, tf.float32, tf.float32),
                                        output_shapes=(tf.TensorShape(input_shape),) * 3,
                                        ).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Pair every training triplet with its label row, batch, and repeat indefinitely
# (the number of batches per epoch is bounded by steps_per_epoch in model.fit).
Y_train = tf.data.Dataset.from_tensor_slices(Y_train_ts)
zipped_train = tf.data.Dataset.zip((X_train, Y_train)).batch(BATCH_SIZE).repeat().prefetch(AUTOTUNE)




In [44]:
# build the model
# Three inputs, one per image of the triplet. (The original lines ended with a
# trailing comma, which silently wrapped every Input in a 1-tuple.)
input_A = tf.keras.layers.Input(shape=input_shape, name='input_A')
input_B = tf.keras.layers.Input(shape=input_shape, name='input_B')
input_C = tf.keras.layers.Input(shape=input_shape, name='input_C')

inputs_AB = [input_A, input_B]
inputs_AC = [input_A, input_C]

# Each pair (A,B) and (A,C) is concatenated and passed through its own small
# dense layer; the two layers do not share weights.
x_AB = tf.keras.layers.Concatenate(axis=1)(inputs_AB)
x_AC = tf.keras.layers.Concatenate(axis=1)(inputs_AC)

x_AB = tf.keras.layers.Dense(10, activation='relu')(x_AB)
x_AC = tf.keras.layers.Dense(10, activation='relu')(x_AC)

# Merge both pair representations and emit a 2-way softmax (probabilities, not logits).
x = tf.keras.layers.Concatenate(axis=1)([x_AB, x_AC])
output = tf.keras.layers.Dense(2, activation='softmax')(x)

model = tf.keras.Model(inputs=[input_A, input_B, input_C], outputs=output, name='task3_model')

model.summary()

Model: "task3_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_A (InputLayer)            [(None, 1001)]       0                                            
__________________________________________________________________________________________________
input_B (InputLayer)            [(None, 1001)]       0                                            
__________________________________________________________________________________________________
input_C (InputLayer)            [(None, 1001)]       0                                            
__________________________________________________________________________________________________
concatenate_12 (Concatenate)    (None, 2002)         0           input_A[0][0]                    
                                                                 input_B[0][0]          

In [45]:
#compile
# The output layer already applies softmax, so the loss must consume probabilities
# (from_logits left at its default of False). The labels are one-hot over two classes,
# which pairs with categorical cross-entropy.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy']
              )
#callbacks
# NOTE: there is no validation split, so early stopping watches the *training* accuracy.
es = tf.keras.callbacks.EarlyStopping(monitor='accuracy', mode='max', verbose=1, patience=2, restore_best_weights=True)

#fit
print('Training started')
# zipped_train repeats forever, so one epoch is defined as one full pass over the
# training triplets: ceil(N_train / BATCH_SIZE) batches.
steps_per_epoch = ceil(N_train / BATCH_SIZE)
# use_multiprocessing only applies to generator / Sequence inputs, not tf.data, so it is omitted.
model.fit(zipped_train, epochs=10, steps_per_epoch=steps_per_epoch, verbose=1, callbacks=[es])

Training started
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


<tensorflow.python.keras.callbacks.History at 0x7f70f0eef080>

In [0]:
#debug only
# Time one prediction on a dummy batch to estimate the full test-set run time.
# The dummy batch must have the model's feature width (input_shape), not a fixed 2048.
start = timer()
model.predict( [np.ones([BATCH_SIZE, input_shape[0]]),]*3 )
end = timer()
elapsed = end - start
print(str(round(elapsed,2)) + " sec to predict a batch of " + str(BATCH_SIZE)
      + ", " + str(N_test) + " samples will be evaluated in " + str(round(N_test/BATCH_SIZE*elapsed, 2)) + "sec")

0.07 sec to predict a batch of 64, 59516 samples will be evaluated in 68.55sec


In [46]:
#function to predict batched dataset
def batch_predict(X, N):
    """Run the module-level ``model`` over a batched dataset and stack the outputs.

    Args:
        X: object exposing ``as_numpy_iterator()`` that yields one model input per
           batch (here: a batched ``tf.data.Dataset``).
        N: total number of samples; ``ceil(N / BATCH_SIZE)`` batches are consumed.

    Returns:
        float64 array with two columns, with the per-batch predictions stacked in
        order (shape ``(0, 2)`` when ``N`` is 0).

    Raises:
        StopIteration: if ``X`` yields fewer batches than ``N`` requires.
    """
    batch_iterator = X.as_numpy_iterator()
    # Collect the per-batch outputs and concatenate once at the end instead of
    # re-copying the growing result on every iteration. The empty float64 seed keeps
    # the result's dtype and its (0, 2) shape for N == 0.
    predictions = [np.zeros([0, 2])]
    for n in range(0, N, BATCH_SIZE):
        start = timer()
        predictions.append(model.predict(next(batch_iterator)))
        end = timer()
        print('Predicted until ' + str(n) + ', ' + str(round(end-start,2)) + 's')
    print('Predicted')
    return np.concatenate(predictions, axis=0)

# Predict the whole test set, then write two header-less, index-less CSV files:
# the raw score of output column 0 (two decimals) and its 0/1 decision at 0.5.
Y_test = batch_predict(X_test, N_test)
submission_scores = Y_test[:, 0]
pd.DataFrame(submission_scores).to_csv("sumbission_float.csv", index=False, header=False, float_format='%.2f')
pd.DataFrame((submission_scores > 0.5).astype(int)).to_csv("sumbission.csv", index=False, header=False)
print('Done')


Predicted until 0, 0.08s
Predicted until 64, 0.05s
Predicted until 128, 0.04s
Predicted until 192, 0.04s
Predicted until 256, 0.04s
Predicted until 320, 0.06s
Predicted until 384, 0.05s
Predicted until 448, 0.04s
Predicted until 512, 0.05s
Predicted until 576, 0.05s
Predicted until 640, 0.05s
Predicted until 704, 0.05s
Predicted until 768, 0.05s
Predicted until 832, 0.05s
Predicted until 896, 0.05s
Predicted until 960, 0.05s
Predicted until 1024, 0.05s
Predicted until 1088, 0.05s
Predicted until 1152, 0.05s
Predicted until 1216, 0.05s
Predicted until 1280, 0.05s
Predicted until 1344, 0.05s
Predicted until 1408, 0.04s
Predicted until 1472, 0.05s
Predicted until 1536, 0.05s
Predicted until 1600, 0.05s
Predicted until 1664, 0.05s
Predicted until 1728, 0.05s
Predicted until 1792, 0.05s
Predicted until 1856, 0.05s
Predicted until 1920, 0.05s
Predicted until 1984, 0.04s
Predicted until 2048, 0.04s
Predicted until 2112, 0.05s
Predicted until 2176, 0.05s
Predicted until 2240, 0.04s
Predicted u