# Use next commands to get info of GPU device

In [1]:
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
print(tf.__version__)

Found GPU at: /device:GPU:0
2.2.0


In [2]:
import os
import time
from functools import partial

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

from tensorflow.keras import backend as K

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

from SimCLR_Keras.DataGenerators.DataGeneratorSimCLR import DataGeneratorSimCLR
from SimCLR_Keras.model import SimCLR
from SimCLR_Keras.gpu import use_gpu_and_allow_growth
from SimCLR_Keras.preprocessing import preprocess_image

from VincentVGG.Utils import retrieve_training_state, remove_training_state
import tensorflow as tf

In [3]:
save_path='./SimCLR'
data_dir='./train_data'
random_state=42
epochs=4000
patience=20
batch_size=10
input_shape=(80, 80, 3)
gpu_allow_growth=False
df_sep='|'
feat_dims_ph=[512, 256, 128, 64]
augmentation_functions=['crop', 'flip', 'color_distort']
num_of_unfrozen_layers=None
test_size=0.25
track_weight_change=False

# base model
base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=input_shape
)
print('\n========= Build VGG16 base model loaded =========')
#base_model.summary()

print('\n========= Build SimCLR model to train =========')
model = SimCLR(
    base_model=base_model,
    input_shape=input_shape,
    batch_size=batch_size,
    feat_dims_ph=feat_dims_ph,
    num_of_unfrozen_layers=num_of_unfrozen_layers,
    save_path=save_path,
    lr=1e-5
)
# Check and retrieve checkpoint file to resume training
_, initial_epoch, checkpoint_file = retrieve_training_state(save_path)

# Check and retrieve checkpoint file to resume training
print('\n========= Restore checkpoint =========')
print(f'Usin checkpoint file {checkpoint_file}')
print(f'Initial epcho is {initial_epoch+1}')

print('\n========= Build model =========')
model.build_model(checkpoint_file)
model.print_weights()





Usin checkpoint file ./SimCLR/checkpoints/last_checkpoint.h5
Initial epcho is 11

Trainable parameters: 15.15 M.
Non-trainable parameters: 0.0 M.


In [None]:

print(f'\n========= Build Data generators for train/val using augmentations {augmentation_functions} =========')
image_augmentation = partial(
    preprocess_image,
    operators=augmentation_functions
)
generator_params = {
    'batch_size': batch_size,
    'shuffle': True,
    'width': input_shape[0],
    'height': input_shape[1],
    'augmentation_function': image_augmentation,
    'preprocess_image': preprocess_input
}

df = pd.read_csv(
    os.path.join(data_dir, 'output_tags.csv'),
    sep=df_sep
)

df['filename'] = df.id.apply(lambda x: os.path.join(data_dir, f'images/{x}.jpg'))
df_train, df_test = train_test_split(
    df,
    test_size=test_size,
    shuffle=True,
    random_state=random_state
)

print(f'Total rows in dataframe {len(df)}')
print(f'Total rows in test {len(df_test)}')
print(f'Total rows in train {len(df_train)}')

# Generators
data_train = DataGeneratorSimCLR(
    df_train.reset_index(drop=True),
    subset='train',
    **generator_params
)

data_val = DataGeneratorSimCLR(
    df_test.reset_index(drop=True),
    subset='val',
    **generator_params
)

In [5]:
a = [data for data in data_train]

Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.


In [11]:
for i, (X,y) in enumerate(a):
    if None in X:
        print(i)

1060

In [12]:

print('\n========= Predict on validation before =========')
# This calculate the accuracy of positive pairs by taking the average the model output probability
# of all positive pairs one time for each i,j and j,i. 
# y_true has 1s in the position of positive pairs and 0s in all other position. multiplying by y_true
# is a way to only sum the probabilities of the positive pairs positions in the matrix
y_true = np.concatenate([y for _,y in data_val])





In [7]:

y_predict_val_before = model.SimCLR_model(data_val)


InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  TypeError: `generator` yielded an element that could not be converted to the expected type. The expected type was int32, but the yielded element was None.
Traceback (most recent call last):

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 800, in generator_py_func
    ret_arrays.append(script_ops.FuncRegistry._convert(  # pylint: disable=protected-access

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 203, in _convert
    result = np.asarray(value, dtype=dtype, order="C")

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
    return array(a, dtype, copy=False, order=order)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 243, in __call__
    ret = func(*args)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 309, in wrapper
    return func(*args, **kwargs)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 803, in generator_py_func
    six.reraise(TypeError, TypeError(

  File "/home/lbelloli/.local/lib/python3.8/site-packages/six.py", line 718, in reraise
    raise value.with_traceback(tb)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 800, in generator_py_func
    ret_arrays.append(script_ops.FuncRegistry._convert(  # pylint: disable=protected-access

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 203, in _convert
    result = np.asarray(value, dtype=dtype, order="C")

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
    return array(a, dtype, copy=False, order=order)

TypeError: `generator` yielded an element that could not be converted to the expected type. The expected type was int32, but the yielded element was None.


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
  (1) Invalid argument:  TypeError: `generator` yielded an element that could not be converted to the expected type. The expected type was int32, but the yielded element was None.
Traceback (most recent call last):

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 800, in generator_py_func
    ret_arrays.append(script_ops.FuncRegistry._convert(  # pylint: disable=protected-access

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 203, in _convert
    result = np.asarray(value, dtype=dtype, order="C")

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
    return array(a, dtype, copy=False, order=order)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'


During handling of the above exception, another exception occurred:


Traceback (most recent call last):

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 243, in __call__
    ret = func(*args)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 309, in wrapper
    return func(*args, **kwargs)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 803, in generator_py_func
    six.reraise(TypeError, TypeError(

  File "/home/lbelloli/.local/lib/python3.8/site-packages/six.py", line 718, in reraise
    raise value.with_traceback(tb)

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 800, in generator_py_func
    ret_arrays.append(script_ops.FuncRegistry._convert(  # pylint: disable=protected-access

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/ops/script_ops.py", line 203, in _convert
    result = np.asarray(value, dtype=dtype, order="C")

  File "/home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/numpy/core/_asarray.py", line 83, in asarray
    return array(a, dtype, copy=False, order=order)

TypeError: `generator` yielded an element that could not be converted to the expected type. The expected type was int32, but the yielded element was None.


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_14]]
0 successful operations.
0 derived errors ignored. [Op:__inference_predict_function_102649]

Function call stack:
predict_function -> predict_function


In [None]:

K.clear_session()
accuracy_before = np.sum(y_true * y_predict_val_before)/(2*len(data_val))

print(f'Accuracy - test - before: {np.round(accuracy_before,2)}')


In [5]:
model.SimCLR_model.fit(
    data_train,
    epochs=10,
    initial_epoch=initial_epoch,
    verbose=1,
    validation_data=data_val
)

Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc57a0236d0>

In [6]:
callbacks = model.get_callbacks(
    early_stop_patience=patience,
    track_weight_change=track_weight_change
)

for c in callbacks:
    print(c)

<tensorflow.python.keras.callbacks.TensorBoard object at 0x7fc57a023a90>
<VincentVGG.Callbacks.RestartTrainingModelCheckpoint object at 0x7fc7107b1310>
<tensorflow.python.keras.callbacks.ModelCheckpoint object at 0x7fc560faffd0>
<tensorflow.python.keras.callbacks.EarlyStopping object at 0x7fc560faff40>
<tensorflow.python.keras.callbacks.ReduceLROnPlateau object at 0x7fc7107b14c0>
<VincentVGG.Callbacks.ClearMemory object at 0x7fc57a023ca0>


In [8]:

print(f'\n========= Train SimCLR model from epoch {initial_epoch+1} =========')
for cb in callbacks[1:]:
    print('Callback')
    print(cb)
    model.SimCLR_model.fit(
        data_train,
        epochs=10,
        initial_epoch=initial_epoch,
        verbose=1,
        validation_data=data_val,
        callbacks=[cb]
    )



Callback
<VincentVGG.Callbacks.RestartTrainingModelCheckpoint object at 0x7fc7107b1310>
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Callback
<tensorflow.python.keras.callbacks.ModelCheckpoint object at 0x7fc560faffd0>
Epoch 1/10
Epoch 00001: val_loss improved from inf to 65.91270, saving model to ./SimCLR/results/_07_07_23h_31_best_checkpoint.h5
Epoch 2/10
Epoch 00002: val_loss improved from 65.91270 to 64.52106, saving model to ./SimCLR/results/_07_07_23h_31_best_checkpoint.h5
Epoch 3/10
Epoch 00003: val_loss improved from 64.52106 to 63.15841, saving model to ./SimCLR/results/_07_07_23h_31_best_checkpoint.h5
Epoch 4/10
Epoch 00004: val_loss improved from 63.15841 to 61.82637, saving model to ./SimCLR/results/_07_07_23h_31_best_checkpoint.h5
Epoch 5/10
Epoch 00005: val_loss improved from 61.82637 to 60.52542, saving model to ./SimCLR/results/_07_07_23h_31_best_checkpoint.h5
Epoch 6/10
Epoch 00006: val_loss improved fr

In [9]:

print('\n========= Predict on validation after and final results =========')
y_predict_test_after = model.predict(data_val)





In [10]:
accuracy_after = np.sum(y_true * y_predict_test_after)/(2*len(data_val))

print(f'Random guess accuracy: {round(1 / (2*batch_size), 4)}')
print(f'Accuracy - test - before: {np.round(accuracy_before, 2)}')
print(f'Accuracy - test - after: {np.round(accuracy_after, 2)}')

NameError: name 'y_true' is not defined

In [3]:
from SimCLR_Keras.Scripts.pretain_vgg16 import pretain_vgg16
import os

os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

pretain_vgg16(
    save_path='./SimCLR',
    data_dir='./train_data',
    random_state=42,
    epochs=4000,
    patience=20,
    batch_size=10,
    input_shape=(80, 80, 3),
    gpu_allow_growth=False
)

  return warn(



Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 80, 80, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 80, 80, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 80, 80, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 40, 40, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 40, 40, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 40, 40, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 20, 20, 128)       0    

AssertionError: in user code:

    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1147 predict_function  *
        outputs = self.distribute_strategy.run(
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1122 predict_step  **
        return self(x, training=False)
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:717 call
        return self._run_internal_graph(
    /home/lbelloli/anaconda3/envs/van-gogh/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:899 _run_internal_graph
        assert str(id(x)) in tensor_dict, 'Could not compute output ' + str(x)

    AssertionError: Could not compute output Tensor("softmax_cosine_sim_1/Identity:0", shape=(32, 128), dtype=float32)
