# Set the hyperparameters and file paths here

In [1]:
## Hyperparameters
# Batch size used for siamese training
batch_size = 32
# Triplet loss computation parameter
margin = 0.1
# Minimum number of samples a class must have in the train dataset to be considered
min_count_th = 0
# Samples set aside, per class, for validation
samples_per_class_val = 0
# Samples set aside, per class, for test
samples_per_class_test = 0
# Learning rate
learning_rate = 1e-5
# Total epochs for training
num_epochs = 300
# Number of epochs between re-sampling of triplets
epochs_per_triplets = 1
# Candidate triplet images generated per triplet of labels
triplets_fixed_class = 70
# Number of top candidate triplets selected for training
top_triplets = 1600
# Augmentation - True or False
augmentation_flag = False
# Length of feature vectors (512 for VGG, 2048 for Resnet50)
len_features = 512
######################################
## Pooling type: 'mac' or 'spoc'
pooling_type = 'spoc'
######################################
## Layers to be fine-tuned
# Fine-tune pca layer
tune_pca = True
# Fine-tune conv5 layer
tune_conv5 = True
# For naming files
# NOTE(review): the tag says 'mac' while pooling_type above is 'spoc' — confirm
# which one is intended, since the tag ends up in the saved weight filenames.
tag = 'mac-keras-new-data-margin01'
# Derived from the tune_* flags so the filename component cannot drift from the
# layers that are actually fine-tuned ('conv5_pca' when both flags are True).
trainingLayer = '_'.join(
    layer_name
    for layer_name, is_tuned in (('conv5', tune_conv5), ('pca', tune_pca))
    if is_tuned
) or 'none'

In [3]:
import sys
import os
import numpy as np

## Make the project-local helper modules importable
sys.path.append('./modules')

## Filepaths
# Directory of images
imagesDir = '../real_images/train'
# Input csv that is split into train / validation / test frames
input_csv_path='../real_images/df_final_all_resized_top14_folder_split_train.csv'
# Path for saving train data frame
csv_path_train = '../Processed_csvs/train/df_train.csv'
# Path for saving validation data frame
csv_path_val = '../Processed_csvs/train/df_val.csv'
# Path for saving test data frame
csv_path_test = '../Processed_csvs/train/df_test.csv'
# Column to be used for grouping of images
column_target = 'std_image'
# Directory for saving train feature vectors.
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
features_parent_dir = './featuresTrain'
os.makedirs(features_parent_dir, exist_ok=True)
# Path for saving train feature vectors
features_path = features_parent_dir + '/real_image_train_features.h5'
# Path for saving ranked triplets
csv_path_ranked_triplets = 'df_triplets_ranked.csv'
# Path for saving candidate triplets
csv_path_triplets = 'df_triplets.csv'
# Base name (no directory, no extension) used when saving the final model weights
weightsTunedPath = '{}_{}_{}_{}_{}_{}_{}'.format(tag,trainingLayer,batch_size,learning_rate,num_epochs,epochs_per_triplets,top_triplets)
print("model weights path: ", weightsTunedPath)
# Checkpoint file written by the ModelCheckpoint callback during training
tempStr = tag + '_' + trainingLayer + '_' + str(learning_rate)
os.makedirs('./checkpoints', exist_ok=True)
ckpt_filepath = "./checkpoints/{}-best.h5".format(tempStr)

model weights path:  mac-keras-real-base-shape-new-data-margin01_conv5_pca_32_1e-05_300_1_1600


# Generate the train, validation and test split

In [None]:
import numpy as np
from modules_split_data import split_train_val_test

# Fix the NumPy seed so that the split is repeatable
np.random.seed(0)

# Split the input csv and save the train / validation / test csv's
split_train_val_test(
    input_csv_path,
    csv_path_train,
    csv_path_val,
    csv_path_test,
    column_target,
    samples_per_class_val,
    samples_per_class_test,
    min_count_th,
    imagesDir,
)
print("Data split into train, validation and test dataset, based on shape, successfully...")

# Load the model (for generating train features)

In [None]:
from load_model import load_deep_retrieval_siamese
# Model used only for generating train features, hence a batch of 1.
# "lr" takes the learning_rate hyperparameter instead of a second hard-coded
# 1e-5, so the two cannot silently diverge.
model_parameters={"tune_conv5":tune_conv5, "tune_pca":tune_pca,
                 "lr":learning_rate,"batch":1,"margin":margin,"model_type":pooling_type}
# Pre-trained model, since weights_path is not provided
model = load_deep_retrieval_siamese(model_parameters)
#model.summary()


# Generate ranked triplets from train data

In [None]:
from modules_generating_triplets import (
    generate_candidate_triplets,
    generate_feature_vectors,
    generate_ranked_triplets,
)

# --- Step 1: feature vectors -------------------------------------------
# Generate, and save, feature vectors for all train images listed in the train csv
generate_feature_vectors(model, csv_path_train, imagesDir, features_path, len_features)

# --- Step 2: candidate triplets ----------------------------------------
# Seed NumPy for repeatability of the experiment
seed_no = 11
np.random.seed(seed_no)
generate_candidate_triplets(
    csv_path_train,
    csv_path_triplets,
    triplets_fixed_class,
    column_target,
)

# --- Step 3: ranked triplets -------------------------------------------
# Verbosity flag handed to the ranking step (per the original note: prints
# something every 500 triplets)
verbose = True
print("Ranking the triplets...")
generate_ranked_triplets(
    csv_path_triplets,
    csv_path_ranked_triplets,
    features_path,
    margin,
    verbose,
)

# Preprocessing complete. Ready for siamese-tuning of model weights.

# Re-load the model, for tuning of model weights...

In [None]:
# Delete the existing model
%reset_selective -f model

In [5]:
# %load_ext autoreload
# %autoreload 2
from load_model import load_deep_retrieval_siamese
from keras.preprocessing import image
import numpy as np
import pandas as pd
from keras.callbacks import ModelCheckpoint
from keras import optimizers

# Same configuration as the feature-extraction model, except for the batch size.
# "lr" takes the learning_rate hyperparameter instead of a hard-coded 1e-5, so
# changing learning_rate in the parameter cell actually affects training.
model_parameters={"tune_conv5":tune_conv5, "tune_pca":tune_pca,
                 "lr":learning_rate,"batch":batch_size,"margin":margin,"model_type":pooling_type}
# Loading pre-trained model, but with a different batch size this time ..
model = load_deep_retrieval_siamese(model_parameters)
model.summary()

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)
  return f(*args, **kwds)


Loading Deep Image Retrieval (pre-trained weights) model...


  pca_model = Model(input=in_pca, output=out_pca)
  output=[y_pred])#rmac_norm)


Done!
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
vgg16 (Model)                   (None, 7, 7, 512)    14714688    input_2[0][0]                    
                                                                 input_3[0][0]                    
    

# Preprocess train data for siamese training

In [6]:
# The siamese model accepts exactly batch_size inputs at a time, so the train
# frame is trimmed to a whole number of batches before features are generated
# from it.
import os
csv_path_train_siamese = '{}_siamese.csv'.format(os.path.splitext(input_csv_path)[0])
df = pd.read_csv(csv_path_train)
# Largest multiple of batch_size that does not exceed the number of rows
new_len_df = (len(df) // batch_size) * batch_size
df = df.iloc[:new_len_df]
df.to_csv(csv_path_train_siamese)

# Finally, fine-tune the model weights...

In [7]:
# %load_ext autoreload
# %autoreload 2
import matplotlib.pyplot as plt
import keras
from keras.callbacks import LearningRateScheduler
from modules_custom_callbacks import PlotLosses, save_figs, computeTriplets, generator
#%matplotlib inline  
#===========================================
# Callbacks
#===========================================
# For plotting train losses after every epoch.
# NOTE(review): plot_losses is created but not passed to fit_generator below —
# confirm whether it should be part of `callbacks`.
plot_losses = PlotLosses(figsize=(8, 5))
#===========================================
# # Learning rate scheduler
# def exp_decay(epoch,learning_rate):
#     return ((1e-5)*(0.995 **epoch))
# lr_scheduler = LearningRateScheduler(exp_decay, verbose=1)
#===========================================
# Saves the model weights to ckpt_filepath after every epoch. With
# save_best_only=False the file is overwritten each epoch regardless of whether
# the monitored loss improved, so despite the "-best" filename it holds the
# most recent epoch's weights.
checkpoint = ModelCheckpoint(ckpt_filepath,
                                monitor='loss',
                                verbose=1,
                                save_best_only=False,
                                save_weights_only=True, 
                                mode='auto')
#===========================================
# For resampling candidate triplets after every epochs_per_triplets epochs 
resampleTriplets = computeTriplets(epochs_per_triplets,features_path,csv_path_train_siamese,batch_size,
                                   imagesDir,margin,csv_path_ranked_triplets,csv_path_triplets,triplets_fixed_class,column_target,len_features)
#===========================================
# Making sure top_triplets is a multiple of batch_size, so that batches in an epoch can be exactly tracked
top_triplets = int(top_triplets // batch_size) * batch_size
# Integer division: this value is used as the number of steps per epoch
batches_per_epoch = top_triplets // batch_size
#===========================================
# Generator
# The ranked-triplets csv is passed directly rather than by re-assigning the
# global input_csv_path: the preprocessing cell derives csv_path_train_siamese
# from input_csv_path, so overwriting it here would change that cell's result
# if it were re-run afterwards.
train_generator = generator(csv_path_ranked_triplets,top_triplets, batch_size, 224, 224, 3,imagesDir,batches_per_epoch,epochs_per_triplets,
                           len_features,augmentation_flag)
#===========================================

history = model.fit_generator(train_generator,steps_per_epoch = batches_per_epoch
                              ,epochs = num_epochs,callbacks=[resampleTriplets,checkpoint])#, lr_scheduler


Epoch 1/300
Generating feature vectors...
Feature vectors genereated for all train vectors...
Candidate triplets generated...
Ranking the triplets...
Number of candidates with non-zero loss:  12740
Total number of candidates:  12740
Candidate triplets ranked and saved, successfully...

Epoch 00001: saving model to ./checkpoints/mac-keras-real-base-shape-new-data-margin01_conv5_pca_1e-05-best.h5
Epoch 2/300
 3/50 [>.............................] - ETA: 37s - loss: 3.3413

KeyboardInterrupt: 

# Save the model

In [8]:
# Save the model weights under ./saved_models, using the filename defined in weightsTunedPath.
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check.
os.makedirs('./saved_models', exist_ok=True)
model.save_weights('./saved_models/{}.h5'.format(weightsTunedPath))
print("Fine-tuned model weights saved successfully...")

Fine-tuned model weights saved successfully...


In [9]:
# Echo the base filename (without directory or extension) under which the weights were saved
print("model path: ",weightsTunedPath)

model path:  mac-keras-real-base-shape-new-data-margin01_conv5_pca_32_1e-05_300_1_1600
