In [3]:
from PIL import Image
from collections import Counter
import numpy as np
import pandas as pd
from zipfile import ZipFile 
import torch
from torchvision import transforms

# You need to INSTALL KERAS by running 'conda install -c conda-forge keras' in conda terminal.
# Keras Applications are deep learning models.
# These models can be used for prediction, feature extraction, and fine-tuning.
from keras.preprocessing import image          # For getting image features
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

# For training
import keras
import keras.layers as kl
from keras import backend as K
from keras.models import Model

In [7]:
#________________UNZIPPING IMAGES________________________________________________
# Extract the image archive into the current working directory.
# The feature-extraction step reads images from './food/', so the archive is
# presumably laid out with a top-level 'food/' folder -- TODO confirm.
#________________________________________________________________________________
food_zip = "food.zip"                       # Name of the zip archive holding the images
with ZipFile(food_zip, 'r') as archive:     # Deliberately not named `zip`: that would shadow the builtin in all later cells
    archive.extractall()                    # Unpack every member next to the notebook

In [25]:
#________________PREPROCESSING IMAGES - GETTING FEATURES__________________________
# Run every image through a pre-trained Keras model (VGG16 without its classifier
# head) and keep the flattened output as that image's feature vector.
# Code more or less from https://keras.io/api/applications/#vgg16
# Extraction is slow (about an hour on a laptop when done image by image), so the
# result is cached in 'features.pt' and the images are only processed when that
# file does not exist yet.
#________________________________________________________________________________
number_of_images = 10000       # Image files are indexed 0 -> 9999
img_path = './food/'           # Folder where images are stored
features_file = 'features.pt'  # Cache for the extracted features
feature_length = 25088         # Length of one flattened VGG16 feature vector (found by running one image manually)


def extract_vgg16_features(img_dir, n_images, n_features, batch_size=32):
    """Compute VGG16 features for images '00000.jpg' .. in `img_dir`.

    Parameters
    ----------
    img_dir : str
        Folder prefix (with trailing separator) that holds the image files.
    n_images : int
        Number of images; file names are the zero-padded indices 0..n_images-1.
    n_features : int
        Length of one flattened feature vector.
    batch_size : int
        Images sent through the network per `predict` call. Predicting in
        batches avoids the large per-call overhead of one `predict` per image.

    Returns
    -------
    torch.Tensor of shape (n_images, n_features); row i belongs to image i.
    """
    extractor = VGG16(weights='imagenet', include_top=False)   # Pretrained Keras model without the classifier head
    # Different models we could try:
    #   ResNet50(weights='imagenet')
    #   base_model = VGG19(weights='imagenet')
    #   Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)

    extracted = torch.empty((n_images, n_features))
    for start in range(0, n_images, batch_size):
        stop = min(start + batch_size, n_images)               # Last batch may be shorter
        batch = np.stack([
            image.img_to_array(                                # Image -> (224, 224, 3) matrix
                image.load_img(img_dir + str(i).zfill(5) + ".jpg", target_size=(224, 224))
            )
            for i in range(start, stop)
        ])
        batch = preprocess_input(batch)                        # VGG16-specific input preprocessing
        batch_features = extractor.predict(batch, verbose=0)   # One forward pass for the whole batch
        # Flatten each image's feature map into one row, same layout as flattening a single-image result.
        extracted[start:stop] = torch.tensor(batch_features.reshape(stop - start, -1))
    return extracted


try:
    features = torch.load(features_file)                       # Fast path: reuse features from an earlier run
except FileNotFoundError:
    features = extract_vgg16_features(img_path, number_of_images, feature_length)
    torch.save(features, features_file)                        # Save so that this never has to run again

In [2]:
#_____________________LOADING PREPROCESSED DATA__________________________________
# Read the image features back from 'features.pt', the file written by the
# feature-extraction cell. Use this instead of re-running the extraction.
#________________________________________________________________________________
features = torch.load(f='features.pt')

In [4]:
#________________PREPROCESSING SETS OF TRIPLETS___________________________________
# Load the train and test triplet files into integer tensors, one triplet per row.
# The training cell reads column 0 as the anchor, column 1 as the positive and
# column 2 as the negative image index.
# Nothing else happens here: no labels are added and no columns are swapped.
#_________________________________________________________________________________
def load_triplets(file_path):
    """Read a space-separated, header-less triplet file into an integer tensor.

    Fields are read as strings and then cast to int; presumably the file stores
    zero-padded image names like the image files do -- TODO confirm.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    torch.Tensor holding one row per line of the file.
    """
    table = pd.read_csv(file_path, sep=' ', header=None, dtype=str)
    return torch.tensor(table.astype(int).values)


train_triplets = load_triplets('train_triplets.txt')   # Train data
test_triplets = load_triplets('test_triplets.txt')     # Test data

In [4]:
#____________PREPARING A MODEL - SIAMESE NETWORK WITH TRIPLET LOSS________________
# Most of the code from https://keras.io/examples/vision/siamese_network/
#_________________________________________________________________________________

def euclidean_distance(xy):
    """Return the squared Euclidean distance between the two tensors in `xy`.

    A single argument is used because the function is applied through a Keras
    Lambda layer, which passes its list of inputs as one object. The squared
    differences are summed over axis 1 and that axis is kept with length 1.
    No square root is taken, so this is the squared distance.
    """
    first, second = xy
    difference = first - second
    return K.sum(K.square(difference), axis=1, keepdims=True)
                                                               # Other than this we could try contrastive loss
def triplet_loss(pos, neg, margin = K.constant(0)):
    """Element-wise triplet loss: max(pos - neg + margin, 0).

    L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)

    pos    -- anchor/positive distance
    neg    -- anchor/negative distance
    margin -- gap required between the two distances (defaults to a constant 0)
    """
    zero = K.constant(0)
    return K.maximum((pos - neg) + margin, zero)

size_of_features = 25088                                       # Feature vector has length 25088.
units = 1024                                                   # Width of the final embedding; picked by hand, not tuned.

#______________
# The embedding network maps one image feature vector to its embedding; the
# siamese network applies it to each image of a triplet.
# It is a stack of Dense layers (every neuron receives input from all neurons
# of the previous layer), each preceded by batch normalization, with dropout
# after the two hidden layers.
#______________
model_input = kl.Input(shape=(size_of_features,))             # Shape given as a tuple, tied to the constant above
hidden = kl.BatchNormalization()(model_input)                 # Normalize the raw features first
hidden = kl.Dense(units*4, activation='relu')(hidden)         # Must consume the normalized tensor; feeding model_input
                                                              # here would silently drop the normalization above.
hidden = kl.Dropout(0.5)(hidden)                              # Randomly zeroes inputs with rate 0.5 during training,
                                                              # which helps prevent overfitting.
hidden = kl.BatchNormalization()(hidden)
hidden = kl.Dense(units*4, activation='relu')(hidden)
hidden = kl.Dropout(0.5)(hidden)
hidden = kl.BatchNormalization()(hidden)
hidden = kl.Dense(units, activation='linear')(hidden)         # Linear output layer: the embedding itself

embedding = Model(model_input, hidden, name="embedding")      # Groups the layers into one reusable network

#______________
# Input to the siamese network: the feature vectors of the three triplet images.
# The same embedding network is applied to each of them, and the model outputs
# the distance between the anchor and the positive / negative embedding.
# Anchor is the image in 1st column, positive is 2nd column, negative is 3rd.
# The distances are computed in Lambda layers, which allow arbitrary
# expressions to be used as a layer when constructing models:
#   keras.layers.Lambda(function, output_shape=None, mask=None, arguments=None, **kwargs)
#______________
anchor_input = kl.Input(shape=(size_of_features,))            # Create inputs
positive_input = kl.Input(shape=(size_of_features,))
negative_input = kl.Input(shape=(size_of_features,))

anchor_output = embedding(anchor_input)                       # Shared weights: one embedding network, three calls
positive_output = embedding(positive_input)
negative_output = embedding(negative_input)

positive_distance = kl.Lambda(euclidean_distance)([anchor_output, positive_output])
negative_distance = kl.Lambda(euclidean_distance)([anchor_output, negative_output])

# One output tensor with two columns: column 0 is the anchor-positive distance,
# column 1 the anchor-negative distance.
distances = kl.Concatenate(axis=1)([positive_distance, negative_distance])

#______________
# Define a model
#______________
siamese_model = Model(inputs=[anchor_input, positive_input, negative_input], outputs=distances)

# The triplet loss depends only on the two distances, not on any target values,
# so it is attached to the model directly. A loss passed to compile() would be
# called as loss(y_true, y_pred), which does not match triplet_loss(pos, neg).
siamese_model.add_loss(K.mean(triplet_loss(positive_distance, negative_distance)))
siamese_model.compile(optimizer=keras.optimizers.Adam(0.0001))

In [5]:
#_________________________MODEL TRAINING__________________________________________
# Most of the code from https://keras.io/examples/vision/siamese_network/
#_________________________________________________________________________________
train_rows = 56515            # The first 56515 triplets are used for training (intended as ~95% of the data),
                              # the remaining rows for validation.
batch_size = 64               # Triplets per training step; picked by hand, not tuned.


class TripletBatches(keras.utils.Sequence):
    """Serve triplet feature batches to Keras one batch at a time.

    Gathering the features of every triplet up front needs three arrays of
    shape (number of triplets, 25088), which does not fit in memory. This
    sequence keeps only the index table and looks up the feature rows of a
    single batch on demand.

    Parameters
    ----------
    triplets : torch.Tensor
        Integer table with one triplet per row: anchor, positive, negative.
    image_features : torch.Tensor
        Feature matrix with one row per image.
    batch_size : int
        Number of triplets per batch; the last batch may be shorter.
    """

    def __init__(self, triplets, image_features, batch_size):
        super().__init__()
        self.triplets = triplets
        self.image_features = image_features
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches per epoch, rounded up."""
        return (len(self.triplets) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, batch_index):
        """Return ([anchors, positives, negatives], None) for one batch."""
        start = batch_index * self.batch_size
        rows = self.triplets[start:start + self.batch_size]
        # Keras expects NumPy arrays, so each gathered torch block is converted.
        inputs = [self.image_features[rows[:, column]].numpy() for column in range(3)]
        # No targets are supplied, exactly like the array-based call this replaces:
        # the loss has to come from the model itself.
        return inputs, None


train_batches = TripletBatches(train_triplets[:train_rows], features, batch_size)
validation_batches = TripletBatches(train_triplets[train_rows:], features, batch_size)

# validation_data must be passed by keyword; a positional argument after epochs=10 is a syntax error.
siamese_model.fit(train_batches, epochs=10, validation_data=validation_batches)


In [46]:
# TO DO:
# - Get test data the same way as the training data above (that last part is not working yet)
# - Predict distances for test data:  distances = siamese_model.predict([anchors_test, positives_test, negatives_test])
# - Getting labels from distances : 
# for d in distances:
#        if d[0] > d[1]:       #d[0] will be positive distance, d[1] negative
#            results[i] = 0    
#        else:
#            results[i] = 1
#        i+=1

# - Saving to file: np.savetxt('output.txt', results.astype(int), fmt='%i')