In [4]:
import os
import pickle
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import tensorflow as tf
from keras.preprocessing.image import load_img, img_to_array
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.layers import Input
from keras.layers import Dense, Dropout, Activation

In [6]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [1]:
#----------------------------------------------- Fonctions -----------------------------------------------#

## Resize Image function to desired shape
def resize_image(img_shape,path,path_dir):
    for item in tqdm(os.listdir(path)):
        if item == '.DS_Store':
             continue
        if os.path.isfile(path+item):
            im = Image.open(path+item)
            f, e = os.path.splitext(item)
            imResize = im.resize((img_shape[0],img_shape[1])) ## Resize image
            imResize.save(path_dir+f+'.jpg','jpeg') ## Save it to path_dir


## Get the features of all images with pre trained model : Inception ResNet v2. Discarding last dense
## classification layer
def get_features_images_Inception(input_shape,model):
  image_res_path = '/content/drive/MyDrive/Colab Notebooks/Task3/food_res/'
  all_features = []
  for item in tqdm(os.listdir(image_res_path)):
    img = image.load_img(image_res_path+item, target_size=(299, 299))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    img = tf.keras.applications.inception_resnet_v2.preprocess_input(x)
    x_feat = model.predict(img,steps=10000)
    f,s = os.path.splitext(item)
    try:
      features = np.insert(x_feat,0,int(f))
      all_features.append(features)
    except ValueError:
      continue
  return all_features
 

#building the training/testing dataset, based on ids triplets given in the txt files of the task
def build_raw_features(all_features, file, train_or_test_bool):
    training_data_id = pd.read_csv(file, delim_whitespace=True, header=None, names=["A", "B", "C"])
    training_data_raw = []

    if train_or_test_bool:
        for train_point in tqdm(range(len(training_data_id))):
            triplet = training_data_id.iloc[train_point]
            raw_point_positive = np.concatenate((all_features[triplet['A']], all_features[triplet['B']], all_features[triplet['C']]), axis = -1)
            raw_point_negative = np.concatenate((all_features[triplet['A']], all_features[triplet['C']], all_features[triplet['B']]), axis = -1)

            training_data_raw.append(raw_point_positive)
            training_data_raw.append(raw_point_negative)
    else:
        for train_point in tqdm(range(len(training_data_id))):
            triplet = training_data_id.iloc[train_point]
            raw_point = np.concatenate((all_features[triplet['A']], all_features[triplet['B']], all_features[triplet['C']]), axis = -1)
            training_data_raw.append(raw_point)

    return np.array(training_data_raw)

#constructing the labels by doubling the training dataset. Need positive as well as negatives
def construct_labels(file):
  triplets_read = pd.read_csv(file, delim_whitespace=True, header=None, names=["A", "B", "C"])
  #we assume here the number of feature is even
  labels = np.ones((len(triplets_read)*2, 1))

  idx_list = [idx for idx in tqdm(range(1, len(triplets_read)*2 + 1, 2))]

  labels[idx_list] = 0
  return np.array(labels)


In [2]:
#----------------------------------------------- Main -----------------------------------------------#

def main():
  ## Boolean ##
  resize_bol = False
  get_features_bol = False
  build_tensor_bol = True
  train_or_test_bol = [True,False]

  ## Path ##
  food_dir = '/content/gdrive/MyDrive/Colab Notebooks/food/'
  food_res_dir = '/content/drive/MyDrive/Colab Notebooks/food_res/'
  features_file = '/content/gdrive/MyDrive/Colab Notebooks/features.pckl'
  train_file = '/content/gdrive/MyDrive/Colab Notebooks/train_triplets.txt'
  test_file = '/content/gdrive/MyDrive/Colab Notebooks/test_triplets.txt'

  ## Shape ##
  img_shape = [299,299]
  input_shape = (299,299,3)

  ## Model ##
  model = tf.keras.applications.InceptionResNetV2(pooling='avg',include_top=False)


  ## Flow ##
  if resize_bol:
    print('Resizing image ...')
    resize_image(img_shape,food_dir,food_res_dir)

  if get_features_bol:
    print('Computing features ...')
    features = get_features_images_Inception(input_shape,model)
    with open(features_file, 'wb') as f:
      pickle.dump(features, f)
  else:
    print('Loading features ...')
    with open(features_file, 'rb') as f:
      features = pickle.load(f)

  ## Sorting the features ascending ## 
  features = np.array(features)
  features_sort = features[features[:, 0].argsort()]
  features_sort = np.delete(features_sort, 0, axis=1)
  
  #creating features triplets and their labels
  labels = construct_labels(train_file)
  if build_tensor_bol:
    print('Creating train and test tensor ...')
    train_final = build_raw_features(features_sort,train_file,train_or_test_bol[0])
    test_final = build_raw_features(features_sort,test_file,train_or_test_bol[1])

  #classification model
  x = x_in = Input(train_final.shape[1:])
  x = Activation('relu')(x)
  x = Dropout(0.7)(x)
  x = Dense(1152)(x)
  x = Activation('relu')(x)
  x = Dense(288)(x)
  x = Activation('relu')(x)
  x = Dense(72)(x)
    
  x = Activation('relu')(x)
  x = Dense(18)(x)
  x = Activation('relu')(x)
  x = Dense(1)(x)
  x = Activation('sigmoid')(x)
  model = Model(inputs=x_in, outputs=x)

  #model's attributes
  model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

  #fitting training dataset to training labels
  model.fit(x = train_final, y = labels, epochs=7)

  #making final predictions for tesing dataset
  y_test = model.predict(test_final)
  y_test_thresh = np.where(y_test < 0.5, 0, 1)

  #output in txt format to desired location
  np.savetxt('/content/gdrive/MyDrive/Colab Notebooks/predictions.txt', y_test_thresh, fmt='%d')

In [7]:
#executing parsed pipeline with following instructions: 
##set resize_bool for resizing images to desired shape
##set get_features_bol for computing feature tensor of all images + output to features.pckl
##set build_tensor_bol for building training testing dataset with feature triplets
main()

Loading features ...


100%|██████████| 59515/59515 [00:00<00:00, 1442688.15it/s]


Creating train and test tensor ...


100%|██████████| 59515/59515 [00:09<00:00, 6100.70it/s]
100%|██████████| 59544/59544 [00:07<00:00, 8004.70it/s]


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
