In [None]:
import time
import os
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from sklearn.metrics import accuracy_score
import numpy as np
import tensorflow as tf

In [None]:
# Collect the names of everything inside the image directory, in sorted order,
# and report how many entries were found.
file_names = sorted(os.listdir("food"))
print('The number of food images: ', len(file_names))

In [None]:
# Read the training triplets as an integer array. The entries of each row are
# used further down as image ids (zero-padded to 5 digits to build file names).
train_triplets = np.loadtxt('train_triplets.txt', dtype=int)

# Show the array's shape first, then its contents.
for summary in (train_triplets.shape, train_triplets):
    print(summary)

In [None]:
# Fix the global NumPy seed so the shuffle and the labels are reproducible.
np.random.seed(618)

# Work on a shuffled copy so the triplets loaded from disk stay untouched.
train = train_triplets.copy()
np.random.shuffle(train)

# One random binary label per triplet. Later cells use the label to decide
# whether columns 1 and 2 of the triplet are swapped before feature extraction
# (label 0 keeps the file order, label 1 swaps them).
Y_train = np.random.randint(0, 2, size=train_triplets.shape[0])

print(train.shape, train[:10])
print(Y_train.shape, Y_train[:10])

In [None]:
# Number of triplets processed per feature-extraction / fit step.
BATCH_SIZE = 256
# Epochs passed to model.fit for every batch of extracted features.
EPOCHS = 100

n_total = len(train_triplets)

# Largest multiple of BATCH_SIZE that fits into the data set.
n_train = (n_total // BATCH_SIZE) * BATCH_SIZE

# Whatever is left after training, again rounded down to whole batches.
# NOTE: the leftover is always smaller than BATCH_SIZE, so n_valid is 0 with
# this split and the validation loop further down has nothing to iterate over.
n_leftover = n_total - n_train
n_valid = (n_leftover // BATCH_SIZE) * BATCH_SIZE

print(n_total, n_train, n_valid)

In [None]:
# Pretrained VGG16 used purely as a feature extractor: include_top=False drops
# the final fully-connected layers, leaving the convolutional output. For the
# 224x224 inputs used below, later cells reshape that output to 7x7x512 per image.
VGG = VGG16(include_top=False, weights='imagenet')

In [None]:
# Dropout rate applied after each BatchNormalization layer below.
r_drop = 0.2

# Classifier head over the stacked VGG16 features of one triplet:
# input is (3, 7, 7, 512), output is a single sigmoid probability.
model = Sequential([

    Flatten(input_shape=(3, 7, 7, 512)),
    BatchNormalization(),
    Dropout(r_drop),

    Dense(4096, activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(r_drop),

    Dense(1, activation='sigmoid')
])

# The last layer already applies a sigmoid, so the loss receives probabilities,
# not raw logits. from_logits must therefore be False; with True the loss would
# apply a second sigmoid to the already-squashed output.
model.compile(optimizer='adam',
              loss=keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

# Snapshot the freshly initialised weights under the 'ann' prefix.
model.save_weights('ann')
model.summary()

In [None]:
# Training run: for every batch of triplets, extract VGG16 features for the
# three images of each triplet and fit the classifier head on that batch.

acc_history = np.array([])
# Never filled in this loop (no validation data is passed to fit); kept so the
# plotting cell that references it still works.
val_acc_history = np.array([])

# Resume from the last checkpoint. This requires weights saved under the
# 'ann_trained' prefix to exist already.
model.load_weights('ann_trained')

total_time = 0

for s in range(0, n_train, BATCH_SIZE):

    t1 = time.perf_counter()

    # Collect per-triplet features in a list and stack once at the end;
    # repeatedly np.append-ing re-copies the whole buffer on every iteration.
    batch_features = []
    for i in range(s, s + BATCH_SIZE):

        # The label selects the column order: Y=0 keeps (0, 1, 2),
        # Y=1 swaps the last two columns to (0, 2, 1).
        triplet_images = []
        for j in (0, 1 + Y_train[i], 2 - Y_train[i]):
            img_raw = load_img('food/' + str(train[i][j]).zfill(5) + '.jpg',
                               target_size=(224, 224))
            # Convert to an array and apply the VGG16 input preprocessing.
            triplet_images.append(preprocess_input(img_to_array(img_raw)))

        # (3, 224, 224, 3): the three images of the triplet as one VGG batch.
        X_img = np.stack(triplet_images)

        # Use VGG to extract features for the three images.
        batch_features.append(VGG.predict(X_img))

    # (BATCH_SIZE, 3, 7, 7, 512), matching the classifier's input shape.
    features_array = np.stack(batch_features)

    t2 = time.perf_counter()

    history = model.fit(features_array, Y_train[s:s + BATCH_SIZE],
                        batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=0)
    acc_history = np.append(acc_history, history.history['accuracy'])

    t3 = time.perf_counter()
    total_time += t3 - t1
    print('s = ', s, 'Loading time = %.3f' %(t2 - t1), 'Training time = %.3f' %(t3 - t2), 'Total time = %.3f' %(total_time))

    # Checkpoint every 100 batches. The parentheses matter: without them
    # `s % 100*BATCH_SIZE` is evaluated as `(s % 100) * BATCH_SIZE`.
    if s % (100 * BATCH_SIZE) == 0:
        model.save_weights('ann_trained')
        print('weights saved at %.1f s' %(total_time))

model.save_weights('ann_trained')
print('Total time = %.3f' %(total_time))

In [None]:
# Draw the accuracy curves collected during training on a single set of axes.
fig, ax = plt.subplots()
ax.plot(acc_history, label='accuracy')
ax.plot(val_acc_history, label='val_accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()

# Validation Run

In [None]:
# Validation run: predict a label for every held-out triplet in the index
# range [n_train, n_train + n_valid), one batch at a time.

model.load_weights('ann_trained')
Y_hat = np.array([])

total_time = 0

for s in range(n_train, n_train + n_valid, BATCH_SIZE):

    t1 = time.perf_counter()

    # Collect per-triplet features in a list and stack once at the end;
    # repeatedly np.append-ing re-copies the whole buffer on every iteration.
    batch_features = []
    for i in range(s, s + BATCH_SIZE):

        # Same column ordering as in training: Y=0 keeps (0, 1, 2),
        # Y=1 swaps the last two columns to (0, 2, 1).
        triplet_images = []
        for j in (0, 1 + Y_train[i], 2 - Y_train[i]):
            img_raw = load_img('food/' + str(train[i][j]).zfill(5) + '.jpg',
                               target_size=(224, 224))
            # Convert to an array and apply the VGG16 input preprocessing.
            triplet_images.append(preprocess_input(img_to_array(img_raw)))

        # (3, 224, 224, 3): the three images of the triplet as one VGG batch.
        X_img = np.stack(triplet_images)

        # Use VGG to extract features for the three images.
        batch_features.append(VGG.predict(X_img))

    # (BATCH_SIZE, 3, 7, 7, 512), matching the classifier's input shape.
    features_array = np.stack(batch_features)

    # Sequential.predict_classes no longer exists in current TensorFlow.
    # For a single sigmoid output it was a 0.5 threshold on predict().
    batch_labels = (model.predict(features_array) > 0.5).astype(int)
    Y_hat = np.append(Y_hat, batch_labels)

    t2 = time.perf_counter()
    total_time += t2 - t1
    print('s = ', s, 'Loading time = %.3f' %(t2 - t1), 'Total time = %.3f' %(total_time))

print('Total time = %.3f' %(total_time))

In [None]:
# Score the validation predictions against the labels of the same index range.
print(Y_train.shape, Y_hat.shape)

# Y_hat holds one prediction per validation triplet, so it can only be compared
# with the validation slice of Y_train. Comparing it with Y_train[:n_train]
# fails on mismatched lengths.
Y_valid = Y_train[n_train:n_train + n_valid]

if len(Y_hat) > 0 and len(Y_hat) == len(Y_valid):
    print(accuracy_score(Y_valid, Y_hat))
else:
    # Happens when n_valid is 0 (no hold-out set) or when Y_hat does not come
    # from the validation run.
    print('Validation accuracy not computed: %d labels vs %d predictions'
          % (len(Y_valid), len(Y_hat)))

# Real Run

In [None]:
# Read the test triplets as an integer array and remember how many rows there
# are; the entries of each row are used below as image ids.
test_triplets = np.loadtxt('test_triplets.txt', dtype=int)
n_test = test_triplets.shape[0]
print(test_triplets.shape)

In [None]:
# Real run: predict a label for every test triplet using the trained weights.

model.load_weights('ann_trained')

# Collected as a plain list and converted once at the end, instead of
# re-copying a growing array with np.append for every triplet.
predicted_labels = []

total_time = 0

# Iterate over ALL test triplets (the loop previously stopped after 10 rows).
for i in range(n_test):

    t1 = time.perf_counter()

    # Test triplets are always fed in file order (columns 0, 1, 2).
    triplet_images = []
    for j in (0, 1, 2):
        img_raw = load_img('food/' + str(test_triplets[i][j]).zfill(5) + '.jpg',
                           target_size=(224, 224))
        # Convert to an array and apply the VGG16 input preprocessing.
        triplet_images.append(preprocess_input(img_to_array(img_raw)))

    # (3, 224, 224, 3): the three images of the triplet as one VGG batch.
    X_img = np.stack(triplet_images)

    # Use VGG to extract features for the three images.
    features = VGG.predict(X_img)

    # Add a leading batch axis of size 1 so the shape matches the classifier's
    # (3, 7, 7, 512) per-sample input.
    features_compress = features.reshape(1, 3, 7, 7, 512)

    # In training, label 1 means columns 1 and 2 were swapped. A score below
    # 0.5 therefore predicts "not swapped" for the file order, and that case is
    # written out as 1 (hence the `<` rather than `>`).
    score = model.predict(features_compress)[0, 0]
    predicted_labels.append(int(score < 0.5))

    t2 = time.perf_counter()
    total_time += t2 - t1
    if i % 2000 == 0:
        print('i = ', i, 'Loading time = %.3f' %(t2 - t1), 'Total time = %.3f' %(total_time))

Y_hat = np.array(predicted_labels)

print('Total time = %.3f' %(total_time))

In [None]:
# Write the predicted labels to disk, one integer per line.
np.savetxt(fname="labels.csv", X=Y_hat, fmt='%i')