# Main

In [1]:
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Activation, Dropout, Lambda
from keras.optimizers import Adadelta
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
import os

from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3



Using TensorFlow backend.


## Parameters

In [2]:
INPUT_SHAPE = (256, 256, 3)
TARGET_SIZE = (256, 256)
NUM_CLASSES =  1
LEARNING_RATE = 0.1
PATIENCE = 3
VERBOSE = 1
LEARNING_RATE_REDUCTION_FACTOR = 0.5
MIN_LEARNING_RATE = 0.00001

EPOCHS = 25
BATCH_SIZE = 50

MODEL_OUT_DIR = ''
OUTPUT_DIR = 'output'
SEED = 42

## Load Data

In [3]:
train_labeled = pd.read_csv('data/train_labeled.csv')
test_labeled = pd.read_csv('data/test_labeled.csv')
val_labeled = pd.read_csv('data/val_labeled.csv')

## Create the model

In [4]:
INPUT_SHAPE

(256, 256, 3)

In [5]:
def image_process(x):
    import tensorflow as tf
    hsv = tf.image.rgb_to_hsv(x)
    gray = tf.image.rgb_to_grayscale(x)
    rez = tf.concat([hsv, gray], axis=-1)
    return rez

def network(input_shape, num_classes):
    img_input = Input(shape=input_shape, name='data')
    x = Lambda(image_process)(img_input)
    x = Conv2D(16, (5, 5), strides=(1, 1), padding='same', name='conv1')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='valid', name='pool1')(x)
    x = Conv2D(32, (5, 5), strides=(1, 1), padding='same', name='conv2')(x)
    x = Activation('relu', name='conv2_relu')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='valid', name='pool2')(x)
    x = Conv2D(64, (5, 5), strides=(1, 1), padding='same', name='conv3')(x)
    x = Activation('relu', name='conv3_relu')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='valid', name='pool3')(x)
    x = Conv2D(128, (5, 5), strides=(1, 1), padding='same', name='conv4')(x)
    x = Activation('relu', name='conv4_relu')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='valid', name='pool4')(x)
    x = Flatten()(x)
    x = Dense(1024, activation='relu', name='fcl1')(x)
    x = Dropout(0.2)(x)
    x = Dense(128, activation='relu', name='fcl2')(x)
    x = Dropout(0.2)(x)
    out = Dense(num_classes, activation='sigmoid', name='predictions')(x)
    rez = Model(inputs=img_input, outputs=out)
    return rez

def resnet(input_shape, num_classes):
    resnet = ResNet50(weights='imagenet',
                       input_shape=input_shape,
                   include_top=False)
    image = resnet.get_layer(index=0).output
    output = resnet.get_layer(index=-1).output
    output = Flatten()(output)
    output = Dense(num_classes, activation='sigmoid', name='predictions')(output)
    
    resnet = Model(inputs = image, outputs = output)
    return resnet
    
    
# model = network(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
model = network(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
resnet = resnet(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)



In [6]:
# restnet = model

In [7]:
# image = model.get_layer(index=0).output
# # x= model.get_layer('avg_pool').output
# output = model.get_layer(index=-1).output
# output = Flatten()(output)
# output = Dense(NUM_CLASSES, activation='softmax', name='predictions')(output)

# restnet = Model(inputs = model.input, outputs = output)
# model = restnet

## Creating the Data Generators

In [8]:
datagen = ImageDataGenerator(
        width_shift_range=0.0,
        height_shift_range=0.0,
        zoom_range=0.0,
        horizontal_flip=True,
        vertical_flip=True,  # randomly flip images
)


train_generator = datagen.flow_from_dataframe(
    dataframe=train_labeled,
    directory="./Recipes5k/images/",
    x_col="url",
    y_col="label",
    subset="training",
    batch_size=32,
    seed=SEED,
    shuffle=True,
    class_mode="binary",
    target_size=TARGET_SIZE
)



Found 3409 validated image filenames belonging to 2 classes.


In [9]:
test_datagen = ImageDataGenerator()


test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_labeled,
    directory="./Recipes5k/images/",
    x_col="url",
    y_col="label",
    batch_size=1,
    seed=SEED,
    shuffle=False,
    class_mode="categorical",
    target_size=(256,256)
)

Found 783 validated image filenames belonging to 2 classes.


In [10]:
val_datagen = ImageDataGenerator(
        width_shift_range=0.0,
        height_shift_range=0.0,
        zoom_range=0.0,
        horizontal_flip=True,
        vertical_flip=True,  # randomly flip images
)

In [11]:
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_labeled,
    directory="./Recipes5k/images/",
    x_col="url",
    y_col="label",
    batch_size=32,
    seed=SEED,
    shuffle=False,
    class_mode="binary",
    target_size=(256,256)
)

Found 634 validated image filenames belonging to 2 classes.


In [12]:
model_out_dir = os.path.join(OUTPUT_DIR, "test")
                            
if not os.path.exists(model_out_dir):
    os.makedirs(model_out_dir)

In [13]:
train_labeled['label'].value_counts()

Vegetarian        2153
Non-Vegetarian    1256
Name: label, dtype: int64

In [14]:
model

<keras.engine.training.Model at 0x243961eedc8>

In [15]:
# model = resnet

In [16]:
model

<keras.engine.training.Model at 0x243961eedc8>

In [17]:
import keras.backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [19]:
optimizer = Adadelta(lr = LEARNING_RATE)
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy', f1_m])
learning_rate_reduction = ReduceLROnPlateau(
                    monitor='val_loss', 
                    patience=PATIENCE, verbose=VERBOSE, 
                    factor=LEARNING_RATE_REDUCTION_FACTOR, 
                    min_lr=MIN_LEARNING_RATE
)
save_model = ModelCheckpoint(filepath=model_out_dir + "/model.h5", monitor='val_f1_m', verbose=VERBOSE, 
                             save_best_only=True, save_weights_only=False, mode='max', period=1)
class_weight = {
    1.0: 0.5,
    0.0: 0.5
}
history = model.fit_generator(generator=train_generator,
                                  class_weight=class_weight,
                                  epochs=EPOCHS,
                                  steps_per_epoch=(train_generator.n // BATCH_SIZE) + 1, 
                                  verbose=VERBOSE,
                                  validation_data=val_generator,
                                  validation_steps=(val_generator.n // BATCH_SIZE) + 1,
                                  callbacks=[learning_rate_reduction, save_model]
                             )
weights = model.load_weights(model_out_dir + "/model.h5")
weights
# validation_data=validation_gen,
#                                   validation_steps=(val_generator.n // BATCH_SIZE) + 1,

Epoch 1/25

Epoch 00001: val_f1_m improved from -inf to 0.73440, saving model to output\test/model.h5
Epoch 2/25

Epoch 00002: val_f1_m improved from 0.73440 to 0.75880, saving model to output\test/model.h5
Epoch 3/25

Epoch 00003: val_f1_m improved from 0.75880 to 0.78071, saving model to output\test/model.h5
Epoch 4/25

Epoch 00004: val_f1_m did not improve from 0.78071
Epoch 5/25

Epoch 00005: val_f1_m did not improve from 0.78071
Epoch 6/25

Epoch 00006: val_f1_m improved from 0.78071 to 0.78344, saving model to output\test/model.h5
Epoch 7/25

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 00007: val_f1_m did not improve from 0.78344
Epoch 8/25

Epoch 00008: val_f1_m did not improve from 0.78344
Epoch 9/25

Epoch 00009: val_f1_m did not improve from 0.78344
Epoch 10/25

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 00010: val_f1_m did not improve from 0.78344
Epoch 11/25

Epoch 00011: val_f1_m did not

OSError: Unable to create file (unable to open file: name = 'output\test/model.h5', errno = 22, error message = 'Invalid argument', flags = 13, o_flags = 302)

In [20]:
model.load_weights(model_out_dir + "/model.h5")

val_generator.reset()
test_generator.reset()
# loss_v, accuracy_v = model.evaluate_generator(val_generator, steps=(val_generator.n // BATCH_SIZE) + 1, verbose=VERBOSE)
# loss, accuracy = model.evaluate_generator(test_generator, steps=(test_generator.n // BATCH_SIZE) + 1, verbose=VERBOSE)
# print("Validation: accuracy = %f  ;  loss_v = %f" % (accuracy_v, loss_v))
# print("Test: accuracy = %f  ;  loss_v = %f" % (accuracy, loss))

# plot_model_history(history, out_path=model_out_dir)
test_generator.reset()
y_pred = model.predict_generator(test_generator, steps=test_generator.n, verbose=VERBOSE)
# y_true = test_generator.classes
# plot_confusion_matrix(y_true, y_pred.argmax(axis=-1), labels, out_path=model_out_dir)
# class_report = classification_report(y_true, y_pred.argmax(axis=-1), target_names=labels)

# with open(model_out_dir + "/classification_report.txt", "w") as text_file:
#     text_file.write("%s" % class_report)
# print(class_report)



In [30]:
y_true = test_generator.classes

In [25]:
# y_pred

In [23]:
y_pred[y_pred < 0.55]

array([0.54958355, 0.5343794 , 0.5356111 , 0.5368543 , 0.536808  ,
       0.54076827, 0.5452594 , 0.53996164, 0.54585993, 0.53773266,
       0.27366996, 0.5456502 , 0.5442275 , 0.5415915 , 0.5404472 ,
       0.5326773 , 0.543449  , 0.5374627 , 0.5387341 , 0.52867323,
       0.54094344, 0.5492553 , 0.5424087 , 0.54858446, 0.53492516,
       0.54477113, 0.53966707, 0.5361723 , 0.5385075 , 0.5407376 ,
       0.5374603 , 0.5429233 , 0.54473805, 0.5455593 , 0.5380229 ,
       0.54100645, 0.53912735, 0.5010425 , 0.5399611 , 0.5489154 ,
       0.5404606 , 0.545887  , 0.54810524, 0.5401783 , 0.5337636 ,
       0.5453385 , 0.5464207 , 0.5378585 , 0.5482451 , 0.5472656 ,
       0.53730965, 0.5348318 , 0.5446014 , 0.5440617 , 0.5398571 ,
       0.5488047 , 0.54207   , 0.54665846, 0.5467832 , 0.54499847,
       0.5499784 , 0.5392333 , 0.54463816, 0.54931635, 0.5385078 ,
       0.54703474, 0.5392014 , 0.54351526, 0.54092616, 0.5352765 ,
       0.5470838 , 0.5410812 , 0.54694957, 0.5389671 , 0.54887

In [48]:
test = np.where(y_pred > 0.51, 1, 0)
test;

In [None]:
test_labeled['label'].value_counts()

In [49]:
492/783

0.6283524904214559

In [50]:
from sklearn.metrics import accuracy_score
accuracy_score(y_true, test)

0.6309067688378033

In [None]:
y_pred[y_pred < 0.55]

In [None]:
y_pred = model.predict_generator(test_generator, verbose=VERBOSE)
y_pred.argmax(axis=-1)

In [None]:
kaas = y_pred > 0.5
kaas

In [None]:
y_pred

In [None]:
predicted_class_indices = np.argmax(y_pred, axis=1)


In [None]:
test = np.argmax(y_pred, axis=1)

labels = (train_generator.class_indices)
labels

In [None]:
y_pred

In [None]:
def plot_model_history(model_history, out_path=""):
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    # summarize history for accuracy
    axs[0].plot(range(1, len(model_history.history['accuracy']) + 1), model_history.history['accuracy'])
    axs[0].plot(range(1, len(model_history.history['val_accuracy']) + 1), model_history.history['val_accuracy'])
    axs[0].set_title('Model Accuracy')
    axs[0].set_ylabel('Accuracy')
    axs[0].set_xlabel('Epoch')
    axs[0].legend(['train', 'val'], loc='best')
    # summarize history for loss
    axs[1].plot(range(1, len(model_history.history['loss']) + 1), model_history.history['loss'])
    axs[1].plot(range(1, len(model_history.history['val_loss']) + 1), model_history.history['val_loss'])
    axs[1].set_title('Model Loss')
    axs[1].set_ylabel('Loss')
    axs[1].set_xlabel('Epoch')
    axs[1].legend(['train', 'val'], loc='best')
    # save the graph in a file called "acc_loss.png" to be available for later; the model_name is provided when creating and training a model
    if out_path:
        plt.savefig(out_path + "/acc_loss.png")
    plt.show()

def plot_confusion_matrix(y_true, y_pred, classes, out_path=""):
    cm = confusion_matrix(y_true, y_pred)
    df_cm = pd.DataFrame(cm, index=[i for i in classes], columns=[i for i in classes])
    plt.figure(figsize=(40, 40))
    ax = sn.heatmap(df_cm, annot=True, square=True, fmt="d", linewidths=.2, cbar_kws={"shrink": 0.8})
    if out_path:
        plt.savefig(out_path + "/confusion_matrix.png")  # as in the plot_model_history, the matrix is saved in a file called "model_name_confusion_matrix.png"
    return ax    

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
y_true = test_generator.classes

In [None]:
y_pred

In [None]:
accuracy_score(y_true, y_pred)

In [None]:
y_pred = model.predict_generator(test_generator, steps=test_generator.n, verbose=VERBOSE)
len(y_pred)

In [None]:
len(y_pred)

In [None]:
len(y_true)

In [None]:
test_labeled.shape

In [None]:
np.unique(y_pred)