# Hot dog - Not Hot dog Classifier

The goal of this notebook is to build a convolutional neural network that classifies images as either "hot dog" or "not hot dog". It is inspired by Jian-Yang's app from the popular HBO series "Silicon Valley" (https://www.youtube.com/watch?v=pqTntG1RXSY). The dataset used here was put together by a Kaggle user (https://www.kaggle.com/dansbecker/hot-dog-not-hot-dog) and contains 498 training images and 500 test images.

# Code

In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, UpSampling2D

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import keras
from keras.applications.mobilenet import MobileNet
from keras.applications.mobilenet import preprocess_input

In [3]:
# MobileNet with ImageNet weights and without its top classification layers
# (include_top=False); the custom classifier is stacked on top of it later.
mobilenet_base = MobileNet(
    include_top=False,
    weights='imagenet',
)













<keras.engine.training.Model at 0x17bba5781c8>

## Loading Hotdog-Not-Hotdog Dataset 

In [4]:
# Re-scaled dimensions of our images.
img_width, img_height = 150, 150

# Root folder of the dataset; it must contain the `train` and `test`
# sub-folders. The location can be overridden with the HOTDOG_DATA_DIR
# environment variable so the notebook runs on other machines without editing
# code; when the variable is unset the original location is used.
data_root = os.environ.get(
    'HOTDOG_DATA_DIR', 'C:/Users/anshu/projects/misc/hotdog_not_hotdog')
train_data_dir = data_root + '/train'
test_data_dir = data_root + '/test'

# The position of the channel axis depends on the backend's image data format.
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

# Model

In [5]:
def mymodel():
    '''Build and compile the binary hot dog / not hot dog classifier.

    The network stacks the shared ``mobilenet_base`` feature extractor, a
    global average pooling layer and a small dense head
    (Dense(64) -> ReLU -> Dropout(0.5) -> Dense(1) -> sigmoid).
    Based on the simple model from:
    https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss,
        the RMSprop optimizer (lr=0.001) and the accuracy metric.
    '''
    # Freeze layers in the base model (i.e. only train the classifier).
    for base_layer in mobilenet_base.layers:
        base_layer.trainable = False

    classifier = Sequential([
        mobilenet_base,
        GlobalAveragePooling2D(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ])

    classifier.compile(
        optimizer=keras.optimizers.RMSprop(lr=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Test function
mymodel().summary()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_1.00_224 (Model)   (None, None, None, 1024)  3228864   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                65600     
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
__________________________________________

In [6]:
# You may optionally change these parameters
batch_size = 50
epochs = 10

# Both splits only get the MobileNet `preprocess_input` function applied.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Data parameters (DO NOT MODIFY)
num_train_samples, num_test_samples = 498, 500

# Data generators (DO NOT MODIFY)
# Options shared by the train and test directory iterators.
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
)
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_data_dir, **flow_options)

Found 498 images belonging to 2 classes.
Found 500 images belonging to 2 classes.


# Model Evaluation

In [7]:
def evaluate_model(runs=5):
    '''Train and evaluate a fresh model several times and summarise accuracy.

    Every run builds a new model with ``mymodel()``, fits it on
    ``train_generator`` for ``epochs`` epochs and scores it on
    ``test_generator``.

    Args:
        runs: Number of independent train/evaluate repetitions.

    Returns:
        Tuple ``(mean, std)`` of the test-set accuracies over all runs,
        computed with ``np.mean`` and ``np.std``.
    '''
    # Ceiling division so the final, smaller batch is included. Floor division
    # dropped it: 498 // 50 = 9 steps left 48 training images out of an epoch,
    # and the test set was truncated whenever batch_size did not divide it.
    train_steps = (num_train_samples + batch_size - 1) // batch_size
    test_steps = (num_test_samples + batch_size - 1) // batch_size

    accuracies = []
    for i in range(runs):
        print('Executing run %d' % (i+1))
        model = mymodel()
        model.fit_generator(train_generator,
                            callbacks=[],
                            steps_per_epoch=train_steps,
                            epochs=epochs, verbose=0)
        print(' * Evaluating model on test set')
        # With metrics=['accuracy'] the result is [loss, accuracy].
        loss, accuracy = model.evaluate_generator(test_generator,
                                                  steps=test_steps,
                                                  verbose=0)
        print(' * Test set Loss: %.4f, Accuracy: %.4f' % (loss, accuracy))
        accuracies.append(accuracy)

    return np.mean(accuracies), np.std(accuracies)

mean_accuracy, std_accuracy = evaluate_model(runs=5)

Executing run 1
 * Evaluating model on test set
 * Test set Loss: 0.5512, Accuracy: 0.7980
Executing run 2
 * Evaluating model on test set
 * Test set Loss: 0.6771, Accuracy: 0.7740
Executing run 3
 * Evaluating model on test set
 * Test set Loss: 0.3912, Accuracy: 0.8460
Executing run 4
 * Evaluating model on test set
 * Test set Loss: 0.4591, Accuracy: 0.8280
Executing run 5
 * Evaluating model on test set
 * Test set Loss: 0.4667, Accuracy: 0.8140


In [8]:
# Report the aggregate accuracy returned by evaluate_model.
accuracy_summary = (mean_accuracy, std_accuracy)
print('Mean test set accuracy over 5 runs: %.4f +/- %.4f' % accuracy_summary)

Mean test set accuracy over 5 runs: 0.8120 +/- 0.0247
