In [20]:
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense
from keras.models import model_from_json
from keras.preprocessing import image
import numpy as np
import pandas as pd
import os

from keras.utils import CustomObjectScope
from keras.initializers import glorot_uniform


In [11]:
# Useful functions

def load_image(img_path, show=False):
    """Load one image file as a scaled, single-item batch.

    The file is read with ``image.load_img`` using the notebook-level
    ``img_width`` / ``img_height`` globals as ``target_size``, converted to an
    array, given a leading batch axis and divided by 255.

    Args:
        img_path: Path of the image file to read.
        show: When true, display the scaled image through ``plt`` before
            returning.

    Returns:
        The image array with a leading axis of size 1 and values divided
        by 255.

    NOTE(review): ``plt`` is not imported in the visible import cell, so
    ``show=True`` presumably needs ``matplotlib.pyplot`` imported as ``plt``
    somewhere else -- confirm.
    NOTE(review): ``target_size`` is passed as (img_width, img_height); the
    same ordering is used for ``input_shape`` and the training generator in
    this notebook, so keep all three in sync if any of them changes.
    """
    pil_img = image.load_img(img_path, target_size=(img_width, img_height))

    # Add the batch axis the model expects: (batch_size, rows, cols, channels).
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    # Same scaling as the training generator; imshow also wants [0, 1] values.
    batch /= 255.

    if show:
        plt.imshow(batch[0])
        plt.axis('off')
        plt.show()

    return batch

def save_model(model):
    """Persist a model's architecture and weights under ``cache/``.

    Writes ``model.to_json()`` to ``cache/architecture.json`` and then calls
    ``model.save_weights`` for ``cache/model_weights.h5`` with
    ``overwrite=True``. The ``cache`` directory is created when missing.

    Args:
        model: Object exposing ``to_json()`` and
            ``save_weights(path, overwrite=...)``.
    """
    json_string = model.to_json()
    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs('cache', exist_ok=True)
    # The context manager guarantees the JSON is flushed and the handle closed,
    # even if the write raises.
    with open(os.path.join('cache', 'architecture.json'), 'w') as arch_file:
        arch_file.write(json_string)
    model.save_weights(os.path.join('cache', 'model_weights.h5'), overwrite=True)


def read_model():
    """Rebuild the model stored under ``cache/``.

    Reads ``cache/architecture.json``, passes its text to ``model_from_json``
    and then loads ``cache/model_weights.h5`` into the resulting model.

    Returns:
        The model returned by ``model_from_json`` after ``load_weights``.
    """
    # Use a context manager so the architecture file handle is always closed.
    with open(os.path.join('cache', 'architecture.json')) as arch_file:
        model = model_from_json(arch_file.read())
    model.load_weights(os.path.join('cache', 'model_weights.h5'))
    return model

def create_submission(predictions, test_id, filename='submission.csv'):
    """Build the submission table, write it under ``subm/`` and return it.

    The original version assembled the frame and created the ``subm``
    directory but neither wrote nor returned anything, so the work was lost.

    Args:
        predictions: Rows of ``(id, label)`` pairs used to build the frame.
        test_id: One value per row, stored in the ``img`` column.
        filename: Name of the CSV file created inside ``subm``.

    Returns:
        The DataFrame with columns ``id``, ``label`` and ``img``.
    """
    result = pd.DataFrame(predictions, columns=['id', 'label'])
    result.loc[:, 'img'] = pd.Series(test_id, index=result.index)
    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs('subm', exist_ok=True)
    result.to_csv(os.path.join('subm', filename), index=False)
    return result

def is_cleaned(pred_value):
    """Map a prediction score to its class label.

    Args:
        pred_value: Score compared against 0.5.

    Returns:
        ``'cleaned'`` when ``pred_value`` is below 0.5, otherwise ``'dirty'``.
    """
    return 'cleaned' if pred_value < 0.5 else 'dirty'


In [12]:
# Dataset locations (relative paths).
train_dir = 'superbowllsh/train'
test_dir = 'superbowllsh/test'

# Image size handed to Keras; input_shape appends 3 channels.
img_width = 341
img_height = 256
input_shape = (img_width, img_height, 3)

# Training hyper-parameters.
batch_size = 16
epochs = 30

# Dataset sizes.
numb_train_samples = 40
numb_test_samples = 660

In [13]:
# Neural network architecture: three conv/pool stages followed by a dense
# classifier head ending in a single sigmoid unit.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 3
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Loading images: pixel values are rescaled by 1/255 and batches are read
# from the class sub-directories of train_dir with binary labels.
datagen = ImageDataGenerator(rescale=1. / 255)
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
)
train_generator = datagen.flow_from_directory(train_dir, **flow_options)

Found 40 images belonging to 2 classes.


In [15]:
# Training: fit on the generator, then persist architecture and weights.
steps_per_epoch = numb_train_samples // batch_size
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs)
save_model(model)

Epoch 1/30
Epoch 2/30
Epoch 3/30


KeyboardInterrupt: 

In [21]:
# Get result
# Only rebuilding the model from JSON needs the custom-object scope, so the
# scope is limited to the load itself.
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):
    model = read_model()

# Collect one record per test image and build the frame once, instead of
# growing a DataFrame row by row.
records = []
for filename in os.listdir(test_dir):
    if not filename.endswith(".jpg"):
        continue
    # splitext tolerates extra dots in the name; a two-target split('.')
    # raises ValueError on anything like "a.b.jpg".
    img_id, _ = os.path.splitext(filename)
    # Build the path from test_dir rather than repeating the literal.
    img_path = os.path.join(test_dir, filename)
    pred = model.predict(load_image(img_path))
    # predict is given a batch of one image and the model ends in Dense(1),
    # so take the single score as a plain float before thresholding.
    score = float(np.ravel(pred)[0])
    records.append({'id': img_id, 'label': is_cleaned(score)})

result = pd.DataFrame(records, columns=['id', 'label'])
print(result)
        
        

       id    label
0    0615  cleaned
1    0071    dirty
2    0523    dirty
3    0148    dirty
4    0564  cleaned
5    0552    dirty
6    0089  cleaned
7    0609  cleaned
8    0372    dirty
9    0420    dirty
10   0490    dirty
11   0238    dirty
12   0576    dirty
13   0390    dirty
14   0340  cleaned
15   0117    dirty
16   0022    dirty
17   0285    dirty
18   0544  cleaned
19   0282    dirty
20   0640  cleaned
21   0016  cleaned
22   0280    dirty
23   0560  cleaned
24   0431  cleaned
25   0204  cleaned
26   0190    dirty
27   0120    dirty
28   0534    dirty
29   0601    dirty
..    ...      ...
630  0259    dirty
631  0207    dirty
632  0457    dirty
633  0657  cleaned
634  0209    dirty
635  0135    dirty
636  0306    dirty
637  0094    dirty
638  0599  cleaned
639  0414    dirty
640  0184  cleaned
641  0249    dirty
642  0439  cleaned
643  0021  cleaned
644  0292  cleaned
645  0061    dirty
646  0185    dirty
647  0610  cleaned
648  0568    dirty
649  0366  cleaned
650  0180   

In [22]:
# Save result to csv file, ordered by image id and without the index column.
submission_path = 'submission.csv'
result = result.sort_values(by='id')
result.to_csv(submission_path, index=False)