In [1]:
#Import libraries for doing image analysis
from skimage.io import imread
from skimage.transform import resize
from sklearn.ensemble import RandomForestClassifier as RF
import glob
import os
from sklearn import cross_validation
from sklearn.cross_validation import StratifiedKFold as KFold
from sklearn.metrics import classification_report
from matplotlib import pyplot as plt
from matplotlib import colors
from pylab import cm
from skimage import segmentation
from skimage.morphology import watershed
from skimage import measure
from skimage import morphology
import numpy as np
import pandas as pd
from scipy import ndimage
from skimage.feature import peak_local_max
# make graphics inline
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")



In [2]:
#keras imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [3]:
from keras import backend as K
print(K.image_data_format())
K.set_image_data_format('channels_first')
print(K.image_data_format())

channels_last
channels_first


In [4]:
# check for GPU
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16118063148174085873
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3282167398
locality {
  bus_id: 1
}
incarnation: 1063493276835867971
physical_device_desc: "device: 0, name: GeForce GTX 950M, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


## Importing the data

In [5]:
data_dir = 'data'

In [6]:
df_train_labels = pd.read_csv(os.path.join(data_dir,'train_onelabel.csv'))
df_train_labels.head()

Unnamed: 0,image,class
0,132103.jpg,0
1,66467.jpg,0
2,9143.jpg,0
3,20630.jpg,0
4,33689.jpg,0


In [7]:
df_label_map = pd.read_csv(os.path.join(data_dir,'label_map.txt'), sep=" ", header=None, names=["label", "class"])
df_label_map.head()

Unnamed: 0,label,class
0,unknown_unclassified,0
1,unknown_sticks,1
2,protist_star,2
3,copepod_cyclopoid_oithona,3
4,hydromedusae_solmundella,4


## Preparing Training Data

In [310]:
# hyperparameters
IMG_SIZE = 64

In [268]:
def preprocess_img(img):
    # rescale to standard size
    img = resize(img, (IMG_SIZE, IMG_SIZE))
    return img

In [269]:
# Example image
example_file = glob.glob(os.path.join(data_dir,'train_images/*.jpg'))[12]
print(example_file)
im = imread(example_file, as_grey=True)
plt.imshow(im, cmap=cm.gray)
plt.show()

data\train_images\100088.jpg


AttributeError: 'numpy.ndarray' object has no attribute 'gray'

In [None]:
new_im = preprocess_img(im)
plt.imshow(new_im, cmap=cm.gray)
plt.show()

In [270]:
def get_label(file_name):
    return df_train_labels.loc[df_train_labels['image'] == file_name]['class'].values[0]

In [311]:
#get the total training images
number_of_images = 0
for _, _, fileNames in os.walk(os.path.join(data_dir,'train_images')): 
    for fileName in fileNames:
        # Only read in the images
        if fileName[-4:] != ".jpg":
            continue
        number_of_images += 1
        
print('Number of images:', number_of_images)

imgs = []
labels = []
i = 0

print('Reading images...')

for root, _, file_names in os.walk(os.path.join(data_dir,'train_images')): # change in train_images
    for file_name in file_names:
        # Only read in the images
        if fileName[-4:] != ".jpg":
            continue
        
        img_path = "{0}{1}{2}".format(root, os.sep, file_name)
        img = preprocess_img(imread(img_path, as_grey=True))
        imgs.append(img)
        
        label = get_label(file_name)
        labels.append(label)
        
        i += 1
        # report progress for each 5% done  
        report = [int((j+1)*number_of_images/20.) for j in range(20)]
        if i in report: print(np.ceil(i *100.0 / number_of_images), "% done")

Number of images: 24204
Reading images...
5.0 % done
10.0 % done
15.0 % done
20.0 % done
25.0 % done
30.0 % done
35.0 % done
40.0 % done
45.0 % done
50.0 % done
55.0 % done
60.0 % done
65.0 % done
70.0 % done
75.0 % done
80.0 % done
85.0 % done
90.0 % done
95.0 % done
100.0 % done


In [313]:
X = np.array(imgs, dtype='float32')
X.shape

(24204, 64, 64)

In [314]:
X = X.reshape(X.shape[0], 1, IMG_SIZE, IMG_SIZE)
X.shape

(24204, 1, 64, 64)

In [315]:
Y = np_utils.to_categorical(labels)
Y.shape

(24204, 121)

In [316]:
#get the total training images
number_of_images = 0
for _, _, fileNames in os.walk(os.path.join(data_dir,'test_images')): 
    for fileName in fileNames:
        # Only read in the images
        if fileName[-4:] != ".jpg":
            continue
        number_of_images += 1
        
print('Number of images:', number_of_images)

imgs = []
test_img_names = []
i = 0

print('Reading test images...')

for root, _, file_names in os.walk(os.path.join(data_dir,'test_images')): # change in train_images
    for file_name in file_names:
        # Only read in the images
        if fileName[-4:] != ".jpg":
            continue
        
        img_path = "{0}{1}{2}".format(root, os.sep, file_name)
        img = preprocess_img(imread(img_path, as_grey=True))
        imgs.append(img)
        
        test_img_names.append(file_name)
        
        i += 1
        # report progress for each 5% done  
        report = [int((j+1)*number_of_images/20.) for j in range(20)]
        if i in report: print(np.ceil(i *100.0 / number_of_images), "% done")

Number of images: 6132
Reading test images...
5.0 % done
10.0 % done
15.0 % done
20.0 % done
25.0 % done
30.0 % done
35.0 % done
40.0 % done
45.0 % done
50.0 % done
55.0 % done
60.0 % done
65.0 % done
70.0 % done
75.0 % done
80.0 % done
85.0 % done
90.0 % done
95.0 % done
100.0 % done


In [317]:
X_test = np.array(imgs, dtype='float32')
X_test.shape

(6132, 64, 64)

In [318]:
X_test = X_test.reshape(X_test.shape[0], 1, IMG_SIZE, IMG_SIZE)
X_test.shape

(6132, 1, 64, 64)

## Define Keras Models

### Unnamed model

In [278]:
NUM_CLASSES = 121

def cnn_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', 
                     input_shape=(1, IMG_SIZE, IMG_SIZE), 
                     activation='relu', data_format='channels_first'))
    model.add(Conv2D(32, (3, 3), activation='relu', data_format='channels_first'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), padding='same',
                     activation='relu', data_format='channels_first'))
    model.add(Conv2D(64, (3, 3), activation='relu', data_format='channels_first'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(128, (3, 3), padding='same',
                     activation='relu', data_format='channels_first'))
    model.add(Conv2D(128, (3, 3), activation='relu', data_format='channels_first'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES, activation='softmax'))
    return model


model = cnn_model()

In [279]:
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [280]:
tensorboard = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)

In [281]:
model.fit(X, Y, validation_split=0.2, batch_size=32, epochs=10, verbose=1, callbacks=[tensorboard])

Train on 19363 samples, validate on 4841 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

In [None]:
model.save_weights('model.h5')  # always save your weights after training or during training

In [None]:
model.summary()

$ CategoryAccuracy = \frac{1}{N} \sum_{y_i = \hat{y}_i} 1,$

In [None]:
score = model.evaluate(X, Y, verbose=0)
print("Accuracy: ", score[1])
#.68

#0.72

#0.8 with img size 90

In [None]:
y_pred = model.predict_classes(X_test)

In [None]:
y_pred[:4]

In [None]:
df_submit = pd.DataFrame(list(zip(test_img_names, y_pred)),columns=['image','class'])
df_submit.head()

In [None]:
# export dataframe to csv file for submission
df_submit.to_csv('submission', sep=',', index=False)

### Cifar10 CNN Model

In [None]:
def model_cifar10():
    
    n_filters = 64
    filter_size1 = 3
    filter_size2 = 2
    pool_size1 = 3
    pool_size2 = 1
    n_dense = 128

    model = Sequential()

    model.add(Conv2D(64, filter_size1, filter_size1, 
                            batch_input_shape=(None, 1, IMG_SIZE, IMG_SIZE), activation='relu', border_mode='valid'))

    model.add(MaxPooling2D(pool_size=(pool_size1, pool_size1)))

    model.add(Conv2D(128, filter_size2, filter_size2, activation='relu', border_mode='valid'))

    model.add(MaxPooling2D(pool_size=(pool_size2, pool_size2)))

    model.add(Dropout(0.25))

    model.add(Flatten())

    model.add(Dense(512))

    model.add(Activation('relu'))

    model.add(Dropout(0.5))

    model.add(Dense(NUM_CLASSES))

    model.add(Activation('softmax'))


#     model = Sequential()
#     model.add(Conv2D(32, (3, 3), padding='same',
#                      input_shape=(1, IMG_SIZE, IMG_SIZE), data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(32, (3, 3), data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))

#     model.add(Conv2D(64, (3, 3), padding='same', data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(64, (3, 3), data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))
    
# #     model.add(Conv2D(128, (3, 3), data_format='channels_first'))
# #     model.add(Activation('relu'))
# #     model.add(MaxPooling2D(pool_size=(2, 2)))
# #     model.add(Dropout(0.25))
    
#     model.add(Flatten())
#     model.add(Dense(512))
#     model.add(Activation('relu'))
#     model.add(Dropout(0.5))
#     model.add(Dense(NUM_CLASSES))
#     model.add(Activation('softmax'))
    return model


model_cifar10 = model_cifar10()

In [None]:
#     model = Sequential()
#     model.add(Conv2D(32, (3, 3), padding='same',
#                      input_shape=(1, IMG_SIZE, IMG_SIZE), data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(32, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))

#     model.add(Conv2D(128, (3, 3), padding='same', data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(128, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(64, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))
    
#     model.add(Conv2D(256, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(256, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(128, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.3))
    
#     model.add(Flatten())
#     model.add(Dense(512))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Dropout(0.5))
#     model.add(Dense(NUM_CLASSES))
#     model.add(Activation('softmax'))


# 0.69 model below

# model = Sequential()
#     model.add(Conv2D(32, (3, 3), padding='same',
#                      input_shape=(1, IMG_SIZE, IMG_SIZE), data_format='channels_first'))
#     model.add(Activation('relu'))
#     model.add(Conv2D(32, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))

#     model.add(Conv2D(64, (3, 3), padding='same', data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(64, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(128, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.3))
    
#     model.add(Conv2D(128, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Conv2D(256, (3, 3), data_format='channels_first'))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.3))
    
#     model.add(Flatten())
#     model.add(Dense(512))
#     model.add(keras.layers.LeakyReLU(alpha=0.3))
#     model.add(Dropout(0.5))
#     model.add(Dense(NUM_CLASSES))
#     model.add(Activation('softmax'))

In [335]:
def model_cifar10():

    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=(1, IMG_SIZE, IMG_SIZE), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(Conv2D(32, (3, 3), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(Conv2D(64, (3, 3), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(Conv2D(128, (3, 3), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    
    model.add(Conv2D(128, (3, 3), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(Conv2D(256, (3, 3), data_format='channels_first'))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))
    
    model.add(Flatten())
    model.add(Dense(512))
    model.add(keras.layers.LeakyReLU(alpha=0.3))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES))
    model.add(Activation('softmax'))
    
    return model

model_cifar10 = model_cifar10()

In [336]:
from keras.optimizers import SGD

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model_cifar10.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])

In [None]:
# import keras

# # initiate RMSprop optimizer
# opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# # Let's train the model using RMSprop
# model_cifar10.compile(loss='categorical_crossentropy',
#               optimizer=opt,
#               metrics=['accuracy'])

In [321]:
import keras

tb_cifar10 = TensorBoard(log_dir='./logs_cifar10', histogram_freq=0, write_graph=True, write_images=False)

esCallback = keras.callbacks.EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=5,
                              verbose=0, mode='auto')

checkpointCallback = keras.callbacks.ModelCheckpoint('./best_model_checkpoint.hdf5', monitor='val_loss',
                                verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)

In [None]:
model_cifar10.fit(X, Y, validation_split=0.15, batch_size=64, epochs=3, verbose=1, callbacks=[tb_cifar10, esCallback])

In [None]:
model_cifar10.save_weights('model_cifar12.h5')

In [None]:
score = model_cifar10.evaluate(X, Y, verbose=0)
print("Accuracy: ", score[1])

# 0.8266

# 0.86 adding one layer 128

# zmienic img size do 90x90 ? wtedy bardziej optymalny przeplyw? albo cos podzielnego przez 3, np 60x60

In [None]:
y_pred = model_cifar10.predict_classes(X_test)

In [None]:
y_pred = model_cifar10.predict_classes(X_val)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

cr = classification_report(np.argmax(Y_val,axis=1), y_pred, target_names=df_label_map['label'])
print(cr)

cm = confusion_matrix(np.argmax(Y_val,axis=1), y_pred)
print(cm)

plt.matshow(cm)
plt.title('Confusion matrix')
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

In [None]:
df_submit = pd.DataFrame(list(zip(test_img_names, y_pred)),columns=['image','class'])
df_submit.head()

In [None]:
# export dataframe to csv file for submission
df_submit.to_csv('submission_cifar10_1', sep=',', index=False)

#### CIFAR10 with augmentation

In [322]:
from sklearn.model_selection import train_test_split
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

In [286]:
tb_cifar10_aug = TensorBoard(log_dir='./logs_cifar10_aug', histogram_freq=0, write_graph=True, write_images=False)

In [337]:
from keras.preprocessing.image import ImageDataGenerator

batch_size = 64
epochs = 100

datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=5,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(X_train)

steps_per_epoch = int(np.ceil(X_train.shape[0] / float(batch_size)))

# Fit the model on the batches generated by datagen.flow() ==> Realtime data augmentation
model_cifar10.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), 
                    epochs=epochs, validation_data=(X_val, Y_val), verbose=1, 
                            steps_per_epoch=steps_per_epoch, workers=4, callbacks=[tb_cifar10_aug, checkpointCallback])
#, esCallback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x23cdaaa3ac8>

In [340]:
#model_cifar10.save_weights('model_cifar10_augmentation2_66.h5')

model_cifar10 = keras.models.load_model('best_model_checkpoint.hdf5')

# POWTORZ TRENOWANIE TEGO MODELU ALE Z EARLY STOP! ^^^^^^^^^^^^^^^^^^^^^^^^

In [341]:
# Score trained model.
scores = model_cifar10.evaluate(X_val, Y_val, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

#0.640983267969

# 0.6587.. with img size 64 and one conv 128 added

Test loss: 0.989741425282
Test accuracy: 0.710803553022


In [342]:
y_pred = model_cifar10.predict_classes(X_test)
df_submit = pd.DataFrame(list(zip(test_img_names, y_pred)),columns=['image','class'])
df_submit.to_csv('submission_cifar10_augmentation', sep=',', index=False)

### Model 3

## Data Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from sklearn.cross_validation import train_test_split

datagen = ImageDataGenerator(featurewise_center=False,
                             featurewise_std_normalization=False,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             shear_range=0.1,
                             rotation_range=30.)

datagen.fit(X)

# Reinitialize model and compile
model = cnn_model()

model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# Train again
epochs = 50
batch_size = 32
steps_per_epoch = int(np.ceil(X_train.shape[0] / float(batch_size)))

model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), validation_data=(X_val, Y_val), steps_per_epoch=steps_per_epoch, epochs=epochs, callbacks=[tensorboard], workers=4)

In [None]:
scores = model.evaluate(X_val, Y_val, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

y_pred = model.predict_classes(X_test)

In [None]:
df_submit3 = pd.DataFrame(list(zip(test_img_names, y_pred)),columns=['image','class'])
df_submit3.head()

In [None]:
# export dataframe to csv file for submission
df_submit3.to_csv('submission_augmentation.csv', sep=',', index=False)

## Plot augmented images

In [None]:
# define data preparation
datagen = ImageDataGenerator(featurewise_center=False,
                             featurewise_std_normalization=False,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             shear_range=0.1,
                             rotation_range=10.)
# fit parameters from data
datagen.fit(X)
# configure batch size and retrieve one batch of images
for X_batch, Y_batch in datagen.flow(X, Y, batch_size=9, shuffle=False):
    # create a grid of 3x3 images
    for i in range(0, 9):
        plt.subplot(330 + 1 + i)
        plt.imshow(X_batch[i].reshape(40, 40), cmap=plt.get_cmap('gray'))
    # show the plot
    plt.show()
    break
    


Visualize filters

In [None]:
from keras import applications

# build the VGG16 network
#model = applications.VGG16(include_top=False,
#                           weights='imagenet')

# get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers])

In [None]:
from keras import backend as K

layer_name = 'block5_conv3'
filter_index = 0  # can be any integer from 0 to 511, as there are 512 filters in that layer

# build a loss function that maximizes the activation
# of the nth filter of the layer considered
layer_output = layer_dict[layer_name].output
loss = K.mean(layer_output[:, :, :, filter_index])

# compute the gradient of the input picture wrt this loss
grads = K.gradients(loss, input_img)[0]

# normalization trick: we normalize the gradient
grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

# this function returns the loss and grads given the input picture
iterate = K.function([input_img], [loss, grads])

In [None]:
import numpy as np

# we start from a gray image with some noise
input_img_data = np.random.random((1, 3, img_width, img_height)) * 20 + 128.
# run gradient ascent for 20 steps
for i in range(20):
    loss_value, grads_value = iterate([input_img_data])
    input_img_data += grads_value * step

In [None]:
from scipy.misc import imsave

# util function to convert a tensor into a valid image
def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

img = input_img_data[0]
img = deprocess_image(img)
imsave('%s_filter_%d.png' % (layer_name, filter_index), img)