In [1]:
import pandas as pd
import numpy as np
import os, random
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
import shutil
from PIL import Image
from keras import backend as K

Using TensorFlow backend.


# Preprocessing

In [2]:
#Import labels
l1 = pd.read_csv('label_map.txt', delim_whitespace=True, names=['Plankton', 'Index_number'])
l1.set_index(['Index_number'], inplace=True)

#Import train labels
train_labels = pd.read_csv('train_onelabel.csv')

In [3]:
#Paths of imagery
train_data_path = 'data/train_images'
test_data_path = 'data/test_images'

In [4]:
#Add label names to train_labels
labels = pd.merge(train_labels, l1, how='outer', left_on='class', right_index=True, copy=True)

In [5]:
#Set image dimensions
img_width = 95
img_height = 95

In [6]:
#List directories
train_data_listing = os.listdir(train_data_path)
test_data_listing = os.listdir(test_data_path)

In [33]:
#Image name and label to dictionary
label_dict = labels.set_index('image')['Plankton'].to_dict()

#Move files to subfolders
for file in train_data_listing:
    for key, value in label_dict.items():
        if file == key:
            path = os.path.join(train_data_path, value)
            if not os.path.exists(path):
                os.makedirs(path)
            shutil.move(os.path.join(train_data_path, file), path)
            
print('Finished with moving files to subfolders')

Finished with moving files to subfolders


In [34]:
#Specify validation split
validation_split = 0.2
#Folder for validation split
os.mkdir('data/validation_split')
validation_folder = 'data/validation_split'
#Relist new folder structure
train_data_listing = os.listdir(train_data_path)
#For loop for creating validation split
for folder in train_data_listing:
    subfolder = os.listdir(os.path.join(train_data_path, folder))
    num_files_in_folder = len(subfolder)
    validation_copy = int(np.ceil((num_files_in_folder * validation_split)))
    for file in range(validation_copy):
        random_file = random.choice(subfolder)
        validation_path = os.path.join(validation_folder, folder)
        if not os.path.exists(validation_path):
            os.makedirs(validation_path)
        try:
            shutil.move(os.path.join(os.path.join(train_data_path, folder), random_file), validation_path)
        except Exception:
            continue
print('Finished with creating validation split folder')

Finished with creating validation split folder


# Test Run

In [7]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

In [8]:
#Load one image
img = load_img('data/train_images/acantharia_protist_halo/4844.jpg')
#Convert to Numpy array (xx, xx, xx)
x = img_to_array(img)
print('Image shape:', x.shape)
#Numpy array with shape (1, xx, xx, xx)
x = x.reshape((1,) + x.shape)
#print shape
print('New shape:', x.shape)

Image shape: (72, 70, 3)
New shape: (1, 72, 70, 3)


In [10]:
#Create test images for preview
os.mkdir('data/preview')
i = 0
for batch in train_datagen.flow(x, batch_size=1, save_to_dir='data/preview', save_prefix='acantharia_protist_halo', save_format='jpeg'):
    i += 1
    if i > 20:
        break
        
print('Check the new folder data\preview for what these lines of code did')

Check the new folder data\preview for what these lines of code did


# First Keras model run

In [7]:
#Parameters
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 360
batch_size = 32

#Set image dimensions
img_width = 95
img_height = 95

In [8]:
#Prepare data
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

In [9]:
input_shape

(95, 95, 3)

In [10]:
from keras.layers.advanced_activations import LeakyReLU

#Build model
model = Sequential()
#cyclic slice
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(LeakyReLU(alpha=1/3))
model.add(Conv2D(16, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

#Cyclic roll
model.add(Conv2D(64, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(Conv2D(32, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

#Cyclic roll
model.add(Conv2D(128, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(Conv2D(128, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(Conv2D(64, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))   

#Cyclic roll
model.add(Conv2D(256, (3, 3)))
model.add(LeakyReLU(alpha=1/3))
#model.add(Conv2D(256, (3, 3)))
#model.add(LeakyReLU(alpha=1/3))
#model.add(Conv2D(128, (3, 3)))
#model.add(LeakyReLU(alpha=1/3))
model.add(MaxPooling2D(pool_size=(3, 3), strides=2))

#Cyclic roll
model.add(Flatten())
model.add(Dense(512))
model.add(LeakyReLU(alpha=1/3))

#cyclic pooling(rms)
model.add(Dense(512))
model.add(LeakyReLU(alpha=1/3))
model.add(Dense(512))
model.add(LeakyReLU(alpha=1/3))
model.add(Dense(121))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [11]:
train_datagen = ImageDataGenerator(
        rotation_range=360,
        rescale=1./1.6,
        shear_range=0.2,
        channel_shift_range=10,
        zoom_range=0.2,
        horizontal_flip=True)

In [12]:
train_generator = train_datagen.flow_from_directory(
    train_data_path,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

Found 19782 images belonging to 121 classes.


In [13]:
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [14]:
validation_folder = 'data/validation_split'
validation_generator = test_datagen.flow_from_directory(
    validation_folder,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

Found 4422 images belonging to 121 classes.


In [15]:
from keras.callbacks import EarlyStopping
early_stopping_monitor = EarlyStopping(patience=10)

In [None]:
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size, 
    callbacks = [early_stopping_monitor])

model.save_weights('first_try.h5')
model.save('Model_1.h5')

Epoch 1/360
Epoch 2/360
Epoch 3/360
Epoch 4/360
Epoch 5/360
Epoch 6/360
Epoch 7/360
Epoch 8/360
Epoch 9/360
Epoch 10/360
Epoch 11/360
Epoch 12/360
Epoch 13/360
Epoch 14/360
Epoch 15/360
Epoch 16/360
Epoch 17/360
Epoch 18/360
Epoch 19/360
Epoch 20/360
Epoch 21/360
Epoch 22/360
Epoch 23/360
Epoch 24/360
Epoch 25/360
Epoch 26/360
Epoch 27/360
Epoch 28/360
Epoch 29/360
Epoch 30/360
Epoch 31/360
Epoch 32/360
Epoch 33/360
Epoch 34/360
Epoch 35/360
Epoch 36/360
Epoch 37/360
Epoch 38/360
Epoch 39/360
Epoch 40/360
Epoch 41/360
Epoch 42/360
Epoch 43/360
Epoch 44/360
Epoch 45/360
Epoch 46/360
Epoch 47/360
Epoch 48/360
Epoch 49/360
Epoch 50/360
Epoch 51/360
Epoch 52/360
Epoch 53/360
Epoch 54/360
Epoch 55/360
Epoch 56/360
Epoch 57/360
Epoch 58/360
Epoch 59/360
Epoch 60/360
Epoch 61/360
Epoch 62/360


Epoch 63/360
Epoch 64/360
Epoch 65/360
Epoch 66/360
Epoch 67/360
Epoch 68/360
Epoch 69/360
Epoch 70/360
Epoch 71/360
Epoch 72/360
Epoch 73/360


In [None]:
model = load_model('model_1.h5')