In [25]:
import pandas as pd
import numpy as np
import os, random
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
import shutil
from PIL import Image
from keras import backend as K

# Preprocessing

In [26]:
#Import labels
l1 = pd.read_csv('label_map.txt', delim_whitespace=True, names=['Plankton', 'Index_number'])
l1.set_index(['Index_number'], inplace=True)

#Import train labels
train_labels = pd.read_csv('train_onelabel.csv')

In [27]:
#Paths of imagery
train_data_path = 'data/train_images'
test_data_path = 'data/test_images'

In [28]:
#Add label names to train_labels
labels = pd.merge(train_labels, l1, how='outer', left_on='class', right_index=True, copy=True)

In [29]:
#Set image dimensions
img_width = 224
img_height = 224

#Paths of new folders
train_preprocessed_path = 'data/train_processed'
test_preprocessed_path = 'data/test_processed'

In [30]:
#Make new dirs
os.mkdir('data/train_processed')
os.mkdir('data/test_processed')

#List directories
train_data_listing = os.listdir(train_data_path)
test_data_listing = os.listdir(test_data_path)

print('Number of train images:', len(train_data_listing))

#Resize images to 224x224 and add to path 'data/train_preprocessed'
for file in train_data_listing:
    im = Image.open(train_data_path + '\\' + file)
    img = im.resize((img_width, img_height))
    img.save(train_preprocessed_path + '\\' +file, 'JPEG')
    
print('Finished with resizing Train Data')
print('New image size: ', img_width, "x", img_height, '\n')
print('Number of test images:', len(test_data_listing))
    
#Resize images to 224x224 and add to path 'data/test_preprocessed'
for file in test_data_listing:
    im = Image.open(test_data_path + '\\' + file)
    img = im.resize((img_width, img_height))
    img.save(test_preprocessed_path + '\\' +file, 'JPEG')
    
print('Finished with resizing Test Data')
print('New image size: ', img_width, "x", img_height)

Number of train images: 24204
Finished with resizing Train Data
New image size:  224 x 224 

Number of test images: 6132
Finished with resizing Test Data
New image size:  224 x 224


In [31]:
#Image name and label to dictionary
label_dict = labels.set_index('image')['Plankton'].to_dict()

#Move files to subfolders
for file in train_data_listing:
    for key, value in label_dict.items():
        if file == key:
            path = os.path.join(train_preprocessed_path, value)
            if not os.path.exists(path):
                os.makedirs(path)
            shutil.move(os.path.join(train_preprocessed_path, file), path)
            
print('Finished with moving files to subfolders')

Finished with moving files to subfolders


In [32]:
#Specify validation split
validation_split = 0.2
#List Train preprocessed folder
Train_preprocessed_listing = os.listdir(train_preprocessed_path)
#Folder for validation split
os.mkdir('data/validation_split')
validation_folder = 'data/validation_split'
#For loop for creating validation split
for folder in Train_preprocessed_listing:
    subfolder = os.listdir(os.path.join(train_preprocessed_path, folder))
    num_files_in_folder = len(os.listdir(os.path.join(train_preprocessed_path, folder)))
    validation_copy = int(np.ceil((num_files_in_folder * validation_split)))
    for file in range(validation_copy):
        random_file = random.choice(subfolder)
        validation_path = os.path.join(validation_folder, folder)
        if not os.path.exists(validation_path):
            os.makedirs(validation_path)
        try:
            shutil.move(os.path.join(os.path.join(train_preprocessed_path, folder), random_file), validation_path)
        except Exception:
            continue
print('Finished with creating validation split folder')

Finished with creating validation split folder


# Test Run

In [41]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

In [42]:
#Load one image
img = load_img('data/train_processed/acantharia_protist_halo/4844.jpg')
#Convert to Numpy array (xx, xx, xx)
x = img_to_array(img)
print('Image shape:', x.shape)
#Numpy array with shape (1, xx, xx, xx)
x = x.reshape((1,) + x.shape)
#print shape
print('New shape:', x.shape)

Image shape: (224, 224, 3)
New shape: (1, 224, 224, 3)


In [43]:
#Create test images for preview
i = 0
for batch in train_datagen.flow(x, batch_size=1, save_to_dir='data/preview', save_prefix='acantharia_protist_halo', save_format='jpeg'):
    i += 1
    if i > 20:
        break
        
print('Check the new folder data\preview for what these lines of code did')

Check the new folder data\preview for what these lines of code did


# First Keras model run

In [52]:
#Parameters
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

In [53]:
#Prepare data
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

In [54]:
#Build model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [55]:
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

In [56]:
train_generator = train_datagen.flow_from_directory(
    train_preprocessed_path,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 19730 images belonging to 121 classes.


In [57]:
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [58]:
validation_generator = test_datagen.flow_from_directory(
    validation_folder,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 4474 images belonging to 121 classes.


In [59]:
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

model.save_weights('first_try.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
