In [1]:
import pandas as pd
import numpy as np
import os
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
import shutil
from PIL import Image

Using TensorFlow backend.


In [2]:
#Import labels
l1 = pd.read_csv('label_map.txt', delim_whitespace=True, names=['Plankton', 'Index_number'])
l1.set_index(['Index_number'], inplace=True)

#Import train labels
train_labels = pd.read_csv('train_onelabel.csv')

In [3]:
#Paths of imagery
train_data_path = 'data/train_images'
test_data_path = 'data/test_images'

In [4]:
#Add label names to train_labels
labels = pd.merge(train_labels, l1, how='outer', left_on='class', right_index=True, copy=True)

In [5]:
#Make new dirs
os.mkdir('data/train_processed')
os.mkdir('data/test_processed')
#Paths of new folders
train_preprocessed_path = 'data/train_processed'
test_preprocessed_path = 'data/test_processed'

#Set image dimensions
img_rows = 224
img_cols = 224

#List directories
train_data_listing = os.listdir(train_data_path)
test_data_listing = os.listdir(test_data_path)

print('Number of train images:', len(train_data_listing))

#Resize images to 224x224 and add to path 'data/train_preprocessed'
for file in train_data_listing:
    im = Image.open(train_data_path + '\\' + file)
    img = im.resize((img_rows, img_cols))
    img.save(train_preprocessed_path + '\\' +file, 'JPEG')
    
print('Finished with resizing Train Data')
print('New image size: ', img_rows, "x", img_cols)
print('Number of test images:', len(test_data_listing), '\n')
    
#Resize images to 224x224 and add to path 'data/test_preprocessed'
for file in test_data_listing:
    im = Image.open(test_data_path + '\\' + file)
    img = im.resize((img_rows, img_cols))
    img.save(test_preprocessed_path + '\\' +file, 'JPEG')
    
print('Finished with resizing Test Data')
print('New image size: ', img_rows, "x", img_cols)

Number of train images: 24204
Finished with resizing Train Data
New image size:  224 x 224
Number of test images: 6132 

Finished with resizing Test Data
New image size:  224 x 224


In [6]:
#Image name and label to dictionary
label_dict = labels.set_index('image')['Plankton'].to_dict()

#Move files to subfolders
for file in train_data_listing:
    for key, value in label_dict.items():
        if file == key:
            path = os.path.join(train_preprocessed_path, value)
            if not os.path.exists(path):
                os.makedirs(path)
            shutil.move(os.path.join(train_preprocessed_path, file), path)
            
print('Finished with moving files to subfolders')

Finished with moving files to subfolders


# Code in development

In [9]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

In [11]:
#Load one image
img = load_img('data/train_processed/acantharia_protist_halo/4844.jpg')
#Convert to Numpy array (xx, xx, xx)
x = img_to_array(img)
print('Image shape:', x.shape)
#Numpy array with shape (1, xx, xx, xx)
x = x.reshape((1,) + x.shape)
#print shape
print('New shape:', x.shape)

Image shape: (224, 224, 3)
New shape: (1, 224, 224, 3)


In [12]:
#Create test images for preview
i = 0
for batch in train_datagen.flow(x, batch_size=1, save_to_dir='data/preview', save_prefix='acantharia_protist_halo', save_format='jpeg'):
    i += 1
    if i > 20:
        break

In [13]:
#Build model
model = Sequential()

In [14]:
model.add(Dense(units=64, activation='relu', input_dim=100))
model.add(Dense(units=10, activation='softmax'))

In [15]:
model.compile(loss='categorical_crossentropy', optimizer='sgd')

In [None]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_data_path,
        target_size=(img_rows, img_cols),
        batch_size=32,
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        test_data_path,
        target_size=(img_rows, img_cols),
        batch_size=32,
        class_mode='binary')

model.fit_generator(
        train_generator,
        steps_per_epoch=2000,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800)

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Epoch 1/50
