# National Data Science Bowl - Plankton

## Action Plan

* Build a baseline model that overfits the training data
* Data augmentation
* Batch normalization
* Dropout
* Ensembling

## Imports and Directories

In [1]:
# Create references to the directories this notebook uses over and over.
import os
import sys

# Snapshot the working directory at start-up. This cell must run before any
# `%cd`, because re-running it after the working directory has changed would
# silently re-root every path derived below.
current_dir = os.getcwd()
SCRIPTS_HOME_DIR = current_dir
# os.path.join inserts exactly one separator, even if the working directory
# already ends with one.
DATA_HOME_DIR = os.path.join(current_dir, 'data')

In [2]:
#import modules
from utils import *
%matplotlib inline

Using gpu device 0: GeForce GTX 1070 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 5105)
Using Theano backend.


In [3]:
%cd $DATA_HOME_DIR

#Set path to sample/ path if desired
path = DATA_HOME_DIR #'/sample/'
test_path = DATA_HOME_DIR + '/test/' #We use all the test data
results_path=DATA_HOME_DIR + '/results/'
train_path=path + '/train/'
valid_path=path + '/valid/'

/home/nathan/git/planktonDataScienceBowl/scripts/data


## VGG-like Model

#### Constants

In [4]:
# Spatial size of the images fed to the network; also passed to the batch
# iterators as their target size.
img_rows = 128
img_cols = 128
in_shape = (img_rows, img_cols)

batch_size = 64   # images per mini-batch
nb_classes = 121  # width of the final softmax layer

#### Batches

In [5]:
# Baseline generator: no augmentation options are set, only the 1/255
# rescale factor for pixel values.
gen = image.ImageDataGenerator(
    rescale=1.0 / 255,
)

In [6]:
# The training and validation iterators share every batching option; only the
# source directory differs.
batch_options = dict(
    batch_size=batch_size,
    target_size=in_shape,
    color_mode="grayscale",
    gen=gen,
)
train_batches = get_batches(train_path, **batch_options)
val_batches = get_batches(valid_path, **batch_options)

Found 27184 images belonging to 121 classes.
Found 3152 images belonging to 121 classes.


#### Model

In [7]:
def get_model():
    """Build and compile the VGG-like baseline classifier.

    The network stacks 3x3, 'same'-padded ReLU convolutions (64, 64, 128 and
    256 filters) interleaved with max-pooling, then three 2048-unit ReLU dense
    layers and a softmax layer with ``nb_classes`` outputs. The input shape is
    ``(1, img_rows, img_cols)``; ``img_rows``, ``img_cols`` and ``nb_classes``
    are read from the notebook's globals.

    Returns:
        The Sequential model, compiled with a default Adam optimizer,
        categorical cross-entropy loss and an accuracy metric.
    """
    # Options shared by every convolution in the stack.
    conv_opts = dict(border_mode='same', activation='relu')

    feature_layers = [
        Convolution2D(64, 3, 3, input_shape=(1, img_rows, img_cols), **conv_opts),
        Convolution2D(64, 3, 3, **conv_opts),
        MaxPooling2D(),
        Convolution2D(128, 3, 3, **conv_opts),
        MaxPooling2D(),
        Convolution2D(256, 3, 3, **conv_opts),
        MaxPooling2D(),
    ]

    classifier_layers = [Flatten()]
    classifier_layers += [Dense(2048, activation='relu') for _ in range(3)]
    classifier_layers.append(Dense(nb_classes, activation='softmax'))

    model = Sequential(feature_layers + classifier_layers)
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [8]:
# Build and compile a fresh instance of the network defined by get_model().
model = get_model()

In [None]:
# Show the layer-by-layer summary (output shapes and parameter counts).
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 64, 128, 128)  640         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 64, 128, 128)  36928       convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 64, 64, 64)    0           convolution2d_2[0][0]            
____________________________________________________________________________________________________
convolution2d_3 (Convolution2D)  (None, 128, 64, 64)   73856       maxpooling2d_1[0][0]             
___________________________________________________________________________________________

#### Train

In [None]:
# Baseline run: five epochs, each drawing as many samples as the training
# iterator reports, with validation over the validation iterator's sample count.
model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.nb_sample,
    nb_epoch=5,
    validation_data=val_batches,
    nb_val_samples=val_batches.nb_sample,
    verbose=True,
)

Epoch 1/5
Epoch 2/5

In [None]:
model.optimizer.lr=0.1
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=1,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample,
                    verbose=True)

In [None]:
model.optimizer.lr=0.01
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=4,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample,
                    verbose=True)

## Data Augmentation

#### Constants

In [4]:
# Constants for the data-augmentation experiment (same values as the baseline).
img_rows = img_cols = 128
in_shape = (img_rows, img_cols)  # passed to the batch iterators as target size

batch_size = 64
nb_classes = 121  # width of the final softmax layer

#### Batches

In [7]:
# Augmenting generator: random rotation over the full circle, small shifts,
# shear and zoom, horizontal flips, plus the 1/255 pixel rescale.
# `horizontal_flip` has to be passed as a keyword with a value; a bare name
# following keyword arguments is a SyntaxError.
gen = image.ImageDataGenerator(
                rotation_range=360,
                width_shift_range=0.03,
                height_shift_range=0.03,
                shear_range=0.10,
                zoom_range=0.10,
                rescale=1. / 255,
                horizontal_flip=True)

In [8]:
# Training batches are drawn through the generator bound to `gen`.
train_batches = get_batches(train_path, batch_size=batch_size, 
                            target_size=in_shape, color_mode="grayscale", 
                            gen=gen)
# Validation images only get the 1/255 rescale, never random augmentation, so
# validation metrics are deterministic and comparable across epochs.
val_gen = image.ImageDataGenerator(rescale=1. / 255)
val_batches   = get_batches(valid_path, batch_size=batch_size, 
                            target_size=in_shape, color_mode="grayscale", 
                            gen=val_gen)

Found 27184 images belonging to 121 classes.
Found 3152 images belonging to 121 classes.


#### Model

In [9]:
def get_model():
    """Return a freshly compiled CNN for the data-augmentation experiment.

    Layer stack, in order: conv64, conv64, max-pool, conv128, max-pool,
    conv256, max-pool, flatten, three 2048-unit ReLU dense layers, and a
    softmax layer with ``nb_classes`` outputs. Every convolution is 3x3 with
    'same' border mode and ReLU activation. The input shape
    ``(1, img_rows, img_cols)`` and ``nb_classes`` come from notebook globals.

    Returns:
        The Sequential model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    def relu_conv(n_filters, **extra):
        # One 3x3 'same'-padded ReLU convolution; `extra` carries input_shape
        # for the first layer only.
        return Convolution2D(n_filters, 3, 3, border_mode='same',
                             activation='relu', **extra)

    layers = [
        relu_conv(64, input_shape=(1, img_rows, img_cols)),
        relu_conv(64),
        MaxPooling2D(),
        relu_conv(128),
        MaxPooling2D(),
        relu_conv(256),
        MaxPooling2D(),
        Flatten(),
    ]
    for _ in range(3):
        layers.append(Dense(2048, activation='relu'))
    layers.append(Dense(nb_classes, activation='softmax'))

    model = Sequential(layers)
    model.compile(optimizer=Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [10]:
# Start the augmentation experiment from a newly built, untrained model.
model = get_model()

In [11]:
# Show the layer-by-layer summary (output shapes and parameter counts).
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 64, 128, 128)  640         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 64, 128, 128)  36928       convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 64, 64, 64)    0           convolution2d_2[0][0]            
____________________________________________________________________________________________________
convolution2d_3 (Convolution2D)  (None, 128, 64, 64)   73856       maxpooling2d_1[0][0]             
___________________________________________________________________________________________

#### Train

In [12]:
# Long run on the augmented batches: up to 100 epochs, each drawing as many
# samples as the training iterator reports, validating after every epoch.
model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.nb_sample,
    nb_epoch=100,
    validation_data=val_batches,
    nb_val_samples=val_batches.nb_sample,
    verbose=True,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f870fc03390>

In [None]:
model.optimizer.lr=0.1
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=1,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample,
                    verbose=True)

In [None]:
model.optimizer.lr=0.01
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=4,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample,
                    verbose=True)

## Data Augmentation (alternative settings, kept for reference)

In [None]:
# image_generator = image.ImageDataGenerator(
#                 rotation_range=360,
#                 width_shift_range=0.02,
#                 height_shift_range=0.02,
#                 shear_range=0.05,
#                 zoom_range=0.05)

## Batch Normalization 

## Dropout 

## Ensembling 