# National Data Science Bowl - Plankton

## Action Plan

* Build an initial model that overfits the training data
* Data augmentation
* Batch normalization
* Dropout
* Ensembling

## Imports and Directories

In [1]:
# Keep handles on the directories the rest of the notebook refers to
import os, sys
SCRIPTS_HOME_DIR = current_dir = os.getcwd()
DATA_HOME_DIR = SCRIPTS_HOME_DIR + '/data'

In [2]:
#import modules
from utils import *
%matplotlib inline

Using gpu device 0: GeForce GTX 1070 (CNMeM is enabled with initial size: 85.0% of memory, cuDNN 5105)
Using Theano backend.


In [3]:
%cd $DATA_HOME_DIR

# Root of the train/valid split; append '/sample/' to work on the sample set instead
path = DATA_HOME_DIR
train_path = path + '/train/'
valid_path = path + '/valid/'

# Test data and results always come from the full data directory
test_path = DATA_HOME_DIR + '/test/'
results_path = DATA_HOME_DIR + '/results/'

/home/nathan/git/planktonDataScienceBowl/scripts/data


## Initial overfitting model

### Constants

In [4]:
# Target image size (rows, cols) handed to the batch generators
img_rows = img_cols = 128
in_shape = (img_rows, img_cols)

# Mini-batch size and width of the final softmax layer
batch_size = 128
nb_classes = 121

### Set up batches

In [5]:
# Training and validation batches, read as grayscale at the shared target size
train_batches = get_batches(
    train_path,
    batch_size=batch_size,
    target_size=in_shape,
    color_mode="grayscale")
val_batches = get_batches(
    valid_path,
    batch_size=batch_size,
    target_size=in_shape,
    color_mode="grayscale")

Found 27184 images belonging to 121 classes.
Found 3152 images belonging to 121 classes.


### Set up Model

In [6]:
# Empty Sequential model; the cells below append layers to it via model.add()
model = Sequential()

#### Input layer

In [7]:
# The first convolution also declares the input shape: a single channel of
# img_rows x img_cols, channels first (matching the shapes in model.summary()).
model.add(Convolution2D(
    64, 3, 3,
    border_mode='same',
    input_shape=(1, img_rows, img_cols)))
model.add(Activation('relu'))

#### Convolution layers

In [8]:
def convBlock(model, layers, filters):
    """Append a convolutional block to `model`.

    Adds `layers` 3x3 'same'-padded ReLU convolutions, each with `filters`
    output filters, followed by a single 2x2 max-pooling layer.
    """
    for _ in range(layers):
        conv = Convolution2D(filters, 3, 3, border_mode='same', activation='relu')
        model.add(conv)
    # One pooling step closes the block regardless of how many convs it holds
    model.add(MaxPooling2D((2, 2)))

In [9]:
# Stack four blocks; each call ends in a 2x2 max-pool, and the filter count doubles per block
convBlock(model, layers=1, filters=64)
convBlock(model, layers=2, filters=128)
convBlock(model, layers=3, filters=256)
convBlock(model, layers=3, filters=512)

In [10]:
def FCBlock(model):
    """Append one fully connected layer (4096 units, ReLU) to `model`."""
    dense_layer = Dense(4096, activation='relu')
    model.add(dense_layer)

In [11]:
# Flatten the convolutional feature maps so they can feed the dense layers
model.add(Flatten())
# Two 4096-unit fully connected layers
FCBlock(model)
FCBlock(model)
# Output layer: one softmax unit per class
model.add(Dense(nb_classes, activation='softmax'))

In [12]:
# Adam with categorical cross-entropy; accuracy is reported alongside the loss
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=["accuracy"])

In [13]:
# Print the layer-by-layer output shapes and parameter counts as a sanity check
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 64, 128, 128)  640         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 64, 128, 128)  0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 64, 128, 128)  36928       activation_1[0][0]               
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 64, 64, 64)    0           convolution2d_2[0][0]            
___________________________________________________________________________________________

In [None]:
# One epoch over the whole training set, validated on the whole validation set
model.fit_generator(
    train_batches,
    samples_per_epoch=train_batches.nb_sample,
    nb_epoch=1,
    validation_data=val_batches,
    nb_val_samples=val_batches.nb_sample,
    verbose=True)

Epoch 1/1
  640/27184 [..............................] - ETA: 264s - loss: 13.1362 - acc: 0.0516

In [None]:
# Raise the learning rate to 0.1 and train for one more epoch.
#
# `model.optimizer.lr = 0.1` would only rebind the attribute to a Python float.
# The training function was already built by the earlier fit_generator call, so
# it would keep using the original backend variable and the old rate. Update
# that variable's value in place instead.
if hasattr(model.optimizer.lr, 'set_value'):
    model.optimizer.lr.set_value(0.1)
else:
    # lr has already been replaced by a plain number elsewhere; nothing to update in place
    model.optimizer.lr = 0.1

# Take the sample counts from the iterators (as the first training cell does)
# so this cell stays correct if `path` is switched to the sample set.
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=1,
                    validation_data=val_batches,
                    nb_val_samples=val_batches.nb_sample,
                    verbose=True)

Epoch 1/1

In [None]:
# Drop the learning rate to 0.01 and train for four more epochs.
#
# `model.optimizer.lr = 0.01` would only rebind the attribute to a Python float.
# The training function was already built by the earlier fit_generator call, so
# it would keep using the original backend variable and the old rate. Update
# that variable's value in place instead.
if hasattr(model.optimizer.lr, 'set_value'):
    model.optimizer.lr.set_value(0.01)
else:
    # lr has already been replaced by a plain number elsewhere; nothing to update in place
    model.optimizer.lr = 0.01

# Take the sample counts from the iterators (as the first training cell does)
# so this cell stays correct if `path` is switched to the sample set.
model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample,
                    nb_epoch=4,
                    validation_data=val_batches,
                    nb_val_samples=val_batches.nb_sample,
                    verbose=True)

## Data Augmentation

In [None]:
# Augmentation settings: rotation over the full 360 degrees plus small shifts,
# shear and zoom. NOTE(review): no visible cell passes this generator to a
# batch call yet.
image_generator = image.ImageDataGenerator(
    zoom_range=0.05,
    shear_range=0.05,
    height_shift_range=0.02,
    width_shift_range=0.02,
    rotation_range=360)

## Batch Normalization 

## Dropout 

## Ensembling 