# Invasive Species

In [1]:
# Root directory of the dataset. Later cells append 'train', 'valid' and the
# weight-file name to this prefix, so the trailing slash is required.
path = 'data/'

## Create training and validation sets

In [None]:
# Move into the training-image directory and create one sub-directory per
# class label (0 and 1); the label-sorting cell below moves images into them.
%cd data/train
%mkdir 0
%mkdir 1

In [2]:
import csv
import math
import os
import numpy as np
from glob import glob

In [None]:
# Sort the training images into the per-class sub-directories '0/' and '1/'
# according to ../train_labels.csv (columns: image name, label).
# Must be run from inside the training-image directory.
with open('../train_labels.csv', newline='') as csvfile:
    # Let csv.reader split the columns itself instead of reading whole lines
    # and splitting on ',' by hand.
    labels = csv.reader(csvfile)
    for row in labels:
        # Skip blank lines and the header row.
        if not row or row[0] == 'name':
            continue
        name, label = row[0], row[1]
        # Label '0' goes to '0/'; every other label goes to '1/'.
        target_dir = '0' if label == '0' else '1'
        os.rename(name + '.jpg', '{}/{}.jpg'.format(target_dir, name))

In [None]:
# Create the validation directory tree (one sub-directory per class) one level
# up from the current directory, then step into the class-0 directory.
%mkdir ../valid
%mkdir ../valid/0
%mkdir ../valid/1
%cd 0

In [None]:
# Move a random 10% (rounded down) of the class-0 images in the current
# directory into the validation set.
# The listing is sorted and the shuffle uses a fixed seed so the same files
# are selected on every fresh run; glob() alone returns files in arbitrary
# order and an unseeded shuffle would give a different split each time.
# NOTE: re-running this cell moves a further 10% of whatever images remain.
g = sorted(glob('*.jpg'))
rng = np.random.RandomState(42)
rng.shuffle(g)
for fname in g[:len(g) // 10]:
    os.rename(fname, '../../valid/0/' + fname)

In [None]:
# Switch to the sibling class-1 directory for the second validation split.
%cd ../1

In [None]:
# Move a random 10% (rounded down) of the class-1 images in the current
# directory into the validation set.
# The listing is sorted and the shuffle uses a fixed seed so the same files
# are selected on every fresh run; glob() alone returns files in arbitrary
# order and an unseeded shuffle would give a different split each time.
# NOTE: re-running this cell moves a further 10% of whatever images remain.
g = sorted(glob('*.jpg'))
rng = np.random.RandomState(42)
rng.shuffle(g)
for fname in g[:len(g) // 10]:
    os.rename(fname, '../../valid/1/' + fname)

In [None]:
# Go back up three levels; if the cells above ran in order this undoes the
# earlier `%cd data/train` and `%cd` into the class directory.
%cd ../../..

## Set up VGG model

In [3]:
# Images per batch; passed to get_batches and used to derive the step counts
# given to fit.
batch_size = 32

In [4]:
from vgg16 import Vgg16

Using Theano backend.
Using cuDNN version 6021 on context None
Preallocating 3633/4037 Mb (0.900000) on cuda1
Mapped name None to device cuda1: GeForce GTX 970 (0000:02:00.0)


In [5]:
# Instantiate the Vgg16 wrapper imported from the local vgg16 module. Later
# cells use its get_batches / finetune / fit methods and its .model attribute.
vgg = Vgg16()

In [None]:
# Build the batch generators for the training and validation directories,
# both using the shared batch size.
train_dir = path + 'train'
valid_dir = path + 'valid'
batches = vgg.get_batches(train_dir, batch_size=batch_size)
valid_batches = vgg.get_batches(valid_dir, batch_size=batch_size)

In [6]:
# Prepare the wrapped model for fine-tuning on this dataset.
# NOTE(review): presumably swaps the final classification layer for a 2-class
# one and freezes the earlier layers — confirm against vgg16.Vgg16.finetune.
vgg.finetune()

In [7]:
# Resume from a previously saved checkpoint when one exists. On a fresh run
# the file has not been written yet (the save_weights cells come later in the
# notebook), so loading unconditionally would abort Restart & Run All.
weights_file = path + 'finetune1.h5'
if os.path.exists(weights_file):
    vgg.model.load_weights(weights_file)
else:
    print('No checkpoint at {}; continuing without loading weights.'.format(weights_file))

In [None]:
# Train for 20 epochs. The step counts are the hard-coded dataset sizes
# (2067 training / 228 validation images) divided by the batch size and
# rounded down, so a trailing partial batch is not counted.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=20,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Checkpoint the current model weights to <path>finetune1.h5, the same file
# the load_weights cell above reads.
vgg.model.save_weights(path + 'finetune1.h5')

### Finetune earlier layers

In [None]:
# Mark every layer from index 12 to the end of the model as trainable ahead
# of the next fit call.
# NOTE(review): Keras normally applies `trainable` changes only when the model
# is compiled — confirm that vgg.fit (re)compiles, otherwise this is a no-op.
first_trainable = 12
for layer in vgg.model.layers[first_trainable:]:
    layer.trainable = True

In [None]:
# Train for 20 epochs with the newly unfrozen layers. Step counts are the
# hard-coded dataset sizes (2067 / 228 images) over the batch size, floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=20,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Extend the trainable region two layers earlier: everything from index 10
# onward (a superset of the previous [12:] slice) is marked trainable.
# NOTE(review): confirm vgg.fit recompiles the model so this takes effect.
first_trainable = 10
for layer in vgg.model.layers[first_trainable:]:
    layer.trainable = True

In [None]:
# Train for 5 epochs. Step counts are the hard-coded dataset sizes
# (2067 / 228 images) over the batch size, floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=5,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Extend the trainable region again: everything from index 8 onward is
# marked trainable.
# NOTE(review): confirm vgg.fit recompiles the model so this takes effect.
first_trainable = 8
for layer in vgg.model.layers[first_trainable:]:
    layer.trainable = True

In [None]:
# Train for 10 epochs. Step counts are the hard-coded dataset sizes
# (2067 / 228 images) over the batch size, floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=10,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Extend the trainable region to everything from index 4 onward.
# NOTE(review): confirm vgg.fit recompiles the model so this takes effect.
first_trainable = 4
for layer in vgg.model.layers[first_trainable:]:
    layer.trainable = True

In [None]:
# Train for 10 epochs with layers 4 onward unfrozen. Step counts are the
# hard-coded dataset sizes (2067 / 228 images) over the batch size, floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=10,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Final stage: mark every layer of the model as trainable.
# NOTE(review): Keras normally applies `trainable` changes only when the model
# is compiled — confirm that vgg.fit (re)compiles, otherwise this is a no-op.
for layer in vgg.model.layers:
    layer.trainable = True

In [None]:
# Train for 10 epochs with every layer unfrozen. Step counts are the
# hard-coded dataset sizes (2067 / 228 images) over the batch size, floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=10,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

In [None]:
# Save the weights after the staged unfreezing. This writes to the same
# finetune1.h5 path as the earlier save cell, so that checkpoint is overwritten.
vgg.model.save_weights(path + 'finetune1.h5')

## Reduce overfitting with data augmentation

In [8]:
from keras.preprocessing.image import ImageDataGenerator

In [13]:
# Augment the training images with random flips, zooms, shears and shifts to
# reduce overfitting.
gen = ImageDataGenerator(horizontal_flip=True, zoom_range=0.2, shear_range=0.2,
                         width_shift_range=0.2,
                         height_shift_range=0.2)
batches = vgg.get_batches(path + 'train', batch_size=batch_size, gen=gen)
# Validation images are deliberately NOT augmented: random distortions would
# make the validation metrics noisy and not comparable between epochs. The
# default generator is used, as in the original un-augmented setup.
valid_batches = vgg.get_batches(path + 'valid', batch_size=batch_size)

Found 2067 images belonging to 2 classes.
Found 228 images belonging to 2 classes.


In [None]:
# Train for 30 epochs on the augmented batches. Step counts are the dataset
# sizes reported by the generators (2067 / 228 images) over the batch size,
# floored.
train_steps = math.floor(2067 / batch_size)
valid_steps = math.floor(228 / batch_size)
vgg.fit(
    batches,
    train_steps,
    epochs=30,
    validation_batches=valid_batches,
    validation_steps=valid_steps,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
