In [1]:
#Verify current directory
%pwd

u'/media/eric/SSD 850 EVO 500Go/fastai/deeplearning1/nbs/lesson1'

In [2]:
#Create references to important directories we will use over and over
import os, sys
current_dir = os.getcwd()
sys.path.insert(1, os.path.join(sys.path[0], '..'))
LESSON_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data/redux'

In [3]:
#import modules
from utils import *
from vgg16 import Vgg16

#Instantiate plotting tool
#In Jupyter notebooks, you will need to run this command before doing any plotting
%matplotlib inline

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1080 Ti (CNMeM is disabled, cuDNN 5103)
Using Theano backend.


# Introduction

We are going to learn a linear model trained using the 1,000 predictions from the imagenet model for each image as input, and the dog/cat label as target.

In [4]:
%matplotlib inline
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
import scipy
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import utils; reload(utils)
from utils import plots, get_batches, plot_confusion_matrix, get_data

In [5]:
from numpy.random import random, permutation
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

# Linear models in Keras

Each of the Dense() layers is just a *linear model*, followed by a *simple activation function*.

A linear mode is simply a model where each row is calculated as sum(row * weights), where weights needs to be learnt from the data, and will be the same for every row.
Let's create some data that we know is linearly related:


In [6]:
x = random((30,2))
y = np.dot(x, [2., 3.]) + 1

In [7]:
x[:5]

array([[ 0.6422,  0.3483],
       [ 0.4475,  0.2888],
       [ 0.0426,  0.2675],
       [ 0.0678,  0.374 ],
       [ 0.7363,  0.6479]])

In [8]:
y[:5]

array([ 3.3293,  2.7614,  1.8876,  2.2577,  4.4164])

We use Keras to create a simple linear model (*Dense()* -with no activation- in Keras) and optimize it using SGD to minimize mean squared error (mse):

In [9]:
lm = Sequential([ Dense(1, input_shape=(2,)) ])
lm.compile(optimizer=SGD(lr=0.1), loss='mse')

Now that the lm model learnt its internal weights, we can evaluate the loss function (MSE):

In [10]:
lm.get_weights()

[array([[-0.2801],
        [-0.6526]], dtype=float32), array([ 0.], dtype=float32)]

In [11]:
lm.evaluate(x, y, verbose=0)

15.883236885070801

Let's start training the model

In [12]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff6e9d81890>

In [13]:
# The loss function improves
lm.evaluate(x, y, verbose=0)

0.020241307094693184

In [14]:
# And the weights improve as well, tending to expected values (2. , 3., +1.)
lm.get_weights()

[array([[ 2.011 ],
        [ 2.5999]], dtype=float32), array([ 1.2705], dtype=float32)]

Another round of training and evaluation

In [15]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 2.0149],
        [ 2.9315]], dtype=float32), array([ 1.022], dtype=float32)]

In [16]:
lm.evaluate(x, y, verbose=0)

0.00030592232360504568

In [17]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 2.0063],
        [ 2.9876]], dtype=float32), array([ 1.003], dtype=float32)]

lm.evaluate(x, y, verbose=0)

In [18]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 2.0013],
        [ 2.997 ]], dtype=float32), array([ 0.9999], dtype=float32)]

In [19]:
lm.evaluate(x, y, verbose=0)

1.3109588508086745e-06

In [20]:
lm.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_1 (Dense)                  (None, 1)             3           dense_input_1[0][0]              
Total params: 3
Trainable params: 3
Non-trainable params: 0
____________________________________________________________________________________________________


# Train linear model on predictions

Now that we saw how Kears operates a *Linear Model*, we can use a *Dense()* layer to convert the 1,000 predictions -as input- given by ImageNet *VGG 16* model into a probability of Dog vs. Cat -as output-, learning from the Kaggle data.

## Training the model

We start with basic config steps and we copy a small amount of our data into a 'sample' directory, with the exact same structure as our 'train' directory.
It's *always* a good idea in Machine Learning to run intial testing on smaller dataset for time sake.

In [21]:
#path = "data/redux/sample/"
path = "data/redux/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)

We process as many images in a batch as our GFX card allows (GTX 1080 Ti here) through trial and error.

In [22]:
batch_size = 64

We need to start with our VGG 16 model since we are using its predictions and features.

In [34]:
from vgg16 import Vgg16
vgg = Vgg16()
model1 = vgg.model

Our approach is:

    1. Get the true labels for every image
    2. Get the 1,000 imagenet category predictions for every image.
    3. Feed those predictions as input to a simple linear model.
    
We start by grabbing training and validation batches.

In [35]:
# Use batch_size = 1 since doing preprocessing on the CPU
batches = get_batches(path + 'train', shuffle = False, batch_size=1)
val_batches = get_batches(path + 'valid', shuffle = False, batch_size=1)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [36]:
def onehot(x): return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())

In [37]:
def Linear1(batches):
    model1 = Sequential([ Dense(2, activation='softmax', input_shape=(1000,)) ])
    model1.compile(optimizer=RMSprop(lr=0.1), loss='categorical_crossentropy', metrics=['accuracy'])
    model1.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, 
                     nb_val_samples=val_batches.nb_sample)
    model1.optimizer.lr = 0.01
    return model1

In [38]:
model1.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_2 (Lambda)                (None, 3, 224, 224)   0           lambda_input_2[0][0]             
____________________________________________________________________________________________________
zeropadding2d_14 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_2[0][0]                   
____________________________________________________________________________________________________
convolution2d_16 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_14[0][0]           
____________________________________________________________________________________________________
zeropadding2d_15 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_16[0][0]           
___________________________________________________________________________________________

In [26]:
def conv1(batches):
    model = Sequential([
            BatchNormalization(axis=1, input_shape=(3,224,224)),
            Convolution2D(32,3,3, activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Convolution2D(64,3,3, activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Flatten(),
            Dense(200, activation='relu'),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])

    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, 
                     nb_val_samples=val_batches.nb_sample)
    model.optimizer.lr = 0.001
    model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, 
                     nb_val_samples=val_batches.nb_sample)
    return model

In [28]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 3, 224, 224)   0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [27]:
model = conv1(batches)

Epoch 1/2


ValueError: Error when checking model target: expected dense_6 to have shape (None, 10) but got array with shape (1, 2)

In [None]:
val_classes = val_batches.classes
trn_classes = batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)

In [None]:
trn_labels.shape

In [None]:
trn_classes[:4]

In [None]:
trn_labels[:4]

In [None]:
trn_features = model.predict(trn_data, batch_size=batch_size)
val_features = model.predict(val_data, batch_size=batch_size)