# Linear model in Keras from scratch


In [1]:
#Verify current directory
%pwd

u'/media/eric/SSD 850 EVO 500Go/fastai/deeplearning1/nbs/lesson1'

In [2]:
#Create references to important directories we will use over and over
import os, sys
current_dir = os.getcwd()
sys.path.insert(1, os.path.join(sys.path[0], '..'))
LESSON_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data/redux'

In [3]:
#import modules
from utils import *
from vgg16 import Vgg16

#Instantiate plotting tool
#In Jupyter notebooks, you will need to run this command before doing any plotting
%matplotlib inline

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1080 Ti (CNMeM is disabled, cuDNN 5103)
Using Theano backend.


# Introduction

We are going to learn a linear model trained using the 1,000 predictions from the imagenet model for each image as input, and the dog/cat label as target.

In [4]:
%matplotlib inline
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
import scipy
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import utils; reload(utils)
from utils import plots, get_batches, plot_confusion_matrix, get_data

In [5]:
from numpy.random import random, permutation
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

# Linear models in Keras

Each of the Dense() layers is just a *linear model*, followed by a *simple activation function*.

A linear mode is simply a model where each row is calculated as sum(row * weights), where weights needs to be learnt from the data, and will be the same for every row.
Let's create some data that we know is linearly related:


In [6]:
x = random((30,2))
y = np.dot(x, [2., 3.]) + 1

In [7]:
x[:5]

array([[ 0.7843,  0.0574],
       [ 0.2492,  0.7196],
       [ 0.5988,  0.8857],
       [ 0.7511,  0.7068],
       [ 0.2394,  0.1745]])

In [8]:
y[:5]

array([ 2.7408,  3.6572,  4.8549,  4.6226,  2.0025])

We use Keras to create a simple linear model (*Dense()* -with no activation- in Keras) and optimize it using SGD to minimize mean squared error (mse):

In [9]:
lm = Sequential([ Dense(1, input_shape=(2,)) ])
lm.compile(optimizer=SGD(lr=0.1), loss='mse')

Now that the lm model learnt its internal weights, we can evaluate the loss function (MSE):

In [10]:
lm.get_weights()

[array([[-0.9306],
        [-0.5012]], dtype=float32), array([ 0.], dtype=float32)]

In [11]:
lm.evaluate(x, y, verbose=0)

18.766643524169922

Let's start training the model

In [12]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f353072bf90>

In [13]:
# The loss function improves
lm.evaluate(x, y, verbose=0)

0.015833029523491859

In [14]:
# And the weights improve as well, tending to expected values (2. , 3., +1.)
lm.get_weights()

[array([[ 1.5724],
        [ 2.8654]], dtype=float32), array([ 1.233], dtype=float32)]

Another round of training and evaluation

In [15]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 1.9215],
        [ 2.9917]], dtype=float32), array([ 1.0541], dtype=float32)]

In [16]:
lm.evaluate(x, y, verbose=0)

0.00058968900702893734

In [17]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 1.983 ],
        [ 2.9984]], dtype=float32), array([ 1.0077], dtype=float32)]

lm.evaluate(x, y, verbose=0)

In [18]:
lm.fit(x, y, nb_epoch=5, batch_size = 1)
lm.get_weights()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[array([[ 1.9965],
        [ 3.    ]], dtype=float32), array([ 1.0018], dtype=float32)]

In [19]:
lm.evaluate(x, y, verbose=0)

8.0341595776189934e-07

In [20]:
lm.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_1 (Dense)                  (None, 1)             3           dense_input_1[0][0]              
Total params: 3
Trainable params: 3
Non-trainable params: 0
____________________________________________________________________________________________________


# Train linear model on predictions

Now that we saw how Kears operates a *Linear Model*, we can use a *Dense()* layer to convert the 1,000 predictions -as input- given by ImageNet *VGG 16* model into a probability of Dog vs. Cat -as output-, learning from the Kaggle data.

## Training the model

We start with basic config steps and we copy a small amount of our data into a 'sample' directory, with the exact same structure as our 'train' directory.
It's *always* a good idea in Machine Learning to run intial testing on smaller dataset for time sake.

In [22]:
#path = "data/redux/sample/"
path = "data/redux/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)

We process as many images in a batch as our GFX card allows (GTX 1080 Ti here) through trial and error.

In [23]:
batch_size = 64

We need to start with our VGG 16 model since we are using its predictions and features.

In [24]:
from vgg16 import Vgg16
vgg = Vgg16()
model = vgg.model

Our approach is:

    1. Get the true labels for every image
    2. Get the 1,000 imagenet category predictions for every image.
    3. Feed those predictions as input to a simple linear model.
    
We start by grabbing training and validation batches.

In [25]:
# Use batch_size = 1 since doing preprocessing on the CPU
batches = get_batches(path + 'train', shuffle = False, batch_size=1)
val_batches = get_batches(path + 'valid', shuffle = False, batch_size=1)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


Loading and resizing images every time we want to use them isn't necessary, instead we should save the processed arrays.
By far the fastest way to save and load numpy arrays is *bcolz* which also compresses the arrays, saving disk space.

In [None]:
import bcolz
def save_array(fname, arr): c=bcolz.carray(arr, rootdir=fname, mode='w'); c.flush()
def load_array(fname): return bcolz.open(fname)[:]

We use a simple function from /utils.py that joins the arrays from all the batches so we can grab the training and validation data:

In [21]:
??get_data

In [26]:
val_data = get_batches(path + 'valid')

Found 2000 images belonging to 2 classes.


In [27]:
trn_data = get_batches(path + 'train')

Found 23000 images belonging to 2 classes.


In [28]:
trn_data.shape

AttributeError: 'DirectoryIterator' object has no attribute 'shape'

In [None]:
val_data.shape

We can load our training and validation data later without recalculating them.

In [None]:
save_array(model_path + 'train_data.bc', trn_data)
save_array(model_path + 'valid_data.bc', val_data)

In [None]:
def onehot(x): return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())

In [None]:
val_classes = val_batches.classes
trn_classes = batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)

In [None]:
trn_labels.shape

In [None]:
trn_classes[:4]

In [None]:
trn_labels[:4]

In [None]:
trn_features = model.predict(trn_data, batch_size=batch_size)
val_features = model.predict(val_data, batch_size=batch_size)