# Introduction to Convolutional Neural Networks

**Goal** : This notebook explains how to implement a simple convolutional neural network model. 

## MLP with Keras

In [11]:
import numpy as np
import pandas as pd
import os
from PIL import Image

### Load Data

In [2]:
#Load the data

labels = pd.read_csv("../data/BeeSpotter/train_labels.csv")
submissionFormat = pd.read_csv("../data/BeeSpotter/SubmissionFormat.csv")

In [3]:
labels.head()

Unnamed: 0,id,genus
0,520,1
1,3800,1
2,3289,1
3,2695,1
4,4922,1


In [4]:
submissionFormat.head()

Unnamed: 0,id,genus
0,2783,0.5
1,2175,0.5
2,4517,0.5
3,2831,0.5
4,3556,0.5


In [5]:
print submissionFormat.shape, labels.shape

(992, 2) (3969, 2)


##### Util to get an image. This will also help us resize the image after we read it

In [6]:
def get_image(filename, folder, size):
    filepath = os.path.join("../data/BeeSpotter/images", folder, filename + ".jpg")
    image = Image.open(filepath)
    image.thumbnail((size, size), Image.ANTIALIAS)
    return (np.array(image, dtype=np.int32)[:,:,:3]).swapaxes(0,2)

##### Util to load all the images. This will call the `get_image` function defined above

In [8]:
def load_images(filenames, folder, size):
    features = np.empty([len(filenames), 3, size, size])
    for counter, file in enumerate(filenames):
#        print file, counter
        features[counter] = get_image(str(file), folder, size)
    
    return features

##### Load the train and test datasets

In [9]:
train_X = load_images(labels.id, "train", 32)
test_X = load_images(submissionFormat.id, "test", 32)

In [10]:
print train_X.shape, test_X.shape

(3969, 3, 32, 32) (992, 3, 32, 32)


##### Load `keras` package for CNN

In [12]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD

##### First Model: Just one convolution layer

In [19]:
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 

model.add(Flatten())

model.add(Dense(32*34*34, 1))
model.add(Activation('sigmoid'))

In [20]:
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
model.fit(train_X, np.array(labels.genus), batch_size=32, nb_epoch=10)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9


<keras.callbacks.History at 0x12888bd90>

In [17]:
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 

model.add(Flatten())

model.add(Dense(32*34*34, 256))
model.add(Activation('relu'))
model.add(Dense(256, 1))
model.add(Activation('sigmoid'))

In [18]:
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
model.fit(train_X, np.array(labels.genus), batch_size=32, nb_epoch=10)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
 896/3969 [=====>........................] - ETA: 4s - loss: 0.0000

KeyboardInterrupt: 

In [None]:


model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(64, 64, 3, 3)) 
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64*8*8, 256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, 1))
model.add(Activation('sigmoid'))

In [4]:
(X_train, y_train) = cifar10_load_data()

In [5]:
print X_train.shape
print y_train.shape

(10000, 3, 32, 32)
(10000, 1)


In [6]:
X_train_reshaped = X_train.reshape(10000,3072)

In [7]:
X_train_reshaped.shape

(10000, 3072)

In [8]:
y_train.shape

(10000, 1)

### Data pre-processing

Create 2 categories of data:
* 5000 cats
* 5000 others

In [9]:
df = pd.DataFrame(X_train_reshaped, dtype=float)
df = pd.concat([df, pd.DataFrame(y_train)], axis=1)

df = df.reindex(np.random.permutation(df.index))
df.columns = range(0,3073)
cats = df[df[3072] == 3].copy()
cats[3072] = 1
dogs = df[df[3072] != 5]
dogs = dogs[:5000].copy()
dogs[3072] = 0
df = cats.append(dogs)
print df.shape

data = np.array(df.ix[:, :3071])
labels = np.array(df[3072])

data.shape
X_train = data
Y_train = np_utils.to_categorical(labels)
print X_train.shape
print Y_train.shape

(10000, 3073)
(10000, 3072)
(10000, 2)


In [10]:
# for faster convergence
dims = 64
# dims = 1500

Create a sequential model & add layers

In [11]:
model = Sequential()
model.add(Dense(X_train.shape[1], dims , init='uniform'))
model.add(Activation('relu'))

model.add(Dense(dims , dims, init='uniform'))
model.add(Activation('relu'))

model.add(Dense(dims, Y_train.shape[1], init='uniform'))
model.add(Activation('softmax'))

In [12]:
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

In [13]:
%timeit model.compile(loss='mean_squared_error', optimizer=sgd)

1 loops, best of 3: 2.65 s per loop


In [14]:
%timeit model.fit(X_train, Y_train, nb_epoch=2, show_accuracy=True)

Epoch 0
Epoch 1
Epoch 0
Epoch 1
Epoch 0
Epoch 1
Epoch 0
Epoch 1
1 loops, best of 3: 2.46 s per loop


# Convolutional Neural Network

In [15]:
import os
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.datasets import cifar10
from keras.utils import np_utils

In [16]:
from keras.layers.convolutional import Convolution2D, MaxPooling2D

### fetch data

In [17]:
X_train, y_train = cifar10_load_data()

#### Each image has 3 channel (RGB) & is of size 32x32

In [18]:
X_train.shape

(10000, 3, 32, 32)

In [19]:
y_train[1]

array([3], dtype=uint32)

### Data pre-processing

Create 2 categories of data:
* 5000 cats
* 5000 others

In [20]:
z = zip(X_train, y_train)

In [21]:
def get_cats(x):
    if x[1][0] == 3:
        return True
    else:
        return False
    
def get_others(x):
    if x[1][0] != 3:
        return True
    else:
        return False
    

In [22]:
catz = filter(get_cats, z)
x_cat, y_cat = zip(*catz)

In [23]:
otherz = filter(get_others, z)
otherz = otherz[:5000]
x_other, y_other = zip(*otherz)

In [24]:
x_cats = np.array(x_cat)
x_others = np.array(x_other)

X_train = np.concatenate([x_cats,x_others])

In [25]:
X_train.shape

(10000, 3, 32, 32)

In [26]:
y_cats = np.array(y_cat)
y_others = np.array(y_other)

y_train = np.concatenate([y_cats,y_others])

In [27]:
y_train.shape

(10000, 1)

In [28]:
y_train[y_train == 3] = 1
y_train[y_train != 1] = 0

In [29]:
y_train

array([[1],
       [1],
       [1],
       ..., 
       [0],
       [0],
       [0]], dtype=uint32)

In [30]:
y_train.shape

(10000, 1)

#### Reshape y_train to represent 2 categories - Cats & Others

In [31]:
Y_train = np_utils.to_categorical(y_train, 2)
Y_train.shape

(10000, 2)

#### Create a sequential model, add convolutional layers with activation functions & regularizations

In [None]:
#This works - Just one layer of Convolution
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 

model.add(Flatten())
model.add(Dense(32*34*34, 256))

model.add(Dense(256, 2))
model.add(Activation('softmax'))

X_train = X_train.astype("float32")
X_train /= 255

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

In [55]:
#This works - Just one layer of Convolution and max pooling
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model.add(MaxPooling2D(poolsize=(2, 2)))

model.add(Flatten())
model.add(Dense(32*17*17, 256))

model.add(Dense(256, 2))
model.add(Activation('softmax'))

X_train = X_train.astype("float32")
X_train /= 255

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

Epoch 0


<keras.callbacks.History at 0x1173a9d10>

In [57]:
#This works - Just one layer of Convolution but no full padding
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3)) 
#model.add(MaxPooling2D(poolsize=(2, 2)))

model.add(Flatten())
model.add(Dense(32*30*30, 256))

model.add(Dense(256, 2))
model.add(Activation('softmax'))

X_train = X_train.astype("float32")
X_train /= 255

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

Epoch 0


<keras.callbacks.History at 0x190f03190>

In [54]:
#This works - all layers once
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
#model.add(Convolution2D(32, 3, 3, 3)) 
#model.add(Activation('relu'))
#model.add(Convolution2D(32, 32, 3, 3))
#model.add(Activation('relu'))
#model.add(MaxPooling2D(poolsize=(2, 2)))
#model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(32*34*34, 256))
#model.add(Activation('relu'))
#model.add(Dropout(0.5))

model.add(Dense(256, 2))
model.add(Activation('softmax'))


X_train = X_train.astype("float32")
X_train /= 255

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

Epoch 0


<keras.callbacks.History at 0x132e08850>

In [36]:
#This works - all layers once
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(32*16*16, 256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, 2))
model.add(Activation('softmax'))


X_train = X_train.astype("float32")
X_train /= 255

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

Epoch 0


<keras.callbacks.History at 0x1891fc050>

In [None]:
# print X_train.shape, Y_train.shape

In [None]:
#This is blank cell

In [None]:

model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(64, 64, 3, 3)) 
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64*8*8, 256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, 2))
model.add(Activation('softmax'))






In [None]:
X_train = X_train.astype("float32")
X_train /= 255

In [None]:

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

In [None]:
model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

### CNN with Dataset 2

In [None]:
from __future__ import absolute_import
import os
import sys
import numpy as np
import scipy
import six.moves.cPickle
from six.moves import range

In [None]:
with open("../data/rajni_cigar", "rb") as rf:
    l_data = six.moves.cPickle.loads(rf.read())

In [None]:
l_data.shape

In [None]:
unzipped = zip(*l_data)

In [None]:
data = np.array(unzipped[0])

In [None]:
labels = np.array(unzipped[1])

In [None]:
data.shape

In [None]:
labels.shape

In [None]:
Y_LABELS = np_utils.to_categorical(labels, 2)

In [None]:
Y_LABELS.shape

In [None]:
model = Sequential()
model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 
model.add(Activation('relu'))
model.add(Convolution2D(64, 64, 3, 3)) 
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64*8*8, 256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, 2))
model.add(Activation('softmax'))


In [None]:
X_train = data
Y_train = Y_LABELS

In [None]:
X_train = X_train.astype("float32")
X_train /= 255

In [None]:

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

In [None]:
model.fit(X_train, Y_train, batch_size=32, nb_epoch=1, show_accuracy=True)

In [None]:
df = pd.DataFrame([labels, predict]).T
df.columns = ['label', 'prediction']
df

### Data Augmentation

<img style="float: left;" src="img/2_data_augmentation.png" height="520" width="820">
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
[Source](http://benanne.github.io/2015/03/17/plankton.html)

# Momentum

Problem with Stochastic Gradient Descent are Valleys (local minima). Bounce up and down the walls and don‘t descent the slope. What's to our rescue? **Momentum**. Nesterov Momentum (NAG) 

<img style="float: left;" src="img/3_ momentum.png" height="520" width="620">