Now let's implement an Artificial Neural Network in Python with a beautifully simple library called [Keras](http://keras.io)!

So far in this series we have been implementing the low-level functions ourselves, but let's look at what some higher-level libraries can do! Keras keeps all of that low-level detail in the background and lets us focus on crafting the actual architecture using pre-made layers and training functions!

Another advantage is that it can run on top of both TensorFlow and Theano! I haven't actually tested the TensorFlow backend; however, the Theano backend works great for running networks on your GPU! This gives absolutely spectacular speed improvements over using the CPU alone.

Check out how to use Theano with the GPU [here](http://deeplearning.net/software/theano/tutorial/using_gpu.html). I use the CUDA backend with cuDNN because I have an NVIDIA card, and it works spectacularly well!

OK, let's implement a simple ANN in Keras! We will be using another library called [kerosene](https://github.com/dribnet/kerosene) to give us easy access to the [Iris](https://archive.ics.uci.edu/ml/datasets/Iris) dataset. You don't need to download the dataset yourself; kerosene will do that for us. Install kerosene using `pip install kerosene`.

In [None]:
'''
an artificial neural network to tackle the iris data set
'''

from __future__ import print_function
import numpy as np
np.random.seed(0)

from kerosene.datasets import iris
from keras.models import Sequential # import model
from keras.layers import Dense, Dropout, Activation, Flatten # import core layers
from keras.utils import np_utils # import helper funcs
from keras.regularizers import WeightRegularizer, l2 # import l2 regularizer
from keras.layers.advanced_activations import SReLU

# --- training hyper-parameters ---
batch_size = 3    # samples per gradient update
nb_classes = 3    # number of target classes (one-hot width and output layer size)
nb_epoch = 300    # number of passes over the training data


# Load the data set. The loader returns a 1-tuple holding a single
# (features, labels) pair, hence the trailing comma in the unpacking.
(X_train, y_train), = iris.load_data()

# Keras carves the `validation_split` hold-out from the *tail* of the arrays
# before it does any shuffling of its own. Nothing above guarantees the rows
# arrive in random order, so shuffle features and labels together here;
# otherwise a class-sorted file would yield a single-class validation set.
# The permutation is reproducible because the NumPy seed is fixed above.
shuffled_order = np.random.permutation(len(X_train))
X_train = np.asarray(X_train)[shuffled_order]
y_train = np.asarray(y_train)[shuffled_order]

# convert class vectors to binary (one-hot) class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)

# the number of input features is the width of the feature matrix
_, input_size = X_train.shape

model = Sequential()

# Hidden stack as (units, dropout rate or None). Every Dense layer is followed
# by an SReLU activation and, where a rate is given, a Dropout layer.
# To experiment with weight regularisation, pass
# W_regularizer=WeightRegularizer(l1=0.001, l2=0.001) to Dense.
hidden_layers = [
    (16, None),
    (12, 0.25),
    (16, 0.25),
    (9, None),
    (8, None),
    (7, None),
    (6, None),
]

for layer_index, (units, dropout_rate) in enumerate(hidden_layers):
    if layer_index == 0:
        # only the first layer has to be told the input width
        model.add(Dense(units, input_dim=input_size))
    else:
        model.add(Dense(units))
    model.add(SReLU())
    if dropout_rate is not None:
        model.add(Dropout(dropout_rate))

# output layer: one unit per class, softmax to obtain class probabilities
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# the targets are one-hot encoded, so use categorical cross-entropy
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

# hold out the last 20% of the (now shuffled) samples for validation
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=2, validation_split=0.2)

# render the architecture (with layer shapes, without layer names) to an image
from keras.utils.visualize_util import plot
plot(model, to_file='model_iris.png', show_layer_names=False, show_shapes=True)

How easy is that! We just implemented an Artificial Neural Network and trained it on our GPU, achieving a high accuracy as well!

Let's have a shot at a Convolutional Neural Network tackling the good old MNIST dataset! We don't need to use kerosene for this one, as it's already included in the native Keras datasets.

In [None]:
'''
a convolutional neural network to tackle the mnist data set
'''

from __future__ import print_function
import numpy as np
np.random.seed(0)

from keras.datasets import mnist # mnist # import data set
from keras.models import Sequential # import model
from keras.layers import Dense, Dropout, Activation, Flatten # import core layers
from keras.layers import Convolution2D, MaxPooling2D # import convolutional layers
from keras.utils import np_utils # import helper funcs
from keras.regularizers import WeightRegularizer, l2 # import l2 regularizer
from keras.layers.noise import GaussianNoise
from keras.layers.advanced_activations import SReLU

# --- training hyper-parameters ---
batch_size = 512   # samples per gradient update
nb_classes = 10    # number of target classes (one-hot width and output layer size)
nb_epoch = 20      # number of passes over the training data

# input image dimensions, in (channels, rows, cols) order
img_chan, img_rows, img_cols = 1, 28, 28
# number of convolutional filters to use
nb_filters = 25
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

# the data, split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add an explicit channel axis in front of the image axes, then convert to
# float32 and scale by 1/255 (presumably mapping 8-bit intensities into [0, 1]).
X_train = X_train.reshape(X_train.shape[0], img_chan, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], img_chan, img_rows, img_cols)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary (one-hot) class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()

# Noise input: perturb the images with Gaussian noise as a regulariser.
# NOTE(review): the inputs were already divided by 255 above, so this sigma
# (0.1 / 255, roughly 0.0004) is a very small perturbation - confirm that the
# extra 1/255 factor is intended.
percent_noise = 0.1
noise = (1.0/255) * percent_noise
model.add(GaussianNoise(noise, input_shape=(img_chan, img_rows, img_cols)))

# first convolutional block: two 3x3 convolutions, then pooling and dropout
model.add(Convolution2D(nb_filters, nb_conv, nb_conv, border_mode='valid'))
model.add(SReLU())

model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(SReLU())

# MaxPooling2D pools over the two spatial axes only, so pool_size takes
# exactly two entries (rows, cols).
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.15))

# second convolutional block, same layout as the first
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(SReLU())

model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(SReLU())

model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.15))

# flatten the feature maps into a vector for the fully connected classifier
model.add(Flatten())

# Four identical fully connected layers, each followed by SReLU.
# To experiment with weight regularisation, pass
# W_regularizer=WeightRegularizer(l1=0.001, l2=0.001) to Dense.
for _ in range(4):
    model.add(Dense(256))
    model.add(SReLU())

# output layer: one unit per class, softmax to obtain class probabilities
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# the targets are one-hot encoded, so use categorical cross-entropy
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

# the test set doubles as the validation set reported after each epoch
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=2, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
# summary() prints the layer table itself and returns nothing, so it must not
# be wrapped in print() (that would emit a stray "None")
model.summary()
print('Test score:', score[0])
print('Test accuracy:', score[1])

# render the architecture (with layer shapes, without layer names) to an image
from keras.utils.visualize_util import plot
plot(model, to_file='model.png', show_layer_names=False, show_shapes=True)

Now there are a couple of new concepts here. Firstly, can you see the input layer? It's actually a Gaussian noise layer. This will slightly distort the input signals to add some variance to the dataset. This has the effect of mitigating overfitting by increasing the effective sample size!

Secondly, can you see the convolutional layers? We are using a kernel size of 3x3 and 25 filters per convolutional layer.

We then add a max-pooling layer to reduce the amount of space the data takes up, with only a minimal effect on the actual information contained therein. This is the beauty of convolutional networks: we can take massive inputs and reduce them down to much smaller arrays, which are still very rich in information.

After this we flatten the output and feed it into a standard Artificial Neural Network.

Another interesting choice here is the use of S-shaped Rectified Linear Units. These are a special type of activation function that build upon the original idea of a Rectified Linear unit in the sense that they don't saturate like standard Sigmoid neurons.

How about we try a dataset that consists of colour images? Let's build another network, but this time use the CIFAR-10 dataset.

In [None]:
'''
a convolutional neural network to tackle the CIFAR-10 data set
'''

from __future__ import print_function
import numpy as np
np.random.seed(0)

from keras.datasets import cifar10 # import data set
from keras.models import Sequential # import model
from keras.layers import Dense, Dropout, Activation, Flatten # import core layers
from keras.layers import Convolution2D, MaxPooling2D # import convolutional layers
from keras.utils import np_utils # import helper funcs
from keras.layers.noise import GaussianNoise
from keras.layers.advanced_activations import SReLU

# --- training hyper-parameters ---
batch_size = 256   # samples per gradient update
nb_classes = 10    # number of target classes (one-hot width and output layer size)
nb_epoch = 20      # number of passes over the training data

# input image dimensions, in (channels, rows, cols) order
img_chan, img_rows, img_cols = 3, 32, 32
# number of convolutional filters to use
nb_filters = 18
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

# the data, split between train and test sets
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# NOTE(review): reshape only relabels the axes, it does not move channels.
# This assumes the loader already returns channels-first arrays - confirm for
# the backend / image ordering in use.
X_train = X_train.reshape(X_train.shape[0], img_chan, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], img_chan, img_rows, img_cols)
# convert to float32 and scale by 1/255 (presumably mapping 8-bit intensities
# into [0, 1])
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary (one-hot) class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()

# Noise input: perturb the images with Gaussian noise as a regulariser.
# NOTE(review): the inputs were already divided by 255 above, so this sigma
# (0.1 / 255, roughly 0.0004) is a very small perturbation - confirm that the
# extra 1/255 factor is intended.
percent_noise = 0.1
noise = (1.0/255) * percent_noise
model.add(GaussianNoise(noise, input_shape=(img_chan, img_rows, img_cols)))

# four stacked 3x3 convolutions, each followed by an SReLU activation
for _ in range(4):
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
    model.add(SReLU())

# a single pooling + dropout stage after the convolution stack
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.15))

# flatten the feature maps into a vector for the fully connected classifier
model.add(Flatten())

# fully connected classifier head: 1024 -> 512, each followed by SReLU
for units in (1024, 512):
    model.add(Dense(units))
    model.add(SReLU())

# output layer: one unit per class, softmax to obtain class probabilities
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# the targets are one-hot encoded, so use categorical cross-entropy
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

# the test set doubles as the validation set reported after each epoch
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=2, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
# summary() prints the layer table itself and returns nothing, so it must not
# be wrapped in print() (that would emit a stray "None")
model.summary()
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Render the architecture (with layer shapes, without layer names) to an image.
# NOTE(review): 'model.png' is the same file name the MNIST example writes, so
# running both overwrites the earlier diagram.
from keras.utils.visualize_util import plot
plot(model, to_file='model.png', show_layer_names=False, show_shapes=True)

So simple!

Now you can see how easy it is to actually use Keras. I highly recommend that you get acquainted with it!