# Digit Classification on MNIST and SVHN with a Small CNN

**About the Data**

We use the MNIST data from the [Digit Recognizer competition on Kaggle](https://www.kaggle.com/c/digit-recognizer/data). MNIST was produced by [Yann LeCun et al.](http://yann.lecun.com/exdb/mnist/), whose page also has a great list of benchmark results. The Street View House Numbers (SVHN) data was obtained from [Stanford's website](http://ufldl.stanford.edu/housenumbers/).

In [116]:
import pandas as pd
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from ipywidgets import interact
from keras.layers import Input, Dense, Dropout,Conv2D,MaxPooling2D,Flatten,GlobalMaxPooling2D
from keras.models import Model
from keras.utils import to_categorical

## Extract, Transform, Load (ETL)

In [20]:
# Read the MNIST training table from disk and preview its first rows.
mnistCsvPath = "MNIST_train_28x28.csv"
mnistTrainingData = pd.read_csv(mnistCsvPath)
mnistTrainingData.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Split the table into targets (first column, "label") and features (all
# remaining columns, the flattened pixel values).
mnistValues = mnistTrainingData.values
mnistTrain_y = mnistValues[:, 0]
mnistTrain_x = mnistValues[:, 1:]

In [60]:
def dispMNIST(imSel = 0):
    """Plot one MNIST training image, titled with its entry in ``mnistTrain_y``.

    Args:
        imSel: Row index into ``mnistTrain_x`` / ``mnistTrain_y``.
    """
    # Each row is a flattened image; restore the 28x28 grid before drawing.
    digitPixels = mnistTrain_x[imSel].reshape(28, 28)
    digitLabel = mnistTrain_y[imSel]
    plt.imshow(digitPixels)
    plt.title("Digit class: {0}".format(digitLabel))
    plt.show()

# Slider over the first 101 samples (indices 0..100 inclusive).
interact(dispMNIST, imSel=(0, 100))

<function __main__.dispMNIST>

In [164]:
imSel = 0
svhn = sio.loadmat("SVHN_train_32x32.mat")
# "X" keeps the sample axis last; move it to the front so svhn_x[i] is one
# image and the array can be passed straight to model.fit.
svhn_x = np.moveaxis(svhn["X"], -1, 0)
# SVHN labels digits 1-9 as themselves and digit 0 as 10. Taking the label
# modulo 10 makes the class index equal to the digit shown in the image.
# (The previous `svhn_y - 1` made class k mean digit k+1 and class 9 mean 0.)
svhn_y = (svhn["y"] % 10).reshape(-1)

In [137]:
def dispSVHN(imSel = 0):
    """Plot one SVHN training image, titled with its entry in ``svhn_y``.

    Args:
        imSel: Index of the sample along the first axis of ``svhn_x``.
    """
    # np.ravel makes the lookup work whether svhn_y is flat (N,) or a column
    # (N, 1). The old `svhn_y[imSel][0]` raised IndexError on the flat array
    # produced by the loading cell.
    plt.title("Digit class: {0}".format(np.ravel(svhn_y)[imSel]))
    plt.imshow(svhn_x[imSel,:,:,:])
    plt.show()

interact(dispSVHN,imSel=(0,100))

<function __main__.dispSVHN>

## Modeling

In [124]:
def makeModel(inputSize, numClasses=10):
    """Build and compile a small convolutional classifier.

    Architecture: Conv2D(32, 3x3, same, relu) -> MaxPooling2D(2x2)
    -> Conv2D(32, 3x3, same, relu) -> GlobalMaxPooling2D
    -> Dense(100, relu) -> Dense(numClasses, softmax).

    Args:
        inputSize: Shape of a single input sample passed to ``Input``,
            e.g. ``(28, 28, 1)`` or ``(32, 32, 3)``.
        numClasses: Width of the softmax output layer. Defaults to 10.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer,
        categorical cross-entropy loss and the ``acc`` metric.
    """
    inputs = Input(shape=inputSize, name="input")
    # The input shape is defined solely by `inputs`. The former hard-coded
    # `input_shape=(28, 28, 1)` on the conv layers contradicted `inputSize`
    # and had no place on layers called on an existing tensor.
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid')(x)
    x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
    # Global pooling collapses the spatial axes, so the dense head does not
    # depend on the input height/width.
    x = GlobalMaxPooling2D()(x)
    x = Dense(100, activation='relu')(x)
    out = Dense(numClasses, activation='softmax', name="output")(x)

    model = Model(inputs=inputs, outputs=out)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['acc'])

    return model

# Build the single-channel 28x28 network and print its layer table.
model = makeModel((28, 28, 1))
model.summary()

# Restore the image grid (plus a channel axis) and one-hot encode the labels
# before fitting.
mnistImages = mnistTrain_x.reshape(-1, 28, 28, 1)
mnistOneHot = to_categorical(mnistTrain_y, 10)
hist = model.fit(mnistImages, mnistOneHot, batch_size=100, epochs=10)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 14, 14, 32)        9248      
_________________________________________________________________
global_max_pooling2d_6 (Glob (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 100)               3300      
_________________________________________________________________
output (Dense)               (None, 10)                1010      
Total para

In [166]:
# Train a separate 32x32 RGB network on SVHN. It gets its own names so the
# MNIST `model` / `hist` stay bound to the 28x28x1 network: the prediction
# cell further down feeds `model` an MNIST image, which a (32, 32, 3) model
# would reject on a fresh top-to-bottom run.
svhnModel = makeModel((32,32,3,))
svhnHist = svhnModel.fit(svhn_x, to_categorical(svhn_y,10),batch_size=100,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [121]:
# Sanity check: score the first MNIST training image and show the index of
# the highest-scoring class. `model` must accept (28, 28, 1) inputs here.
firstImage = mnistTrain_x[0].reshape(1, 28, 28, 1)
res = model.predict(firstImage)
np.argmax(res)

1

In [149]:
# Shape of the one-hot encoded MNIST targets: (samples, classes).
mnistOneHot = to_categorical(mnistTrain_y, 10)
mnistOneHot.shape

(42000, 10)

In [146]:
# The receiver of `.shape` was missing, which is a SyntaxError. The recorded
# output (73257,) matches the flattened SVHN label vector, so inspect that.
svhn_y.shape

(73257,)

In [151]:
# Confirm the SVHN labels form a flat vector with one entry per image.
np.shape(svhn_y)

(73257,)