In [1]:
# 1. Import libraries and modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras import regularizers, optimizers
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from keras.preprocessing import image

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

from keras.callbacks import EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
#importing files

#train includes the pixel values & label
inputs = pd.read_csv('./train.csv')

#test includes only pixel values, from which we want to predict the labels
target = pd.read_csv('./test.csv')

In [10]:
#defining our Data & Labels
# our training  X  &   Y
labels = inputs.label
data = inputs.drop(labels = 'label',axis=1)

In [11]:
#random state for reproduciblity
X_train, X_test, y_train, y_test = train_test_split(data,labels,test_size=(0.15),random_state=42)

### Preprocessing

In [21]:
#reshaping into usable CNN dimensions
X_target = target.values.reshape(-1,28,28,1)
X_train = X_train.values.reshape(-1,28,28,1)
X_test = X_test.values.reshape(-1,28,28,1)

#confirming typing
X_target = X_target.astype('float32')
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#NNs only take values between 0-1
#normalizing data
X_target /= 255
X_train /= 255
X_test /= 255

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [22]:
#producing categorical values for training data
y_train_cat = np_utils.to_categorical(y_train, 10)
y_test_cat = np_utils.to_categorical(y_test, 10)

In [23]:
#To increase the amount of variablity and thus resiliance of final model
#this generates augmented data that shifts&changes the X_train slightly

datagen = image.ImageDataGenerator(
        rotation_range=20,  # Randomly rotate images in degrees
        zoom_range = 0.2, # Randomly zooms into image 
        width_shift_range=0.2,  # randomly shift images horizontally
        height_shift_range=0.2)  # randomly shift images vertically

datagen.fit(X_train)

### Model Generation

In [24]:
adam = optimizers.Adam(lr=0.001)


model = Sequential()

model.add(Convolution2D(filters = 16, 
                        padding = 'same',
                        kernel_size = 5,
                       activation = 'relu', 
                        input_shape = (28,28,1)))
model.add(Convolution2D(filters = 16, 
                        kernel_size = 5,
                        padding = 'same',
                       activation = 'relu',
                        input_shape = (28,28,1)))
model.add(Convolution2D(filters = 16, 
                        kernel_size = 5,
                        padding = 'same',
                       activation = 'relu',
                        input_shape = (28,28,1)))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Dropout(0.4))

model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Flatten())
model.add(Dropout(0.4))

model.add(Dense(64, 
                activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(64, 
                activation='relu'))

In [25]:
model.add(Dense(10, 
                activation='softmax'))

In [26]:
model.compile(loss = 'categorical_crossentropy',
              metrics=['accuracy'],
             optimizer = adam)

early_stop = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=10, mode='auto')
callbacks_list = [early_stop]

In [27]:
history = model.fit(X_train, y_train_cat, validation_data=(X_test, y_test_cat), 
                    epochs=100, 
                    batch_size=128, 
                    callbacks=callbacks_list, 
                    verbose =1)

Train on 35700 samples, validate on 6300 samples
Epoch 1/100
 1920/35700 [>.............................] - ETA: 7:26 - loss: 2.2902 - acc: 0.1167

KeyboardInterrupt: 

In [None]:
model.fit

In [None]:
train_loss = history.history['loss']
test_loss = history.history['val_loss']
plt.plot(train_loss, label='Training loss')
plt.plot(test_loss, label='Testing loss')
#plt.xticks(history.epoch)
plt.title('Loss')
plt.grid()
plt.legend();

In [None]:
plt.figure()
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
#plt.xticks(history.epoch)
plt.xlim()
plt.legend(['train', 'test'], loc='best')
plt.show();

In [None]:
confusion_matrix(np.argmax(y_test_cat,axis=1),model.predict_classes(X_test))

In [None]:
accuracy_score(np.argmax(y_test_cat,axis=1),model.predict_classes(X_test))

In [None]:
val_predicts = model.predict_classes(X_val)

In [None]:
df= pd.DataFrame(val_predicts)
df['ImageId'] = df.index+1
df.columns=['Label','ImageId']
df = df[['ImageId','Label']]

In [None]:
df.to_csv('Digit Recog Predicts_mix_2', index=False)

In [None]:
kay = pd.read_csv('Digit Recog Predicts_mix_2')
kay

In [68]:
#|0.9235| loss: 1.0872 - val_loss: 0.4303 @ adam = .01, 2convo - d - hid(128) - d - out
#|bad   | loss: bad - val_loss: bad @ adam = .01, 2convo - d - hid(128)(l2=0.001) - d - hid2(128) - out

#|0.9235| loss: 1.0872 - val_loss: 0.4303 @ adam = .01, 2convo - hid(128) - d - hid2(128) - d - out
#|0.9556| loss: 0.5472 - val_loss: 0.1765 @ adam = .01, 2convo - hid(128) - d - hid2(128) - d - hid3(128) - out

#|0.9812| loss: 0.1270 - val_loss: 0.0645 @ 2convo - hid(128) - d(0.5) - hid(128) - d(0.5) - hid2(128) - out
#|0.9814| loss: 0.1216 - val_loss: 0.0775 @ 2convo - hid(256) - d(0.5) - hid(128) - d(0.5) - hid2(128) - out
#|0.9828| loss: 0.1230 - val_loss: 0.0627 @ 2convo - hid(256) - d(0.5) - hid(256) - d(0.5) - hid2(128) - out(5epoch)

#|0.9816| loss: 0.1051 - val_loss: 0.0680 @ 2convo - hid(256) - d(0.4) - hid(256) - d(0.4) - hid2(128) - out(5epoch)
#|0.9816| loss: 0.1005 - val_loss: 0.0630 @ 2convo - hid(256) - d(0.3) - hid(256) - d(0.4) - hid2(128) - out(4epoch)
#|0.9843| loss: 0.0751 - val_loss: 0.0627 @ 2convo - hid(256) - d(0.25) - hid(256) - d(0.3) - hid2(128) - out(7ish)

In [None]:
#|0.9878| loss: 0.1508 - acc: 0.9586 - val_loss: 0.0489 - val_acc: 0.9852 
#convo(.25)convo(.25) - hid(528) - d(0.25) - hid(256) - d(0.25) - hid2(128)- out
#~70 epochs

In [None]:
#loss: 0.1034 - acc: 0.9701 - val_loss: 0.0440 - val_acc: 0.9881
#convo(.25)convo(.25) - hid(508) - d(0.2) - hid(256) - d(0.2) - hid2(128)- out
#

In [None]:
#loss: 0.0847 - acc: 0.9752 - val_loss: 0.0527 - val_acc: 0.9840
#convo[5x5](k32)(.25)-convo[3x3](k16)(.25) - hid(528) - d(0.2) - hid(256) - d(0.2) - hid2(128)- out

In [None]:
#97.8
#convo[5x5](k32)(same)(.25)-convo[5x5](k16)(.25)-convo[3x3](same)(k8)(.25) - 
#hid(528) - d(0.2) - hid(256) - d(0.2) - hid2(128)- out

In [None]:
#96
#convo[5x5](k32)(.25)-convo[5x5](k16)(.25)-convo[3x3](k8)(.25) - 
#hid(528) - d(0.2) - hid(256) - d(0.2) - hid2(128)- out

In [None]:
#.990: 5533 & .15

#.992 : 553 & .2
# : 553 & .25

#.990 : 533 & .15

#.990 : 533 & .25

In [None]:
0.99226190476190479
#553 & .2

adam = optimizers.Adam(lr=0.005)
l2 = regularizers.l2(0.1)

# 5. Model Architecture
model = Sequential()

model.add(Convolution2D(filters = 32, 
                        kernel_size = 5,
                       activation = 'relu', 
                        input_shape = (28,28,1)))
model.add(Convolution2D(filters = 32, 
                        kernel_size = 5,
                        padding = 'same',
                       activation = 'relu',
                        input_shape = (28,28,1)))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Dropout(0.2))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Flatten())
model.add(Dropout(0.2))

model.add(Dense(256, 
                activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(128, 
                activation='relu'))

In [None]:
0.99011904761904757
#533 & .25

adam = optimizers.Adam(lr=0.005)
l2 = regularizers.l2(0.1)

# 5. Model Architecture
model = Sequential()

model.add(Convolution2D(filters = 32, 
                        kernel_size = 5,
                       activation = 'relu', 
                        input_shape = (28,28,1)))
model.add(Convolution2D(filters = 16, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu',
                        input_shape = (28,28,1)))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Dropout(0.25))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(Convolution2D(filters = 8, 
                        kernel_size = 3,
                        padding = 'same',
                       activation = 'relu'))
model.add(MaxPooling2D( pool_size= (2,2)))
model.add(Flatten())
model.add(Dropout(0.25))

model.add(Dense(256, 
                activation='relu'))
model.add(Dropout(0.25))

model.add(Dense(128, 
                activation='relu'))