In [1]:
import numpy as np
import pandas as pd 

from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.optimizers import SGD
from keras.utils import np_utils

# Seed NumPy's global random number generator with a fixed value so that
# NumPy-drawn random numbers repeat from run to run.
# NOTE(review): only NumPy is seeded here; no seed for the TensorFlow backend
# is set in the code shown -- confirm whether that is needed for repeatable
# training results.
np.random.seed(1337)  # for reproducibility

Using TensorFlow backend.


In [38]:
# Load the MNIST CSV exports. In train.csv column 0 holds the class label and
# the remaining columns hold the pixel values; every column of test.csv is
# used as a pixel feature.
# `.values` is used instead of `DataFrame.as_matrix()`, which is deprecated
# since pandas 0.23 and removed in pandas 1.0; both return the same ndarray.
df = pd.read_csv("../data/mnist/train.csv")
data = df.values

# Separate the label column from the pixel features (no validation split
# happens here; that is left to the training call).
y_train = data[:, 0]
X_train = data[:, 1:].astype(np.float32)

data = pd.read_csv("../data/mnist/test.csv").values
X_test = data[:, :].astype(np.float32)

print('X_train, Y_train shape:', X_train.shape, y_train.shape)
print('X_test shape:', X_test.shape)

('X_train, Y_train shape:', (42000, 784), (42000,))
('X_test shape:', (28000, 784))


In [39]:
num_classes = 10

# Convert the integer class labels into one-hot rows of width `num_classes`.
Y_train = np_utils.to_categorical(y_train, num_classes)

# Preprocessing: learn the per-feature mean/std on the training set only and
# apply those same statistics to the test set. Calling `fit_transform` on
# X_test would re-fit the scaler on test data, so the two sets would be scaled
# differently and the model would be evaluated on inputs that do not match
# what it was trained on.
standard = StandardScaler()
X_train = standard.fit_transform(X_train)
X_test = standard.transform(X_test)

# NOTE(review): the features are already standardized at this point, so this
# extra division by 255 only shrinks them further -- confirm it is intended
# (scaling raw pixels to [0, 1] would normally happen *instead of* or *before*
# standardization). Kept as-is to avoid changing the training inputs.
X_train /= 255
X_test /= 255

print('X_train, Y_train shape:', X_train.shape, y_train.shape)
print (X_train.shape[1:])

('X_train, Y_train shape:', (42000, 784), (42000,))
(784,)


In [40]:
# Turn every flat pixel row into a 28x28 image with a single trailing channel
# axis; the leading -1 lets NumPy infer the number of samples.
X_train = np.reshape(X_train, (-1, 28, 28, 1))
X_test = np.reshape(X_test, (-1, 28, 28, 1))
print('X_train, Y_train shape:', X_train.shape, y_train.shape)

('X_train, Y_train shape:', (42000, 28, 28, 1), (42000,))


In [41]:
# Convolutional classifier, declared as a single ordered layer list:
# two 3x3 ReLU convolutions (32 then 64 filters) -> 2x2 max-pooling ->
# dropout 0.25 -> flatten -> 128-unit ReLU dense -> dropout 0.5 ->
# softmax over `num_classes` outputs.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_10 (Dropout)         (None, 128)               0         
__________

In [42]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot rows), and accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 12 epochs in mini-batches of 128 samples, holding out 10% of the
# training data for validation and printing per-epoch progress.
model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=12,
    verbose=1,
    validation_split=0.1,
)

Train on 37800 samples, validate on 4200 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
  896/37800 [..............................] - ETA: 140s - loss: 0.2450 - acc: 0.9241

KeyboardInterrupt: 

In [None]:
# Predict one class index per test image and write the predictions to
# 'output.txt', one per line.
pred = model.predict_classes(X_test)
# NumPy is imported in this notebook only under the alias `np`; the bare name
# `numpy` is never bound, so `numpy.savetxt` raised NameError.
# fmt='%d' writes the integer class labels as plain integers instead of
# savetxt's default float scientific notation.
np.savetxt('output.txt', pred, delimiter=',', fmt='%d')