<a href="https://colab.research.google.com/github/f00-/project-euler/blob/master/digit-recognizer/digit_recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Goal
The goal in this competition is to take an image of a handwritten single digit, and determine what that digit is.

For every ImageId in the test set, you should predict the correct label.

# Metric
This competition is evaluated on the **categorization accuracy of your predictions** (*the percentage of images you get correct*).

# Submission File Format
The file should contain a header and have the following format:
```
ImageId,Label
1,0
2,0
3,0
etc.
```

In [0]:
# import libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Raw-CSV endpoints of the digit-recognizer train/test tables (MNIST).
train_data_url = "https://github.com/f00-/kaggle-competitions/blob/master/digit-recognizer/train.csv?raw=true"
test_data_url = "https://github.com/f00-/kaggle-competitions/blob/master/digit-recognizer/test.csv?raw=true"

# Fetch each table over HTTP and parse it straight into a DataFrame.
train_data = pd.read_csv(filepath_or_buffer=train_data_url)
test_data = pd.read_csv(filepath_or_buffer=test_data_url)

# Preview the first ten training rows. This must stay the last expression of
# the cell (only comments follow) so the notebook renders it.
train_data.head(10)
# train_data['label']

# --- Remaining plan (everything below is intentionally still disabled) ---

# Train models (LeNet / Random Forest) using cross validation. Prior work:
#   https://github.com/f00-/mnist-lenet-keras
#   https://github.com/f00-/sklearn-mnist-randomforest

#clf = RandomForestClassifier(n_estimators=10, n_jobs=2)
#clf.fit(train_data.drop('label', axis=1), train_data['label'])

# TODO: cross validation, LeNet

# Make a prediction.
# NOTE(review): X_test / y_test are not defined anywhere in this notebook yet;
# they would have to come from a hold-out split of train_data.
#y_pred = clf.predict(X_test)
#print("Making predictions on test set: {}".format(y_pred))

# Evaluate the prediction (requires `from sklearn import metrics`).
#print("Precision: \t {}".format(metrics.precision_score(y_test, y_pred, average='micro')))
#print("Recall: \t {}".format(metrics.recall_score(y_test, y_pred, average='micro')))
#print("F1 score: \t {}".format(metrics.f1_score(y_test, y_pred, average='micro')))
#print("Mean accuracy: \t {}".format(clf.score(X_test, y_test)))

# TODO: generate submission file

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [0]:
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense

class LeNet:
    """Factory for a LeNet-style convolutional classifier built with Keras."""

    @staticmethod
    def build(width, height, depth, classes, weightsPath=None):
        """Assemble the network and optionally restore saved weights.

        The architecture is two CONV => RELU => POOL stages (20 and then 50
        filters with a 5x5 kernel, each followed by 2x2 max pooling), one
        500-unit fully connected RELU layer, and a softmax output layer with
        ``classes`` units.

        Args:
            width: Input image width.
            height: Input image height.
            depth: Number of input channels.
            classes: Number of output classes.
            weightsPath: Optional path of a weights file to load into the
                freshly built model; nothing is loaded when ``None``.

        Returns:
            The (uncompiled) ``Sequential`` model.
        """
        # NOTE(review): the input shape is declared channels-first,
        # (depth, height, width) -- confirm this matches the backend's
        # configured image data format.
        input_dims = (depth, height, width)

        # Layers listed in the exact order they are stacked.
        stack = [
            # stage 1: CONV => RELU => POOL
            Convolution2D(20, 5, 5, border_mode="same",
                input_shape=input_dims),
            Activation("relu"),
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

            # stage 2: CONV => RELU => POOL
            Convolution2D(50, 5, 5, border_mode="same"),
            Activation("relu"),
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

            # fully connected head: FC => RELU
            Flatten(),
            Dense(500),
            Activation("relu"),

            # softmax classifier
            Dense(classes),
            Activation("softmax"),
        ]

        net = Sequential()
        for layer in stack:
            net.add(layer)

        # Restore pre-trained parameters only when a path was supplied.
        if weightsPath is not None:
            net.load_weights(weightsPath)

        return net

In [0]:
# Compile the LeNet model, then either restore pre-trained weights or train.
#
# Imports live in this cell because SGD was used here without ever being
# imported, and os is needed for the weight-file checks below.
import os

from keras.optimizers import SGD

# Where the LeNet weights live, relative to the working directory.
# Upstream copy:
# https://github.com/f00-/mnist-lenet-keras/blob/master/weights/lenet_weights.hdf5
weightsPath = "weights/lenet_weights.hdf5"

# Decide once whether to load or to train. Previously the training branch was
# guarded by `weightsPath is None`, which can never be true for the hard-coded
# path above, and it would have ended by saving the weights to `None`.
loadWeights = os.path.isfile(weightsPath)

# initialize the optimizer and model
print("[INFO] compiling model...")
opt = SGD(lr=0.01)
model = LeNet.build(width=28, height=28, depth=1, classes=10,
    weightsPath=weightsPath if loadWeights else None)
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# if no saved weights were found, train the model and save them
if not loadWeights:
    # NOTE(review): trainData, trainLabels, testData and testLabels are not
    # defined in any visible cell of this notebook -- they must be prepared
    # (reshaped images, one-hot labels) before this branch can run.
    print("[INFO] training...")
    model.fit(trainData, trainLabels, batch_size=128, nb_epoch=20,
        verbose=1)

    # show the accuracy on the testing set
    print("[INFO] evaluating...")
    (loss, accuracy) = model.evaluate(testData, testLabels,
        batch_size=128, verbose=1)
    print("[INFO] accuracy: {:.2f}%".format(accuracy * 100))

    print("[INFO] dumping weights to file...")
    # Make sure the target directory exists before writing the file.
    weightsDir = os.path.dirname(weightsPath)
    if weightsDir:
        os.makedirs(weightsDir, exist_ok=True)
    model.save_weights(weightsPath, overwrite=True)