In [None]:
require 'torch'
require 'nn'
require 'nnx'
require 'optim'
require 'image'
require 'dataset-mnist'
require 'pl'
require 'paths'

In [None]:
--- Load the Kaggle training set from "data/train.csv".
-- Every data row is expected to be "label,p1,...,p784": the digit label
-- followed by the 28x28 pixel values in row-major order.
-- A leading header row is skipped when its first field is not numeric, so the
-- file loads whether or not the header has been stripped by hand. Blank lines
-- are ignored.
-- @return a sequence of {image, label} pairs, where image is a 28x28 tensor
--         and label is a 1-element tensor holding the CSV label + 1
--         (a 1-based class index)
function loadKaggleTrainDataset()
    local file = "data/train.csv"
    local side = 28
    local fieldsPerRow = side * side + 1 -- label + pixels

    local lines = {}
    for line in io.lines(file) do
        if line:find("%S") then -- ignore blank lines
            lines[#lines + 1] = line
        end
    end

    -- Drop the header row if there is one (its first field is not a number).
    if #lines > 0 and tonumber((lines[1]:match("^[^,]*"))) == nil then
        table.remove(lines, 1)
    end

    local dataset = {}
    for idx = 1, #lines do
        local pixels = lines[idx]:split(",")
        if #pixels < fieldsPerRow then
            error(string.format("%s: data row %d has %d fields, expected %d",
                                file, idx, #pixels, fieldsPerRow))
        end

        local label = torch.Tensor(1):zero()
        -- named `img` so the required `image` package is not shadowed
        local img = torch.Tensor(side, side):zero()

        for row = 1, side do
            -- +1 skips the label stored in the first field
            local base = (row - 1) * side + 1
            for column = 1, side do
                img[row][column] = tonumber(pixels[base + column])
            end
        end

        -- digits 0..9 are shifted to class indices 1..10
        label[1] = tonumber(pixels[1]) + 1
        dataset[#dataset + 1] = { img, label }
        xlua.progress(idx, #lines)
    end

    return dataset
end

--- Read the Kaggle test set from "data/test.csv".
-- The first line of the file is discarded as the header. Every remaining line
-- is split on commas and its first 784 fields are converted with tonumber and
-- stored row by row in a 28x28 tensor.
-- @return a sequence of one-element tables, each holding an image tensor at
--         index 1
function loadKaggleTestDataset()
    local path = "data/test.csv"

    local rows = {}
    for text in io.lines(path) do
        table.insert(rows, text)
    end
    table.remove(rows, 1) -- remove headers

    local total = #rows
    local samples = {}
    for n = 1, total do
        local fields = rows[n]:split(",")
        local picture = torch.Tensor(28, 28):zero()

        for r = 1, 28 do
            local base = (r - 1) * 28
            local rowView = picture[r] -- shares storage with `picture`
            for c = 1, 28 do
                rowView[c] = tonumber(fields[base + c])
            end
        end

        samples[#samples + 1] = { picture }
        xlua.progress(n, total)
    end

    return samples
end

In [None]:
-- Hyperparameters (globals read by train() and test() further down)
batchSize = 10        -- number of samples per mini-batch
learningRate = 0.05   -- initial learning rate handed to optim.sgd
momentum = 0          -- momentum handed to optim.sgd
maxIter = 3           -- NOTE(review): not referenced anywhere in the visible code

In [None]:
-- Seed Torch's random number generator with a fixed value
torch.manualSeed(1)

In [None]:
-- Request two threads from Torch, then report the thread count actually in use
torch.setnumthreads(2)
print('<torch> set nb of threads to ' .. torch.getnumthreads())

In [None]:
-- Make FloatTensor the default tensor type, so that torch.Tensor(...) calls
-- made after this point create single-precision tensors
torch.setdefaulttensortype('torch.FloatTensor')

In [None]:
-- Problem description used by the model and the train/test functions:
-- classes  : the ten digit labels; position i is the label of class index i
-- geometry : the two spatial sizes of an input image, used to size mini-batches

classes = {'0', '1','2','3','4','5','6','7','8','9'}
geometry = {28, 28}

In [None]:
-- Convolutional network: two convolution / sigmoid / max-pooling stages
-- followed by a two-layer MLP producing one score per class.
-- The layers are attached by chaining add(), which appends them in the order
-- written.
-- Feature-map sizes, assuming unpadded stride-1 convolutions on 28x28 input:
-- 28x28 -> 24x24 -> 8x8 -> 4x4 -> 2x2, which is where 64*2*2 comes from.
model = nn.Sequential()
    -- stage 1 : filter bank -> squashing -> max pooling
    :add(nn.SpatialConvolutionMM(1, 32, 5, 5))
    :add(nn.Sigmoid())
    :add(nn.SpatialMaxPooling(3, 3, 3, 3))
    -- stage 2 : filter bank -> squashing -> max pooling
    :add(nn.SpatialConvolutionMM(32, 64, 5, 5))
    :add(nn.Sigmoid())
    :add(nn.SpatialMaxPooling(2, 2, 2, 2))
    -- stage 3 : flatten, then a standard 2-layer MLP
    :add(nn.Reshape(64*2*2))
    :add(nn.Linear(64*2*2, 200))
    :add(nn.Sigmoid())
    :add(nn.Linear(200, #classes))

In [None]:
-- Retrieve the model's parameters and their gradients; both globals are read
-- and written by the closure that train() hands to the optimizer
parameters, gradParameters = model:getParameters()

In [None]:
-- Show the network built so far. The LogSoftMax layer is appended in a later
-- cell, so it is not part of this printout.
print('<mnist> using model:')
print(model)

In [None]:
-- Loss function : negative log-likelihood.
-- A LogSoftMax layer is appended to the model and its output is scored by
-- ClassNLLCriterion.
-- NOTE(review): this layer is added after model:getParameters() was called in
-- an earlier cell; presumably harmless because LogSoftMax has no learnable
-- parameters — confirm.
model:add(nn.LogSoftMax())
criterion = nn.ClassNLLCriterion()

In [None]:
-- Number of samples train() and test() iterate over. These are hard-coded and
-- not derived from the loaded tables, so they must not exceed the number of
-- rows actually loaded.
nbTrainingPatches = 42000
nbTestingPatches = 28000

In [None]:
-- Load the training samples into a global table of {image, label} pairs.
-- Global normalization is currently disabled (line kept commented out).
trainData = loadKaggleTrainDataset()
--trainData:normalizeGlobal(mean, std)

In [None]:
-- Bare expression: presumably echoes the loaded table in the notebook output — confirm
trainData

In [None]:
-- Load the unlabeled test samples into a global table of {image} entries.
-- Global normalization is currently disabled (line kept commented out).
testData = loadKaggleTestDataset()
--testData:normalizeGlobal(mean, std)

In [None]:
-- Bare expression: presumably echoes the loaded table in the notebook output — confirm
testData

In [None]:
-- Confusion matrix over the ten classes; train() fills it, prints it and
-- resets it once per epoch
confusion = optim.ConfusionMatrix(classes)

In [None]:
--- Train the global `model` for one epoch with mini-batch SGD.
-- Reads the globals batchSize, nbTrainingPatches, geometry, learningRate,
-- momentum, model, criterion, parameters, gradParameters and confusion.
-- The globals `epoch` and `sgdState` are created on the first call and kept
-- between calls so that the epoch counter and optimizer state persist.
-- @param dataset indexable sequence in which dataset[i][1] is the image tensor
--        and dataset[i][2] is a 1-element tensor holding the class index;
--        must contain at least nbTrainingPatches entries
function train(dataset)
    -- epoch tracker
    epoch = epoch or 1

    -- local vars
    local time = sys.clock()

    -- do one epoch
    print('<trainer> on training set:')
    print("<trainer> online epoch # " .. epoch .. ' [batchSize = ' .. batchSize .. ']')
    for t = 1,nbTrainingPatches,batchSize do
        -- The final batch may be shorter than batchSize; size the tensors to
        -- the samples that actually exist so no uninitialised rows are used.
        local last = math.min(t + batchSize - 1, nbTrainingPatches)
        local count = last - t + 1

        -- create mini batch
        local inputs = torch.Tensor(count, 1, geometry[1], geometry[2])
        local targets = torch.Tensor(count)
        for k = 1, count do
            local sample = dataset[t + k - 1]
            inputs[k] = sample[1]      -- assignment copies the pixel data
            targets[k] = sample[2][1]  -- class index stored in the label tensor
        end

        -- create closure to evaluate f(X) and df/dX
        local feval = function(x)
            -- just in case:
            collectgarbage()

            -- get new parameters
            if x ~= parameters then
                parameters:copy(x)
            end

            -- reset gradients
            gradParameters:zero()

            -- evaluate function for complete mini batch
            local outputs = model:forward(inputs)
            local f = criterion:forward(outputs, targets)

            -- estimate df/dW
            local df_do = criterion:backward(outputs, targets)
            model:backward(inputs, df_do)

            -- update confusion
            for i = 1, count do
                confusion:add(outputs[i], targets[i])
            end

            -- return f and df/dX
            return f,gradParameters
        end

        -- Perform SGD step:
        sgdState = sgdState or {
            learningRate = learningRate,
            momentum = momentum,
            learningRateDecay = 5e-7
        }
        optim.sgd(feval, parameters, sgdState)

        -- disp progress; reporting the last processed index lets the bar
        -- reach its total on the final batch
        xlua.progress(last, nbTrainingPatches)
    end

    -- time taken
    time = sys.clock() - time
    time = time / nbTrainingPatches
    print("<trainer> time to learn 1 sample = " .. (time*1000) .. 'ms')

    -- print confusion matrix
    print(confusion)
    confusion:zero()

    -- next epoch
    epoch = epoch + 1
end

In [None]:
--- Run the global `model` over an unlabeled dataset and write the predictions
-- to "submission.csv" in the "ImageId,Label" format.
-- The file is recreated on every call. Reads the globals batchSize,
-- nbTestingPatches, geometry, classes and model.
-- No confusion matrix is reported here: the samples carry no labels, so there
-- is nothing to compare the predictions against.
-- @param dataset indexable sequence in which dataset[i][1] is the image
--        tensor; must contain at least nbTestingPatches entries
function test(dataset)
    -- "w" creates or truncates the file, so no shell commands are needed;
    -- assert surfaces the reason if the file cannot be opened
    local results = assert(io.open('submission.csv', 'w'))
    results:write('ImageId,Label\n')

    -- test over given dataset
    print('<trainer> on testing Set:')
    for t = 1,nbTestingPatches,batchSize do
        -- The final batch may be shorter than batchSize; only predict for
        -- samples that actually exist.
        local last = math.min(t + batchSize - 1, nbTestingPatches)
        local count = last - t + 1

        -- create mini batch
        local inputs = torch.Tensor(count, 1, geometry[1], geometry[2])
        for k = 1, count do
            inputs[k] = dataset[t + k - 1][1] -- assignment copies the pixel data
        end

        -- predict: the index of the highest score in each row is the class
        local predicted = model:forward(inputs)
        local _, prediction = predicted:max(2)
        for i = 1, count do
            results:write(string.format('%d,%s\n', t - 1 + i, classes[prediction[i][1]]))
        end

        -- disp progress; reporting the last processed index lets the bar
        -- reach its total on the final batch
        xlua.progress(last, nbTestingPatches)
    end

    results:close()
end

In [None]:
-- Alternate one training epoch with one pass over the test set, forever;
-- the loop has no exit condition and has to be interrupted by hand.
repeat
    train(trainData)
    test(testData)
until false

submission.csv is rewritten after every epoch.
About 50 epochs are enough.