### Reference
Torch demos: [digit classifier](https://github.com/torch/demos/blob/master/train-a-digit-classifier/train-on-mnist.lua)

In [1]:
require 'torch'
require 'nn'
require 'optim'
require 'image'
require 'dataset-mnist'

In [2]:
-- Hyperparameters (globals read by train() and test())
batchSize = 50          -- samples per mini-batch
learningRate = 0.05     -- copied into the SGD state on the first training step
momentum = 0            -- copied into the SGD state on the first training step
numIter = 20            -- number of train/test passes run by the main loop

-- Fixed RNG seed
torch.manualSeed(1)

-- Request two worker threads, then report the count torch actually uses
torch.setnumthreads(2)
local threadCount = torch.getnumthreads()
print('<torch> set nb of threads to ' .. threadCount)

-- Tensors created through torch.Tensor(...) default to single precision
torch.setdefaulttensortype('torch.FloatTensor')

<torch> set nb of threads to 1	


In [3]:
-- Problem description for the 10-class classification task.
-- classes  : the ten class names, the strings '1' through '10'
-- geometry : width and height of the input images
classes = {}
for label = 1, 10 do
    classes[label] = tostring(label)
end

geometry = {32, 32}

In [4]:
numTrain = 60000
numTest = 10000

-- Create training set and normalize it.
-- `mean` and `std` are not assigned anywhere in this notebook, so unless they
-- were defined beforehand they are nil here.
-- NOTE(review): assumes normalizeGlobal returns the (mean, std) it applied,
-- as dataset-mnist.lua in the torch demos does -- confirm. If it returns
-- nothing, trainMean/trainStd are nil and the test-set call below behaves
-- exactly as it did before this change.
trainData = mnist.loadTrainSet(numTrain, geometry)
local trainMean, trainStd = trainData:normalizeGlobal(mean, std)

-- Create test set and normalize it with the statistics returned for the
-- training set, so that both sets go through the same transformation.
testData = mnist.loadTestSet(numTest, geometry)
testData:normalizeGlobal(trainMean, trainStd)

<mnist> done	


<mnist> done	


In [5]:
-- Print both dataset tables to inspect their fields
print(trainData)
print(testData)

{
  data : FloatTensor - size: 60000x1x32x32
  normalize : function: 0x0ce96930
  labels : ByteTensor - size: 60000
  normalizeGlobal : function: 0x0ce968f8
  size : function: 0x0ce969d8
}
{
  data : FloatTensor - size: 10000x1x32x32
  normalize : function: 0x0ce977c0
  labels : ByteTensor - size: 10000
  normalizeGlobal : function: 0x0ce97788
  size : function: 0x0ce97868
}


In [6]:
-- Two-layer fully connected network:
-- flatten -> Linear -> Tanh -> Linear -> LogSoftMax
local inputSize = 32 * 32
local hiddenSize = 2048

model = nn.Sequential()

-- Layers are constructed in the order they are stacked
local layers = {
    nn.Reshape(inputSize),
    nn.Linear(inputSize, hiddenSize),
    nn.Tanh(),
    nn.Linear(hiddenSize, #classes),
    nn.LogSoftMax(),
}
for _, layer in ipairs(layers) do
    model:add(layer)
end

-- Loss applied to the LogSoftMax output in train()
criterion = nn.ClassNLLCriterion()

In [7]:
-- Per-pass accuracy logs: train() appends to trainHistory, test() to testHistory
trainHistory, testHistory = {}, {}

-- Confusion matrix over `classes`, shared (and reset) by train() and test()
confusion = optim.ConfusionMatrix(classes)

-- Parameters and gradients of the model, as used by the optimizer in train()
parameters, gradParameters = model:getParameters()

In [8]:
-- Train function
-- Runs one pass of mini-batch SGD over `dataset` using the global `model`,
-- `criterion`, `parameters` and `gradParameters`.
--
-- dataset: object providing dataset:size() and dataset[i]; sample[1] is
--          copied into the input batch and the arg-max of sample[2] is used
--          as the class index.
--
-- Side effects: updates the model parameters, appends the accuracy of this
-- pass (confusion.totalValid * 100) to `trainHistory`, resets `confusion`,
-- and increments the global `epoch` counter.
function train(dataset)
    -- epoch tracker (deliberately global so it persists across calls)
    epoch = epoch or 1

    -- local vars
    local time = sys.clock()
    -- Size the pass from the dataset itself rather than the global numTrain,
    -- the same way test() does.
    local numSamples = dataset:size()

    -- do one epoch
    print('<trainer> on training set:')
    print("<trainer> online epoch # " .. epoch .. ' [batchSize = ' .. batchSize .. ']')
    for t = 1, numSamples, batchSize do
        -- create mini batch; the last one may be shorter than batchSize, so
        -- allocate exactly as many rows as will be filled
        local last = math.min(t + batchSize - 1, numSamples)
        local count = last - t + 1
        local inputs = torch.Tensor(count, 1, geometry[1], geometry[2])
        local targets = torch.Tensor(count)
        for i = t, last do
            -- load new sample; assignment into the batch copies the data,
            -- so no intermediate clone is needed
            local sample = dataset[i]
            local _, target = sample[2]:max(1)
            local k = i - t + 1
            inputs[k] = sample[1]
            targets[k] = target:squeeze()
        end

        -- create closure to evaluate f(X) and df/dX
        local feval = function(x)
            -- just in case:
            collectgarbage()

            -- get new parameters
            if x ~= parameters then
                parameters:copy(x)
            end

            -- reset gradients
            gradParameters:zero()

            -- evaluate function for complete mini batch
            local outputs = model:forward(inputs)
            local f = criterion:forward(outputs, targets)

            -- estimate df/dW
            local df_do = criterion:backward(outputs, targets)
            model:backward(inputs, df_do)

            -- update confusion, only for the rows that hold real samples
            for i = 1, count do
                confusion:add(outputs[i], targets[i])
            end

            -- return f and df/dX
            return f, gradParameters
        end

        -- Perform SGD step. The state table is created once and then reused,
        -- so later changes to the learningRate/momentum globals are not
        -- picked up by an existing sgdState.
        sgdState = sgdState or {
            learningRate = learningRate,
            momentum = momentum,
            learningRateDecay = 5e-7
        }
        optim.sgd(feval, parameters, sgdState)

        -- disp progress
        xlua.progress(t, numSamples)
    end

    -- time taken
    time = sys.clock() - time
    time = time / numSamples
    print("<trainer> time to learn 1 sample = " .. (time*1000) .. 'ms')

    -- print confusion matrix
    print(confusion)
    table.insert(trainHistory, confusion.totalValid * 100)
    confusion:zero()

    -- next epoch
    epoch = epoch + 1
end

In [9]:
-- test function
-- Runs the global `model` forward over every sample of `dataset` in
-- mini-batches of at most `batchSize` and accumulates the predictions in the
-- global `confusion` matrix.
--
-- dataset: object providing dataset:size() and dataset[i]; sample[1] is
--          copied into the input batch and the arg-max of sample[2] is used
--          as the class index.
--
-- Side effects: prints timing and the confusion matrix, appends the accuracy
-- of this pass (confusion.totalValid * 100) to `testHistory`, and resets
-- `confusion`.
function test(dataset)
    -- local vars
    local time = sys.clock()
    local numSamples = dataset:size()

    -- test over given dataset
    print('<trainer> on testing Set:')
    for t = 1, numSamples, batchSize do
        -- disp progress
        xlua.progress(t, numSamples)

        -- create mini batch; the last one may be shorter than batchSize, so
        -- allocate exactly as many rows as will be filled
        local last = math.min(t + batchSize - 1, numSamples)
        local count = last - t + 1
        local inputs = torch.Tensor(count, 1, geometry[1], geometry[2])
        local targets = torch.Tensor(count)
        for i = t, last do
            -- load new sample; assignment into the batch copies the data,
            -- so no intermediate clone is needed
            local sample = dataset[i]
            local _, target = sample[2]:max(1)
            local k = i - t + 1
            inputs[k] = sample[1]
            targets[k] = target:squeeze()
        end

        -- test samples
        local preds = model:forward(inputs)

        -- confusion: count only the rows that hold real samples
        for i = 1, count do
            confusion:add(preds[i], targets[i])
        end
    end

    -- timing
    time = sys.clock() - time
    time = time / numSamples
    print("<trainer> time to test 1 sample = " .. (time*1000) .. 'ms')

    -- print confusion matrix
    print(confusion)
    table.insert(testHistory, confusion.totalValid * 100)
    confusion:zero()
end

In [None]:
-- Alternate one training pass with one evaluation pass, numIter times
for pass = 1, numIter do
    train(trainData)
    test(testData)
end

<trainer> on training set:	
<trainer> online epoch # 1 [batchSize = 50]	


Progress: 23901 / 60000	


Progress: 50451 / 60000	


<trainer> time to learn 1 sample = 0.21535853147507ms	
ConfusionMatrix:
[[    5679       1      38      25      13      60      44       4      49      10]   95.880% 	[class: 1]
 [       1    6508      39      32       7      33       7      15      85      15]   96.529% 	[class: 2]
 [      49      71    5291      98      90      35      90      87     121      26]   88.805% 	[class: 3]
 [      32      39     127    5408       6     219      39      72     124      65]   88.207% 	[class: 4]
 [      13      30      45       8    5375       7      63      18      33     250]   92.006% 	[class: 5]
 [      71      42      33     216      72    4650     108      29     137      63]   85.778% 	[class: 6]
 [      51      27      64       5      65      78    5584       3      39       2]   94.356% 	[class: 7]
 [      30      56      84      24      68      11       8    5780      13     191]   92.259% 	[class: 8]
 [      33     118      88     166      33     170      49      22    5073      

<trainer> time to test 1 sample = 0.086409401893616ms	
ConfusionMatrix:
[[     971       0       1       1       0       3       2       1       1       0]   99.082% 	[class: 1]
 [       0    1121       2       2       1       1       3       2       3       0]   98.767% 	[class: 2]
 [      11      25     901      40      11       3      10       6      18       7]   87.306% 	[class: 3]
 [       5       3      12     925       1      34       1      10       5      14]   91.584% 	[class: 4]
 [       2       2       3       1     928       0       8       0       4      34]   94.501% 	[class: 5]
 [      10       3       3      13       9     826       5       4      13       6]   92.601% 	[class: 6]
 [      17       3       5       2       9      27     892       1       2       0]   93.111% 	[class: 7]
 [       3      17      20       9      12       2       0     902       3      60]   87.743% 	[class: 8]
 [      10      20       3      36      12      31       6       4     836      

Progress: 13151 / 60000	


Progress: 38901 / 60000	


<trainer> time to learn 1 sample = 0.22026749849319ms	
ConfusionMatrix:
[[    5768       1      11      13       9      30      35       5      35      16]   97.383% 	[class: 1]
 [       1    6598      31      22       9      15       3      10      40      13]   97.864% 	[class: 2]
 [      20      34    5583      64      53      15      46      59      70      14]   93.706% 	[class: 3]
 [       7      22      93    5666       5     127      21      53      96      41]   92.416% 	[class: 4]
 [      11      19      34       4    5513       8      48      13      20     172]   94.368% 	[class: 5]
 [      35      17      22     115      37    5020      66      12      57      40]   92.603% 	[class: 6]
 [      34      13      33       5      40      56    5711       2      24       0]   96.502% 	[class: 7]
 [      13      31      58      18      52      11       2    5965      11     104]   95.211% 	[class: 8]
 [      22      63      34     102      25      84      30      16    5426      

<trainer> time to test 1 sample = 0.080104398727417ms	


ConfusionMatrix:
[[     970       0       1       1       0       4       2       1       1       0]   98.980% 	[class: 1]
 [       0    1123       2       1       1       1       3       1       3       0]   98.943% 	[class: 2]
 [       8      13     966      15       6       1       6       4      11       2]   93.605% 	[class: 3]
 [       3       2       8     954       1      22       0       7       4       9]   94.455% 	[class: 4]
 [       1       0       4       1     946       0       7       0       3      20]   96.334% 	[class: 5]
 [       8       2       2       7       3     851       6       0       8       5]   95.404% 	[class: 6]
 [      11       3       4       0       6      30     899       1       4       0]   93.841% 	[class: 7]
 [       2      14      19       7       9       2       0     931       4      40]   90.564% 	[class: 8]
 [       4       6       4      20      10      15       3       4     900       8]   92.402% 	[class: 9]
 [       9       8       1   

Progress: 2151 / 60000	


Progress: 28701 / 60000	


Progress: 56301 / 60000	


<trainer> time to learn 1 sample = 0.2150576988856ms	
ConfusionMatrix:
[[    5804       1      11       5       7      19      29       5      25      17]   97.991% 	[class: 1]
 [       1    6632      30      15      12       5       2      11      26       8]   98.368% 	[class: 2]
 [      20      22    5728      29      35       9      20      48      42       5]   96.140% 	[class: 3]
 [       7      13      61    5826       5      80       9      32      65      33]   95.025% 	[class: 4]
 [       9      15      26       2    5607       5      35      13      15     115]   95.977% 	[class: 5]
 [      21      12      12      73      21    5161      47      11      39      24]   95.204% 	[class: 6]
 [      32       9      14       4      21      34    5781       3      20       0]   97.685% 	[class: 7]
 [       9      22      38      15      40       8       1    6055       8      69]   96.648% 	[class: 8]
 [      16      44      27      66      16      42      28      10    5569      3

<trainer> time to test 1 sample = 0.075965189933777ms	


ConfusionMatrix:
[[     969       0       1       1       0       5       2       1       1       0]   98.878% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       2       0]   99.207% 	[class: 2]
 [       7       6     992       7       3       1       4       4       7       1]   96.124% 	[class: 3]
 [       0       1       5     974       0      18       0       4       3       5]   96.436% 	[class: 4]
 [       1       0       4       1     953       0       4       0       3      16]   97.047% 	[class: 5]
 [       6       1       0       5       1     868       4       0       4       3]   97.309% 	[class: 6]
 [      10       3       2       0       6      26     907       0       4       0]   94.676% 	[class: 7]
 [       2      14      17       5       9       1       0     945       2      33]   91.926% 	[class: 8]
 [       4       4       3      15       8      12       4       3     915       6]   93.943% 	[class: 9]
 [       5       7       1   

Progress: 18701 / 60000	


Progress: 45551 / 60000	


<trainer> time to learn 1 sample = 0.22243924935659ms	


ConfusionMatrix:
[[    5831       2       8       4       5      15      23       5      17      13]   98.447% 	[class: 1]
 [       1    6656      27       8      13       1       4      10      16       6]   98.724% 	[class: 2]
 [      15      15    5800      23      18       6      12      39      26       4]   97.348% 	[class: 3]
 [       4      13      42    5906       4      53       7      29      50      23]   96.330% 	[class: 4]
 [       6      16      18       1    5672       4      24       9      15      77]   97.090% 	[class: 5]
 [      20       6      10      49      14    5235      36       8      25      18]   96.569% 	[class: 6]
 [      24       6      12       1      17      17    5819       1      21       0]   98.327% 	[class: 7]
 [       5      19      34       7      31       7       1    6104       4      53]   97.430% 	[class: 8]
 [      11      31      17      51      14      28      18       9    5651      21]   96.582% 	[class: 9]
 [      15      14       2   

<trainer> time to test 1 sample = 0.085174703598022ms	
ConfusionMatrix:
[[     970       0       1       2       0       3       2       1       1       0]   98.980% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       7       3     997       4       4       0       5       4       8       0]   96.609% 	[class: 3]
 [       0       1       2     981       0      15       0       3       2       6]   97.129% 	[class: 4]
 [       1       0       4       1     958       0       4       0       2      12]   97.556% 	[class: 5]
 [       4       1       0       5       1     874       2       0       3       2]   97.982% 	[class: 6]
 [       8       3       1       0       5      26     912       0       3       0]   95.198% 	[class: 7]
 [       1      12      14       4       9       1       0     957       2      28]   93.093% 	[class: 8]
 [       4       4       3      12       7       9       2       3     925      

Progress: 9051 / 60000	


Progress: 35051 / 60000	


<trainer> time to learn 1 sample = 0.21553116639455ms	
ConfusionMatrix:
[[    5858       2       5       2       4      11      15       2      13      11]   98.903% 	[class: 1]
 [       1    6672      20       6      11       1       4      11      11       5]   98.962% 	[class: 2]
 [      13      11    5843      16      13       4       9      27      19       3]   98.070% 	[class: 3]
 [       4       5      30    5959       3      45       4      25      39      17]   97.195% 	[class: 4]
 [       4      11      10       0    5708       3      20       8       8      70]   97.706% 	[class: 5]
 [      15       4       8      31       7    5287      29       4      22      14]   97.528% 	[class: 6]
 [      20       6       9       0      11      10    5845       0      17       0]   98.766% 	[class: 7]
 [       4      18      30       2      26       6       1    6129       4      45]   97.829% 	[class: 8]
 [       9      29      12      28       9      15      12       8    5714      

<trainer> on testing Set:	


Progress: 4401 / 10000	


<trainer> time to test 1 sample = 0.080771398544312ms	


ConfusionMatrix:
[[     971       0       1       2       0       3       1       1       1       0]   99.082% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       7       1    1001       3       3       1       5       3       8       0]   96.996% 	[class: 3]
 [       0       1       3     987       0      12       0       2       3       2]   97.723% 	[class: 4]
 [       2       0       3       1     962       0       3       0       2       9]   97.963% 	[class: 5]
 [       4       1       0       5       1     875       2       0       2       2]   98.094% 	[class: 6]
 [       7       3       0       0       4      27     913       0       4       0]   95.303% 	[class: 7]
 [       2      10      14       4       8       0       0     962       1      27]   93.580% 	[class: 8]
 [       3       3       4       8       6       6       2       3     935       4]   95.996% 	[class: 9]
 [       5       4       0   

Progress: 23651 / 60000	


In [None]:
--- Copy the array part of a Lua table into a new 1-D tensor.
-- @param values  sequence of numbers (keys 1..#values)
-- @return a tensor created with torch.Tensor(#values) whose i-th element is
--         values[i]
function table2Tensor(values)
    -- The parameter is no longer called `table`, which hid the standard
    -- `table` library inside this function.
    local count = #values
    -- `local` keeps the result from leaking into (or clobbering) a global
    -- named `tensor`.
    local result = torch.Tensor(count)
    -- Loop over the table length directly so an empty table yields an empty
    -- tensor without querying the size of a tensor that has no dimensions.
    for i = 1, count do
        result[i] = values[i]
    end
    return result
end

In [None]:
Plot = require 'itorch.Plot'

-- Nothing to draw until train()/test() have each recorded at least one pass
assert(#trainHistory > 0 and #testHistory > 0,
       'trainHistory/testHistory are empty: run the training loop first')

-- Accuracy recorded after every training / testing pass
trainValid = table2Tensor(trainHistory)
testValid = table2Tensor(testHistory)

-- x axes: 1..number of recorded passes. They are sized from the histories
-- rather than from numIter so that x and y always have the same length, even
-- when the training loop was interrupted before finishing all passes.
epochCount = torch.range(1, #trainHistory)
local testEpochs = torch.range(1, #testHistory)

-- line plots
plot = Plot():line(epochCount, trainValid, 'green', 'Train')
plot:line(testEpochs, testValid, 'blue', 'Test')
plot:legend(true)
plot:title('Train/Test TotalValid'):draw()