### Reference.
Torch-Demos : [digit classifier](https://github.com/torch/demos/blob/master/train-a-digit-classifier/train-on-mnist.lua)

In [1]:
require 'torch'
require 'nn'
require 'optim'
require 'image'
require 'dataset-mnist'

In [2]:
-- Setting Hyperparameters
batchSize = 50
learningRate = 0.05
momentum = 0
numIter = 20

-- Fix Seed
torch.manualSeed(1)

-- Setting worker
torch.setnumthreads(2)
print('<torch> set nb of threads to ' .. torch.getnumthreads())

-- Use Floats
torch.setdefaulttensortype('torch.FloatTensor')

<torch> set nb of threads to 1	


In [3]:
-- Define model to Train
-- On the 10-class Classification Problem
-- Geometry : width and height of input images

classes = {'1', '2' ,'3' ,'4' ,'5' ,'6' ,'7' ,'8' ,'9', '10'}
geometry = {32, 32}

In [4]:
numTrain = 60000
numTest = 10000

-- Create training set and normalize
trainData = mnist.loadTrainSet(numTrain, geometry)
trainData:normalizeGlobal(mean, std)

-- Create test set and normalize
testData = mnist.loadTestSet(numTest, geometry)
testData:normalizeGlobal(mean, std)

<mnist> done	


<mnist> done	


In [5]:
print(trainData)
print(testData)

{
  data : FloatTensor - size: 60000x1x32x32
  normalize : function: 0x0ce96930
  labels : ByteTensor - size: 60000
  normalizeGlobal : function: 0x0ce968f8
  size : function: 0x0ce969d8
}
{
  data : FloatTensor - size: 10000x1x32x32
  normalize : function: 0x0ce977c0
  labels : ByteTensor - size: 10000
  normalizeGlobal : function: 0x0ce97788
  size : function: 0x0ce97868
}


In [6]:
-- 2 Layer Neural Network
model = nn.Sequential()

model:add(nn.Reshape(32*32))
model:add(nn.Linear(32*32, 2048))
model:add(nn.Tanh())
model:add(nn.Linear(2048, #classes))
model:add(nn.LogSoftMax())

criterion = nn.ClassNLLCriterion()

In [7]:
-- Retrieve parameters and gradients
parameters, gradParameters = model:getParameters()

confusion = optim.ConfusionMatrix(classes)
trainHistory = {}
testHistory = {}

In [8]:
-- Train function
function train(dataset)
    -- epoch tracker
    epoch = epoch or 1

    -- local vars
    local time = sys.clock()

    -- do one epoch
    print('<trainer> on training set:')
    print("<trainer> online epoch # " .. epoch .. ' [batchSize = ' .. batchSize .. ']')
    for t = 1, numTrain, batchSize do
        -- create mini batch
        local inputs = torch.Tensor(batchSize,1,geometry[1],geometry[2])
        local targets = torch.Tensor(batchSize)
        local k = 1
        for i = t, math.min(t+batchSize-1, numTrain) do
            -- load new sample
            local sample = dataset[i]
            local input = sample[1]:clone()
            local _,target = sample[2]:clone():max(1)
            target = target:squeeze()
            inputs[k] = input
            targets[k] = target
            k = k + 1
        end

        -- create closure to evaluate f(X) and df/dX
        local feval = function(x)
            -- just in case:
            collectgarbage()

            -- get new parameters
            if x ~= parameters then
                parameters:copy(x)
            end

            -- reset gradients
            gradParameters:zero()

            -- evaluate function for complete mini batch
            local outputs = model:forward(inputs)
            local f = criterion:forward(outputs, targets)

            -- estimate df/dW
            local df_do = criterion:backward(outputs, targets)
            model:backward(inputs, df_do)

            -- update confusion
            for i = 1,batchSize do
                confusion:add(outputs[i], targets[i])
            end

            -- return f and df/dX
            return f,gradParameters
        end

        -- Perform SGD step:
        sgdState = sgdState or {
            learningRate = learningRate,
            momentum = momentum,
            learningRateDecay = 5e-7
        }
        optim.sgd(feval, parameters, sgdState)

        -- disp progress
        xlua.progress(t, numTrain)
    end
   
    -- time taken
    time = sys.clock() - time
    time = time / numTrain
    print("<trainer> time to learn 1 sample = " .. (time*1000) .. 'ms')

    -- print confusion matrix
    print(confusion)
    table.insert(trainHistory, confusion.totalValid * 100)
    confusion:zero()

    -- next epoch
    epoch = epoch + 1
end

In [9]:
-- test function
function test(dataset)
    -- local vars
    local time = sys.clock()

    -- test over given dataset
    print('<trainer> on testing Set:')
    for t = 1, dataset:size(), batchSize do
        -- disp progress
        xlua.progress(t, dataset:size())

        -- create mini batch
        local inputs = torch.Tensor(batchSize, 1, geometry[1], geometry[2])
        local targets = torch.Tensor(batchSize)
        local k = 1
        for i = t,math.min(t+batchSize-1, dataset:size()) do
            -- load new sample
            local sample = dataset[i]
            local input = sample[1]:clone()
            local _,target = sample[2]:clone():max(1)
            target = target:squeeze()
            inputs[k] = input
            targets[k] = target
            k = k + 1
        end

        -- test samples
        local preds = model:forward(inputs)

        -- confusion:
        for i = 1, batchSize do
            confusion:add(preds[i], targets[i])
        end
    end

    -- timing
    time = sys.clock() - time
    time = time / dataset:size()
    print("<trainer> time to test 1 sample = " .. (time*1000) .. 'ms')

    -- print confusion matrix
    print(confusion)
    table.insert(testHistory, confusion.totalValid * 100)
    confusion:zero()
end

In [10]:
for i = 1, numIter do
    train(trainData)
    test(testData)
end

<trainer> on training set:	
<trainer> online epoch # 1 [batchSize = 50]	


Progress: 23901 / 60000	


Progress: 50451 / 60000	


<trainer> time to learn 1 sample = 0.21535853147507ms	
ConfusionMatrix:
[[    5679       1      38      25      13      60      44       4      49      10]   95.880% 	[class: 1]
 [       1    6508      39      32       7      33       7      15      85      15]   96.529% 	[class: 2]
 [      49      71    5291      98      90      35      90      87     121      26]   88.805% 	[class: 3]
 [      32      39     127    5408       6     219      39      72     124      65]   88.207% 	[class: 4]
 [      13      30      45       8    5375       7      63      18      33     250]   92.006% 	[class: 5]
 [      71      42      33     216      72    4650     108      29     137      63]   85.778% 	[class: 6]
 [      51      27      64       5      65      78    5584       3      39       2]   94.356% 	[class: 7]
 [      30      56      84      24      68      11       8    5780      13     191]   92.259% 	[class: 8]
 [      33     118      88     166      33     170      49      22    5073      

<trainer> time to test 1 sample = 0.086409401893616ms	
ConfusionMatrix:
[[     971       0       1       1       0       3       2       1       1       0]   99.082% 	[class: 1]
 [       0    1121       2       2       1       1       3       2       3       0]   98.767% 	[class: 2]
 [      11      25     901      40      11       3      10       6      18       7]   87.306% 	[class: 3]
 [       5       3      12     925       1      34       1      10       5      14]   91.584% 	[class: 4]
 [       2       2       3       1     928       0       8       0       4      34]   94.501% 	[class: 5]
 [      10       3       3      13       9     826       5       4      13       6]   92.601% 	[class: 6]
 [      17       3       5       2       9      27     892       1       2       0]   93.111% 	[class: 7]
 [       3      17      20       9      12       2       0     902       3      60]   87.743% 	[class: 8]
 [      10      20       3      36      12      31       6       4     836      

Progress: 13151 / 60000	


Progress: 38901 / 60000	


<trainer> time to learn 1 sample = 0.22026749849319ms	
ConfusionMatrix:
[[    5768       1      11      13       9      30      35       5      35      16]   97.383% 	[class: 1]
 [       1    6598      31      22       9      15       3      10      40      13]   97.864% 	[class: 2]
 [      20      34    5583      64      53      15      46      59      70      14]   93.706% 	[class: 3]
 [       7      22      93    5666       5     127      21      53      96      41]   92.416% 	[class: 4]
 [      11      19      34       4    5513       8      48      13      20     172]   94.368% 	[class: 5]
 [      35      17      22     115      37    5020      66      12      57      40]   92.603% 	[class: 6]
 [      34      13      33       5      40      56    5711       2      24       0]   96.502% 	[class: 7]
 [      13      31      58      18      52      11       2    5965      11     104]   95.211% 	[class: 8]
 [      22      63      34     102      25      84      30      16    5426      

<trainer> time to test 1 sample = 0.080104398727417ms	


ConfusionMatrix:
[[     970       0       1       1       0       4       2       1       1       0]   98.980% 	[class: 1]
 [       0    1123       2       1       1       1       3       1       3       0]   98.943% 	[class: 2]
 [       8      13     966      15       6       1       6       4      11       2]   93.605% 	[class: 3]
 [       3       2       8     954       1      22       0       7       4       9]   94.455% 	[class: 4]
 [       1       0       4       1     946       0       7       0       3      20]   96.334% 	[class: 5]
 [       8       2       2       7       3     851       6       0       8       5]   95.404% 	[class: 6]
 [      11       3       4       0       6      30     899       1       4       0]   93.841% 	[class: 7]
 [       2      14      19       7       9       2       0     931       4      40]   90.564% 	[class: 8]
 [       4       6       4      20      10      15       3       4     900       8]   92.402% 	[class: 9]
 [       9       8       1   

Progress: 2151 / 60000	


Progress: 28701 / 60000	


Progress: 56301 / 60000	


<trainer> time to learn 1 sample = 0.2150576988856ms	
ConfusionMatrix:
[[    5804       1      11       5       7      19      29       5      25      17]   97.991% 	[class: 1]
 [       1    6632      30      15      12       5       2      11      26       8]   98.368% 	[class: 2]
 [      20      22    5728      29      35       9      20      48      42       5]   96.140% 	[class: 3]
 [       7      13      61    5826       5      80       9      32      65      33]   95.025% 	[class: 4]
 [       9      15      26       2    5607       5      35      13      15     115]   95.977% 	[class: 5]
 [      21      12      12      73      21    5161      47      11      39      24]   95.204% 	[class: 6]
 [      32       9      14       4      21      34    5781       3      20       0]   97.685% 	[class: 7]
 [       9      22      38      15      40       8       1    6055       8      69]   96.648% 	[class: 8]
 [      16      44      27      66      16      42      28      10    5569      3

<trainer> time to test 1 sample = 0.075965189933777ms	


ConfusionMatrix:
[[     969       0       1       1       0       5       2       1       1       0]   98.878% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       2       0]   99.207% 	[class: 2]
 [       7       6     992       7       3       1       4       4       7       1]   96.124% 	[class: 3]
 [       0       1       5     974       0      18       0       4       3       5]   96.436% 	[class: 4]
 [       1       0       4       1     953       0       4       0       3      16]   97.047% 	[class: 5]
 [       6       1       0       5       1     868       4       0       4       3]   97.309% 	[class: 6]
 [      10       3       2       0       6      26     907       0       4       0]   94.676% 	[class: 7]
 [       2      14      17       5       9       1       0     945       2      33]   91.926% 	[class: 8]
 [       4       4       3      15       8      12       4       3     915       6]   93.943% 	[class: 9]
 [       5       7       1   

Progress: 18701 / 60000	


Progress: 45551 / 60000	


<trainer> time to learn 1 sample = 0.22243924935659ms	


ConfusionMatrix:
[[    5831       2       8       4       5      15      23       5      17      13]   98.447% 	[class: 1]
 [       1    6656      27       8      13       1       4      10      16       6]   98.724% 	[class: 2]
 [      15      15    5800      23      18       6      12      39      26       4]   97.348% 	[class: 3]
 [       4      13      42    5906       4      53       7      29      50      23]   96.330% 	[class: 4]
 [       6      16      18       1    5672       4      24       9      15      77]   97.090% 	[class: 5]
 [      20       6      10      49      14    5235      36       8      25      18]   96.569% 	[class: 6]
 [      24       6      12       1      17      17    5819       1      21       0]   98.327% 	[class: 7]
 [       5      19      34       7      31       7       1    6104       4      53]   97.430% 	[class: 8]
 [      11      31      17      51      14      28      18       9    5651      21]   96.582% 	[class: 9]
 [      15      14       2   

<trainer> time to test 1 sample = 0.085174703598022ms	
ConfusionMatrix:
[[     970       0       1       2       0       3       2       1       1       0]   98.980% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       7       3     997       4       4       0       5       4       8       0]   96.609% 	[class: 3]
 [       0       1       2     981       0      15       0       3       2       6]   97.129% 	[class: 4]
 [       1       0       4       1     958       0       4       0       2      12]   97.556% 	[class: 5]
 [       4       1       0       5       1     874       2       0       3       2]   97.982% 	[class: 6]
 [       8       3       1       0       5      26     912       0       3       0]   95.198% 	[class: 7]
 [       1      12      14       4       9       1       0     957       2      28]   93.093% 	[class: 8]
 [       4       4       3      12       7       9       2       3     925      

Progress: 9051 / 60000	


Progress: 35051 / 60000	


<trainer> time to learn 1 sample = 0.21553116639455ms	
ConfusionMatrix:
[[    5858       2       5       2       4      11      15       2      13      11]   98.903% 	[class: 1]
 [       1    6672      20       6      11       1       4      11      11       5]   98.962% 	[class: 2]
 [      13      11    5843      16      13       4       9      27      19       3]   98.070% 	[class: 3]
 [       4       5      30    5959       3      45       4      25      39      17]   97.195% 	[class: 4]
 [       4      11      10       0    5708       3      20       8       8      70]   97.706% 	[class: 5]
 [      15       4       8      31       7    5287      29       4      22      14]   97.528% 	[class: 6]
 [      20       6       9       0      11      10    5845       0      17       0]   98.766% 	[class: 7]
 [       4      18      30       2      26       6       1    6129       4      45]   97.829% 	[class: 8]
 [       9      29      12      28       9      15      12       8    5714      

<trainer> on testing Set:	


Progress: 4401 / 10000	


<trainer> time to test 1 sample = 0.080771398544312ms	


ConfusionMatrix:
[[     971       0       1       2       0       3       1       1       1       0]   99.082% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       7       1    1001       3       3       1       5       3       8       0]   96.996% 	[class: 3]
 [       0       1       3     987       0      12       0       2       3       2]   97.723% 	[class: 4]
 [       2       0       3       1     962       0       3       0       2       9]   97.963% 	[class: 5]
 [       4       1       0       5       1     875       2       0       2       2]   98.094% 	[class: 6]
 [       7       3       0       0       4      27     913       0       4       0]   95.303% 	[class: 7]
 [       2      10      14       4       8       0       0     962       1      27]   93.580% 	[class: 8]
 [       3       3       4       8       6       6       2       3     935       4]   95.996% 	[class: 9]
 [       5       4       0   

Progress: 23651 / 60000	


Progress: 50801 / 60000	


<trainer> time to learn 1 sample = 0.22018346389135ms	
ConfusionMatrix:
[[    5875       1       3       1       2       7      13       2      10       9]   99.190% 	[class: 1]
 [       1    6684      19       3       9       0       4      11       7       4]   99.140% 	[class: 2]
 [       9       9    5875      13      11       3       4      19      13       2]   98.607% 	[class: 3]
 [       3       3      23    5993       2      43       3      19      28      14]   97.749% 	[class: 4]
 [       2      10       6       0    5745       1      14       6       8      50]   98.340% 	[class: 5]
 [      12       5       8      23       3    5320      20       3      14      13]   98.137% 	[class: 6]
 [      17       7       3       0       7      10    5860       0      14       0]   99.020% 	[class: 7]
 [       3      18      25       1      22       5       1    6153       4      33]   98.212% 	[class: 8]
 [       9      20       8      24       6      14       9       5    5746      

  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.98421666666667
}
<trainer> on testing Set:	


<trainer> time to test 1 sample = 0.089466190338135ms	


ConfusionMatrix:
[[     973       0       1       2       0       2       0       1       1       0]   99.286% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       6       1    1005       2       3       1       5       3       6       0]   97.384% 	[class: 3]
 [       0       0       4     985       0      10       0       2       4       5]   97.525% 	[class: 4]
 [       2       0       3       1     963       0       2       0       2       9]   98.065% 	[class: 5]
 [       2       1       0       4       1     878       2       0       2       2]   98.430% 	[class: 6]
 [       8       3       0       0       4      26     913       0       4       0]   95.303% 	[class: 7]
 [       2      10      14       4       6       0       0     965       1      26]   93.872% 	[class: 8]
 [       3       3       4       6       4       6       1       3     941       3]   96.612% 	[class: 9]
 [       5       2       0   

Progress: 13751 / 60000	


Progress: 39701 / 60000	


<trainer> time to learn 1 sample = 0.21755961577098ms	
ConfusionMatrix:
[[    5884       1       2       1       2       5      12       1       8       7]   99.342% 	[class: 1]
 [       1    6691      15       2       9       0       3      10       6       5]   99.244% 	[class: 2]
 [       9       6    5894      10       8       2       3      16       7       3]   98.926% 	[class: 3]
 [       2       5      19    6029       1      33       1      16      15      10]   98.336% 	[class: 4]
 [       2       9       3       1    5761       1      11       7       7      40]   98.613% 	[class: 5]
 [       6       4       4      16       3    5348      17       3       9      11]   98.653% 	[class: 6]
 [      15       6       2       0       7      10    5869       0       9       0]   99.172% 	[class: 7]
 [       2      15      23       2      18       2       1    6174       3      25]   98.547% 	[class: 8]
 [       6      15       6      18       5      10       9       4    5770      

<trainer> time to test 1 sample = 0.07678530216217ms	
ConfusionMatrix:
[[     973       0       1       2       0       2       0       1       1       0]   99.286% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       6       1    1006       1       3       1       4       4       6       0]   97.481% 	[class: 3]
 [       0       0       5     988       0       5       0       2       4       6]   97.822% 	[class: 4]
 [       2       0       3       1     965       0       2       0       1       8]   98.269% 	[class: 5]
 [       2       1       0       4       1     877       3       0       2       2]   98.318% 	[class: 6]
 [       8       3       0       0       4      22     918       0       3       0]   95.825% 	[class: 7]
 [       2      10      15       4       5       0       0     967       1      24]   94.066% 	[class: 8]
 [       3       2       4       4       4       4       1       2     947       

  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9751
}
<trainer> on training set:	
<trainer> online epoch # 8 [batchSize = 50]	


Progress: 3001 / 60000	


Progress: 29451 / 60000	


Progress: 55851 / 60000	


<trainer> time to learn 1 sample = 0.21407628456752ms	
ConfusionMatrix:
[[    5893       1       1       0       2       3       7       1       8       7]   99.493% 	[class: 1]
 [       1    6698      12       2       5       0       3      10       5       6]   99.347% 	[class: 2]
 [       5       4    5915       6       5       1       1      14       5       2]   99.278% 	[class: 3]
 [       2       4      15    6058       1      23       0      10       9       9]   98.809% 	[class: 4]
 [       0       8       2       1    5782       0       8       6       5      30]   98.973% 	[class: 5]
 [       6       1       4      15       2    5360      17       1       6       9]   98.875% 	[class: 6]
 [      12       5       2       0       4       7    5881       0       7       0]   99.375% 	[class: 7]
 [       2      12      14       2      15       1       1    6196       3      19]   98.899% 	[class: 8]
 [       5       9       4      11       3       7       8       3    5795      

<trainer> time to test 1 sample = 0.08493971824646ms	
ConfusionMatrix:
[[     972       0       1       2       1       2       0       1       1       0]   99.184% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       6       1    1006       1       3       1       4       4       6       0]   97.481% 	[class: 3]
 [       0       0       5     989       0       4       0       2       3       7]   97.921% 	[class: 4]
 [       2       0       3       1     967       0       2       0       1       6]   98.473% 	[class: 5]
 [       2       0       0       5       2     876       3       0       2       2]   98.206% 	[class: 6]
 [       8       3       0       0       4      19     921       0       3       0]   96.138% 	[class: 7]
 [       2      10      14       3       5       0       0     969       3      22]   94.261% 	[class: 8]
 [       3       2       4       4       4       5       0       2     947       

  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9758
}
<trainer> on training set:	
<trainer> online epoch # 9 [batchSize = 50]	


Progress: 18051 / 60000	


Progress: 43801 / 60000	


<trainer> time to learn 1 sample = 0.21480043331782ms	
ConfusionMatrix:
[[    5908       0       0       0       0       1       4       1       5       4]   99.747% 	[class: 1]
 [       1    6703       9       2       4       0       3       9       5       6]   99.422% 	[class: 2]
 [       3       5    5927       2       3       1       1      10       4       2]   99.480% 	[class: 3]
 [       1       4      12    6075       2      17       0       6       7       7]   99.087% 	[class: 4]
 [       0       6       2       0    5794       0       6       5       3      26]   99.178% 	[class: 5]
 [       5       1       3      11       2    5376      12       0       4       7]   99.170% 	[class: 6]
 [       9       5       2       0       2       5    5889       0       6       0]   99.510% 	[class: 7]
 [       1      13      10       1      11       0       1    6208       3      17]   99.090% 	[class: 8]
 [       2       9       3       3       3       7       7       3    5808      

<trainer> time to test 1 sample = 0.078780102729797ms	
ConfusionMatrix:
[[     973       0       1       2       1       1       0       1       1       0]   99.286% 	[class: 1]
 [       0    1127       2       1       0       1       1       1       2       0]   99.295% 	[class: 2]
 [       6       1    1007       1       3       1       3       4       6       0]   97.578% 	[class: 3]
 [       0       0       6     988       0       4       0       2       3       7]   97.822% 	[class: 4]
 [       2       0       3       1     969       0       2       0       0       5]   98.676% 	[class: 5]
 [       2       0       0       4       2     877       3       0       2       2]   98.318% 	[class: 6]
 [       7       3       1       0       4      17     924       0       2       0]   96.451% 	[class: 7]
 [       2       9      14       3       6       0       0     970       3      21]   94.358% 	[class: 8]
 [       3       2       4       3       4       5       0       2     948      

Progress: 7151 / 60000	


Progress: 33201 / 60000	


<trainer> time to learn 1 sample = 0.21469389994939ms	
ConfusionMatrix:
[[    5914       0       0       0       0       0       3       0       3       3]   99.848% 	[class: 1]
 [       1    6708       9       1       3       0       2       9       5       4]   99.496% 	[class: 2]
 [       3       4    5934       1       3       0       1       9       2       1]   99.597% 	[class: 3]
 [       1       2       9    6090       1      14       0       4       5       5]   99.331% 	[class: 4]
 [       0       6       0       0    5805       0       5       3       3      20]   99.367% 	[class: 5]
 [       3       1       2       7       2    5394       7       0       2       3]   99.502% 	[class: 6]
 [       6       5       1       0       2       4    5895       0       5       0]   99.611% 	[class: 7]
 [       1      12       8       1       8       0       0    6219       3      13]   99.266% 	[class: 8]
 [       1       8       3       2       2       5       6       2    5816      

Progress: 51 / 10000	


<trainer> time to test 1 sample = 0.07877881526947ms	
ConfusionMatrix:
[[     974       0       0       1       1       1       0       1       2       0]   99.388% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       2       0]   99.207% 	[class: 2]
 [       4       1    1013       1       2       0       3       4       4       0]   98.159% 	[class: 3]
 [       0       0       6     987       0       5       0       2       2       8]   97.723% 	[class: 4]
 [       1       0       2       1     970       0       3       0       0       5]   98.778% 	[class: 5]
 [       2       0       0       6       2     876       2       0       2       2]   98.206% 	[class: 6]
 [       7       3       1       0       4      17     925       0       1       0]   96.555% 	[class: 7]
 [       2       8      13       1       6       0       0     976       3      19]   94.942% 	[class: 8]
 [       3       2       4       3       4       5       0       2     948       

  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9781
}
<trainer> on training set:	
<trainer> online epoch # 11 [batchSize = 50]	


Progress: 23701 / 60000	


Progress: 50551 / 60000	


<trainer> time to learn 1 sample = 0.22042889992396ms	
ConfusionMatrix:
[[    5915       0       0       0       0       0       2       0       3       3]   99.865% 	[class: 1]
 [       0    6717       8       1       3       0       2       7       3       1]   99.629% 	[class: 2]
 [       2       3    5941       1       2       0       0       6       2       1]   99.715% 	[class: 3]
 [       1       2       6    6098       1      11       0       3       4       5]   99.462% 	[class: 4]
 [       0       4       0       0    5814       0       5       2       1      16]   99.521% 	[class: 5]
 [       0       1       1       2       1    5411       3       0       1       1]   99.816% 	[class: 6]
 [       2       4       1       0       2       4    5902       0       3       0]   99.730% 	[class: 7]
 [       1       9       6       0       5       0       0    6229       4      11]   99.425% 	[class: 8]
 [       0       5       3       1       1       2       6       2    5825      

<trainer> time to test 1 sample = 0.08415892124176ms	


ConfusionMatrix:
[[     974       0       0       1       1       1       0       1       2       0]   99.388% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       2       0]   99.207% 	[class: 2]
 [       4       1    1013       1       2       0       3       4       4       0]   98.159% 	[class: 3]
 [       0       0       5     989       0       4       0       2       2       8]   97.921% 	[class: 4]
 [       1       0       2       1     969       0       3       0       0       6]   98.676% 	[class: 5]
 [       2       0       0       6       2     875       2       0       2       3]   98.094% 	[class: 6]
 [       7       3       1       0       4      14     928       0       1       0]   96.868% 	[class: 7]
 [       2       7      12       0       7       0       0     978       4      18]   95.136% 	[class: 8]
 [       3       2       4       3       4       5       1       2     947       3]   97.228% 	[class: 9]
 [       3       2       0   

Progress: 13701 / 60000	


Progress: 40551 / 60000	


<trainer> time to learn 1 sample = 0.21582996447881ms	
ConfusionMatrix:
[[    5916       0       0       0       0       0       2       0       2       3]   99.882% 	[class: 1]
 [       0    6722       6       1       3       0       0       7       3       0]   99.703% 	[class: 2]
 [       2       3    5946       0       1       0       0       3       2       1]   99.799% 	[class: 3]
 [       0       2       4    6108       1       7       0       2       3       4]   99.625% 	[class: 4]
 [       0       4       0       0    5823       0       3       1       1      10]   99.675% 	[class: 5]
 [       0       1       1       1       0    5416       2       0       0       0]   99.908% 	[class: 6]
 [       1       2       0       0       2       2    5909       0       2       0]   99.848% 	[class: 7]
 [       1       9       3       0       3       0       0    6238       4       7]   99.569% 	[class: 8]
 [       0       4       2       1       0       1       4       2    5832      

<trainer> time to test 1 sample = 0.08353910446167ms	
ConfusionMatrix:
[[     974       0       1       1       1       1       0       1       1       0]   99.388% 	[class: 1]
 [       0    1124       3       1       0       1       2       1       2       1]   99.031% 	[class: 2]
 [       4       1    1012       1       2       0       3       5       4       0]   98.062% 	[class: 3]
 [       0       0       5     990       0       4       0       2       1       8]   98.020% 	[class: 4]
 [       1       0       2       1     969       0       3       0       0       6]   98.676% 	[class: 5]
 [       2       0       0       6       2     875       2       0       2       3]   98.094% 	[class: 6]
 [       7       3       1       0       5      12     929       0       1       0]   96.973% 	[class: 7]
 [       2       6      11       0       7       0       0     981       4      17]   95.428% 	[class: 8]
 [       3       2       4       3       4       5       1       2     947       

  unionvalids : FloatTensor - size: 10
  totalValid : 0.9787
}
<trainer> on training set:	
<trainer> online epoch # 13 [batchSize = 50]	


Progress: 3601 / 60000	


Progress: 29851 / 60000	


Progress: 56051 / 60000	


<trainer> time to learn 1 sample = 0.21978528499603ms	
ConfusionMatrix:
[[    5919       0       0       0       0       0       2       0       1       1]   99.932% 	[class: 1]
 [       0    6727       5       0       3       0       0       6       1       0]   99.778% 	[class: 2]
 [       1       3    5947       0       1       0       0       3       2       1]   99.815% 	[class: 3]
 [       0       1       3    6117       1       3       0       1       2       3]   99.772% 	[class: 4]
 [       0       4       0       0    5828       0       1       1       1       7]   99.760% 	[class: 5]
 [       0       1       0       1       0    5417       2       0       0       0]   99.926% 	[class: 6]
 [       1       2       0       0       2       0    5911       0       2       0]   99.882% 	[class: 7]
 [       1       9       2       0       3       0       0    6244       1       5]   99.665% 	[class: 8]
 [       0       4       1       0       0       0       2       1    5840      

<trainer> time to test 1 sample = 0.079618883132935ms	
ConfusionMatrix:
[[     974       0       1       1       1       1       0       1       1       0]   99.388% 	[class: 1]
 [       0    1125       3       1       0       1       2       1       1       1]   99.119% 	[class: 2]
 [       4       1    1013       1       2       0       2       5       4       0]   98.159% 	[class: 3]
 [       0       0       5     990       0       4       0       2       1       8]   98.020% 	[class: 4]
 [       1       0       2       1     969       0       3       0       0       6]   98.676% 	[class: 5]
 [       2       0       0       8       2     873       2       0       2       3]   97.870% 	[class: 6]
 [       7       3       1       1       5      10     930       0       1       0]   97.077% 	[class: 7]
 [       2       6      10       0       7       0       0     982       4      17]   95.525% 	[class: 8]
 [       3       2       4       4       5       5       1       2     945      

  }
  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9787
}
<trainer> on training set:	
<trainer> online epoch # 14 [batchSize = 50]	


Progress: 18551 / 60000	


Progress: 46151 / 60000	


<trainer> time to learn 1 sample = 0.21632163524628ms	
ConfusionMatrix:
[[    5920       0       0       0       0       0       1       0       1       1]   99.949% 	[class: 1]
 [       0    6730       2       0       3       0       0       6       1       0]   99.822% 	[class: 2]
 [       1       3    5950       0       1       0       0       2       1       0]   99.866% 	[class: 3]
 [       0       1       1    6123       0       1       0       1       1       3]   99.870% 	[class: 4]
 [       0       3       0       0    5833       0       1       1       0       4]   99.846% 	[class: 5]
 [       0       1       0       1       0    5419       0       0       0       0]   99.963% 	[class: 6]
 [       1       2       0       0       1       0    5913       0       1       0]   99.916% 	[class: 7]
 [       1       8       1       0       0       0       0    6251       1       3]   99.777% 	[class: 8]
 [       0       4       0       0       0       0       2       0    5844      

<trainer> time to test 1 sample = 0.07662239074707ms	
ConfusionMatrix:
[[     974       0       1       1       1       1       0       1       1       0]   99.388% 	[class: 1]
 [       0    1125       3       1       0       1       2       1       1       1]   99.119% 	[class: 2]
 [       4       1    1014       1       2       0       2       5       3       0]   98.256% 	[class: 3]
 [       0       0       5     990       0       4       0       2       1       8]   98.020% 	[class: 4]
 [       1       0       2       1     969       0       3       0       0       6]   98.676% 	[class: 5]
 [       2       0       0       8       2     873       2       0       2       3]   97.870% 	[class: 6]
 [       7       3       1       1       5       9     931       0       1       0]   97.182% 	[class: 7]
 [       2       7       9       1       7       0       0     982       4      16]   95.525% 	[class: 8]
 [       3       2       4       4       6       5       1       2     944       

Progress: 8851 / 60000	


Progress: 34501 / 60000	


<trainer> time to learn 1 sample = 0.21503714720408ms	
ConfusionMatrix:
[[    5921       0       0       0       0       0       1       0       0       1]   99.966% 	[class: 1]
 [       0    6733       0       0       3       0       0       5       1       0]   99.867% 	[class: 2]
 [       1       2    5953       0       0       0       0       2       0       0]   99.916% 	[class: 3]
 [       0       0       0    6128       0       0       0       1       1       1]   99.951% 	[class: 4]
 [       0       1       0       0    5836       0       1       1       0       3]   99.897% 	[class: 5]
 [       0       1       0       1       0    5419       0       0       0       0]   99.963% 	[class: 6]
 [       1       2       0       0       1       0    5914       0       0       0]   99.932% 	[class: 7]
 [       1       6       0       0       0       0       0    6255       1       2]   99.840% 	[class: 8]
 [       0       4       0       0       0       0       2       0    5844      

  averageValid : 0.99899492263794
  classes : 
    {
      1 : 1
      2 : 2
      3 : 3
      4 : 4
      5 : 5
      6 : 6
      7 : 7
      8 : 8
      9 : 9
      10 : 10
    }
  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.99898333333333
}
<trainer> on testing Set:	


Progress: 1451 / 10000	


<trainer> time to test 1 sample = 0.07702419757843ms	


ConfusionMatrix:
[[     974       0       1       1       1       0       0       1       2       0]   99.388% 	[class: 1]
 [       0    1125       3       1       0       1       2       1       1       1]   99.119% 	[class: 2]
 [       4       1    1014       1       2       0       2       5       3       0]   98.256% 	[class: 3]
 [       0       0       4     992       0       3       0       2       1       8]   98.218% 	[class: 4]
 [       1       0       2       1     969       0       3       0       0       6]   98.676% 	[class: 5]
 [       2       0       0       8       2     874       2       0       1       3]   97.982% 	[class: 6]
 [       7       3       1       1       5       9     931       0       1       0]   97.182% 	[class: 7]
 [       2       6       8       1       8       0       0     982       5      16]   95.525% 	[class: 8]
 [       3       2       4       4       6       5       1       2     944       3]   96.920% 	[class: 9]
 [       3       2       0   

Progress: 22851 / 60000	


Progress: 48251 / 60000	


<trainer> time to learn 1 sample = 0.21673796971639ms	
ConfusionMatrix:
[[    5922       0       0       0       0       0       1       0       0       0]   99.983% 	[class: 1]
 [       0    6734       0       0       3       0       0       5       0       0]   99.881% 	[class: 2]
 [       0       2    5954       0       0       0       0       2       0       0]   99.933% 	[class: 3]
 [       0       0       0    6129       0       0       0       1       0       1]   99.967% 	[class: 4]
 [       0       1       0       0    5837       0       0       1       0       3]   99.914% 	[class: 5]
 [       0       1       0       1       0    5419       0       0       0       0]   99.963% 	[class: 6]
 [       1       2       0       0       1       0    5914       0       0       0]   99.932% 	[class: 7]
 [       0       6       0       0       0       0       0    6258       0       1]   99.888% 	[class: 8]
 [       0       3       0       0       0       0       1       0    5847      

<trainer> time to test 1 sample = 0.079041504859924ms	
ConfusionMatrix:
[[     974       0       1       1       1       0       0       1       2       0]   99.388% 	[class: 1]
 [       0    1125       3       1       0       1       2       1       1       1]   99.119% 	[class: 2]
 [       4       1    1014       1       2       0       2       4       4       0]   98.256% 	[class: 3]
 [       0       0       4     992       0       3       0       2       1       8]   98.218% 	[class: 4]
 [       1       0       2       1     968       0       3       0       0       7]   98.574% 	[class: 5]
 [       2       0       0       8       2     874       2       0       1       3]   97.982% 	[class: 6]
 [       7       3       1       1       5       8     933       0       0       0]   97.390% 	[class: 7]
 [       2       6       8       1       9       0       0     985       5      12]   95.817% 	[class: 8]
 [       3       2       4       4       6       5       1       2     944      

 }
  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9795
}
<trainer> on training set:	
<trainer> online epoch # 17 [batchSize = 50]	


Progress: 10451 / 60000	


Progress: 36651 / 60000	


<trainer> time to learn 1 sample = 0.21464621623357ms	
ConfusionMatrix:
[[    5922       0       0       0       0       0       1       0       0       0]   99.983% 	[class: 1]
 [       0    6736       0       0       2       0       0       4       0       0]   99.911% 	[class: 2]
 [       0       2    5954       0       0       0       0       2       0       0]   99.933% 	[class: 3]
 [       0       0       0    6129       0       0       0       1       0       1]   99.967% 	[class: 4]
 [       0       1       0       0    5838       0       0       1       0       2]   99.932% 	[class: 5]
 [       0       1       0       1       0    5419       0       0       0       0]   99.963% 	[class: 6]
 [       1       0       0       0       1       0    5916       0       0       0]   99.966% 	[class: 7]
 [       0       6       0       0       0       0       0    6258       0       1]   99.888% 	[class: 8]
 [       0       2       0       0       0       0       0       0    5849      

Progress: 8851 / 10000	


<trainer> time to test 1 sample = 0.079308104515076ms	
ConfusionMatrix:
[[     974       0       1       1       1       0       0       1       2       0]   99.388% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       1       1]   99.207% 	[class: 2]
 [       4       1    1014       1       2       0       2       4       4       0]   98.256% 	[class: 3]
 [       0       0       4     992       0       3       0       2       1       8]   98.218% 	[class: 4]
 [       1       0       2       1     968       0       3       0       0       7]   98.574% 	[class: 5]
 [       2       0       0       8       2     875       2       0       1       2]   98.094% 	[class: 6]
 [       7       3       1       1       4       8     934       0       0       0]   97.495% 	[class: 7]
 [       2       6       8       1       9       0       0     986       5      11]   95.914% 	[class: 8]
 [       3       2       4       4       6       4       2       2     944      

  valids : FloatTensor - size: 10
  mat : LongTensor - size: 10x10
  averageUnionValid : 0.96040686368942
  _targ_idx : LongTensor - empty
  averageValid : 0.97978656291962
  classes : 
    {
      1 : 1
      2 : 2
      3 : 3
      4 : 4
      5 : 5
      6 : 6
      7 : 7
      8 : 8
      9 : 9
      10 : 10
    }
  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9799
}
<trainer> on training set:	
<trainer> online epoch # 18 [batchSize = 50]	


Progress: 26051 / 60000	


Progress: 53001 / 60000	


<trainer> time to learn 1 sample = 0.22380093336105ms	


ConfusionMatrix:
[[    5922       0       0       0       0       0       1       0       0       0]   99.983% 	[class: 1]
 [       0    6737       0       0       2       0       0       3       0       0]   99.926% 	[class: 2]
 [       0       2    5955       0       0       0       0       1       0       0]   99.950% 	[class: 3]
 [       0       0       0    6130       0       0       0       0       0       1]   99.984% 	[class: 4]
 [       0       1       0       0    5839       0       0       1       0       1]   99.949% 	[class: 5]
 [       0       1       0       1       0    5419       0       0       0       0]   99.963% 	[class: 6]
 [       1       0       0       0       0       0    5917       0       0       0]   99.983% 	[class: 7]
 [       0       5       0       0       0       0       0    6260       0       0]   99.920% 	[class: 8]
 [       0       2       0       0       0       0       0       0    5849       0]   99.966% 	[class: 9]
 [       0       1       0   

<trainer> time to test 1 sample = 0.07979621887207ms	


ConfusionMatrix:
[[     974       0       1       1       1       0       0       1       2       0]   99.388% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       1       1]   99.207% 	[class: 2]
 [       4       1    1013       1       2       0       2       4       5       0]   98.159% 	[class: 3]
 [       0       0       4     992       0       3       0       2       1       8]   98.218% 	[class: 4]
 [       1       0       2       1     968       0       3       0       0       7]   98.574% 	[class: 5]
 [       2       0       0       8       2     875       2       0       1       2]   98.094% 	[class: 6]
 [       6       3       0       1       4       9     935       0       0       0]   97.599% 	[class: 7]
 [       2       6       6       1       9       0       0     988       5      11]   96.109% 	[class: 8]
 [       3       2       4       4       6       4       2       2     944       3]   96.920% 	[class: 9]
 [       3       2       0   

Progress: 15601 / 60000	


Progress: 41601 / 60000	


<trainer> time to learn 1 sample = 0.22383470137914ms	
ConfusionMatrix:
[[    5923       0       0       0       0       0       0       0       0       0]   100.000% 	[class: 1]
 [       0    6737       0       0       2       0       0       3       0       0]   99.926% 	[class: 2]
 [       0       2    5956       0       0       0       0       0       0       0]   99.966% 	[class: 3]
 [       0       0       0    6130       0       0       0       0       0       1]   99.984% 	[class: 4]
 [       0       1       0       0    5839       0       0       1       0       1]   99.949% 	[class: 5]
 [       0       1       0       0       0    5420       0       0       0       0]   99.982% 	[class: 6]
 [       0       0       0       0       0       0    5918       0       0       0]   100.000% 	[class: 7]
 [       0       4       0       0       0       0       0    6261       0       0]   99.936% 	[class: 8]
 [       0       1       0       0       0       0       0       0    5850    

<trainer> time to test 1 sample = 0.077850413322449ms	
ConfusionMatrix:
[[     974       0       1       1       1       0       0       1       2       0]   99.388% 	[class: 1]
 [       0    1126       2       1       0       1       2       1       1       1]   99.207% 	[class: 2]
 [       4       1    1012       1       2       0       2       5       5       0]   98.062% 	[class: 3]
 [       0       0       4     992       0       3       0       3       1       7]   98.218% 	[class: 4]
 [       1       0       2       1     967       0       3       0       0       8]   98.473% 	[class: 5]
 [       2       0       0       8       2     874       2       0       2       2]   97.982% 	[class: 6]
 [       6       3       0       1       4       7     937       0       0       0]   97.808% 	[class: 7]
 [       2       5       6       1       9       0       0     991       5       9]   96.401% 	[class: 8]
 [       3       2       4       4       5       4       2       2     945      

{
  valids : FloatTensor - size: 10
  mat : LongTensor - size: 10x10
  averageUnionValid : 0.96136252880096
  _targ_idx : LongTensor - empty
  averageValid : 0.98028101921082
  classes : 
    {
      1 : 1
      2 : 2
      3 : 3
      4 : 4
      5 : 5
      6 : 6
      7 : 7
      8 : 8
      9 : 9
      10 : 10
    }
  _prediction : FloatTensor - size: 10
  _pred_idx : LongTensor - size: 1
  nclasses : 10
  _max : FloatTensor - size: 1
  _target : FloatTensor - empty
  unionvalids : FloatTensor - size: 10
  totalValid : 0.9804
}
<trainer> on training set:	
<trainer> online epoch # 20 [batchSize = 50]	


Progress: 4651 / 60000	


Progress: 30651 / 60000	


Progress: 56151 / 60000	


<trainer> time to learn 1 sample = 0.2189066807429ms	


ConfusionMatrix:
[[    5923       0       0       0       0       0       0       0       0       0]   100.000% 	[class: 1]
 [       0    6737       0       0       2       0       0       3       0       0]   99.926% 	[class: 2]
 [       0       1    5957       0       0       0       0       0       0       0]   99.983% 	[class: 3]
 [       0       0       0    6130       0       0       0       0       0       1]   99.984% 	[class: 4]
 [       0       0       0       0    5840       0       0       1       0       1]   99.966% 	[class: 5]
 [       0       0       0       0       0    5421       0       0       0       0]   100.000% 	[class: 6]
 [       0       0       0       0       0       0    5918       0       0       0]   100.000% 	[class: 7]
 [       0       4       0       0       0       0       0    6261       0       0]   99.936% 	[class: 8]
 [       0       1       0       0       0       0       0       0    5850       0]   99.983% 	[class: 9]
 [       0       0       0

<trainer> time to test 1 sample = 0.081539392471313ms	


ConfusionMatrix:
[[     973       0       1       1       1       0       1       1       2       0]   99.286% 	[class: 1]
 [       0    1127       2       1       0       1       2       1       1       0]   99.295% 	[class: 2]
 [       4       1    1012       1       2       0       2       5       5       0]   98.062% 	[class: 3]
 [       0       0       4     992       0       3       0       3       1       7]   98.218% 	[class: 4]
 [       1       0       2       1     967       0       3       0       0       8]   98.473% 	[class: 5]
 [       2       0       0       8       2     874       2       0       2       2]   97.982% 	[class: 6]
 [       6       3       0       1       4       7     937       0       0       0]   97.808% 	[class: 7]
 [       2       5       6       1       8       0       0     992       5       9]   96.498% 	[class: 8]
 [       3       2       4       4       5       4       1       2     946       3]   97.125% 	[class: 9]
 [       3       2       0   

In [11]:
function table2Tensor(table)
    tensor = torch.Tensor(#table)
    for i = 1, tensor:size()[1] do
        tensor[i] = table[i]
    end
    return tensor
end

In [12]:
Plot = require 'itorch.Plot'

trainValid = table2Tensor(trainHistory)
testValid = table2Tensor(testHistory)

epochCount = torch.Tensor(numIter)
i = 0

epochCount:apply(function()
    i = i + 1
    return i
end)


-- line plots
plot = Plot():line(epochCount, trainValid, 'green', 'Train')
plot:line(epochCount, testValid, 'blue', 'Test')
plot:legend(true)
plot:title('Train/Test TotalValid'):draw()