# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Loading-dataset-and-testing" data-toc-modified-id="Loading-dataset-and-testing-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Loading dataset and testing</a></div><div class="lev2 toc-item"><a href="#Recreating-the-original-sentence-for-test-purposes" data-toc-modified-id="Recreating-the-original-sentence-for-test-purposes-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Recreating the original sentence for test purposes</a></div><div class="lev2 toc-item"><a href="#OneHot" data-toc-modified-id="OneHot-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>OneHot</a></div><div class="lev1 toc-item"><a href="#Implementing-the-RNN-training-as-baseline" data-toc-modified-id="Implementing-the-RNN-training-as-baseline-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Implementing the RNN training as baseline</a></div><div class="lev2 toc-item"><a href="#Batch-Loader" data-toc-modified-id="Batch-Loader-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Batch Loader</a></div><div class="lev2 toc-item"><a href="#Next-Batch-Method" data-toc-modified-id="Next-Batch-Method-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Next Batch Method</a></div><div class="lev3 toc-item"><a href="#Test-for-batch-reconstruction" data-toc-modified-id="Test-for-batch-reconstruction-2.2.1"><span class="toc-item-num">2.2.1&nbsp;&nbsp;</span>Test for batch reconstruction</a></div><div class="lev2 toc-item"><a href="#Loading-the-LSTM" data-toc-modified-id="Loading-the-LSTM-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Loading the LSTM</a></div><div class="lev3 toc-item"><a href="#Drawing-the-graph" data-toc-modified-id="Drawing-the-graph-2.3.1"><span class="toc-item-num">2.3.1&nbsp;&nbsp;</span>Drawing the graph</a></div><div class="lev2 toc-item"><a href="#Initializing-LSTM" data-toc-modified-id="Initializing-LSTM-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Initializing LSTM</a></div><div class="lev2 toc-item"><a href="#Training-LSTM" data-toc-modified-id="Training-LSTM-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Training LSTM</a></div><div class="lev3 toc-item"><a href="#Defining-the-cloning-list-utils" data-toc-modified-id="Defining-the-cloning-list-utils-2.5.1"><span class="toc-item-num">2.5.1&nbsp;&nbsp;</span>Defining the cloning list utils</a></div><div class="lev3 toc-item"><a href="#Defining-the-training-protocol" data-toc-modified-id="Defining-the-training-protocol-2.5.2"><span class="toc-item-num">2.5.2&nbsp;&nbsp;</span>Defining the training protocol</a></div><div class="lev2 toc-item"><a href="#Optimization-phase" data-toc-modified-id="Optimization-phase-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>Optimization phase</a></div><div class="lev1 toc-item"><a href="#Interpreting-model-training-and-parameters" data-toc-modified-id="Interpreting-model-training-and-parameters-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Interpreting model training and parameters</a></div><div class="lev1 toc-item"><a href="#Sampling-from-trained-model" data-toc-modified-id="Sampling-from-trained-model-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Sampling from trained model</a></div><div class="lev2 toc-item"><a href="#Reconstructing-the-first-input" data-toc-modified-id="Reconstructing-the-first-input-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Reconstructing the first input</a></div><div class="lev3 toc-item"><a href="#Little-debugging" data-toc-modified-id="Little-debugging-4.1.1"><span class="toc-item-num">4.1.1&nbsp;&nbsp;</span>Little debugging</a></div><div class="lev1 toc-item"><a href="#Scrap" data-toc-modified-id="Scrap-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Scrap</a></div>

# Loading dataset and testing

In [1]:
require 'nn'
require 'nngraph'
require 'optim'

In [2]:
data_loader = require 'utils.data_loader';
QUICKLOAD = true

In [3]:
if QUICKLOAD then
    x = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/sample.t7")
    y = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/label.t7")
    voc = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/vocab.t7")
    index = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/vocab.t7_index")
else 
    input_file = "/Users/david/Documents/MemoryNetwork/preprocessing/output.txt"
    out_vocab_file = "/Users/david/Documents/MemoryNetwork/output_lua/vocab.t7"
    out_tensor_file = "/Users/david/Documents/MemoryNetwork/output_lua/data.t7"
    voc = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/vocab.t7")
    index = torch.load("/Users/david/Documents/MemoryNetwork/output_lua/vocab.t7_index")
    x, y = data_loader.text_to_tensor(input_file,out_vocab_file,out_tensor_file)
end

## Recreating the original sentence for test purposes

## OneHot

In [4]:
debug.getregistry()["OneHot"] = nil
OneHot, parent = torch.class('OneHot', 'nn.Module')

function OneHot:__init(outputSize)
  parent.__init(self)
  self.outputSize = outputSize
  -- We'll construct one-hot encodings by using the index method to
  -- reshuffle the rows of an identity matrix. To avoid recreating
  -- it every iteration we'll cache it.
  self._eye = torch.eye(outputSize)
end

function OneHot:updateOutput(input)
  self.output:resize(input:size(1), self.outputSize):zero()
  if self._eye == nil then self._eye = torch.eye(self.outputSize) end
  self._eye = self._eye:float()
  local longInput = input:long()
  self.output:copy(self._eye:index(1, longInput))
  return self.output
end

# Implementing the RNN training as baseline

In [5]:
require 'nn';
require 'nngraph';
LSTM = require 'models.lstm';
--require 'utils.OneHot';
model_utils = require 'utils.model_utils';

In [6]:
RNN_SIZE = 10
NUM_LAYERS = 3
DROPOUT = 0
SEQ_LENGTH = 54
BATCH_SIZE = 1
VOCAB_SIZE = data_loader.count_table_elements(voc)+2
GRAD_CLIP = 5
LEARNING_RATE = 2e-3
LEARNING_RATE_DECAY = 0.97
LEARNING_RATE_DECAY_AFTER = 10
DECAY_RATE = 0.95
MAX_EPOCH = 50

## Batch Loader

In [7]:
BatchLoader = {}
BatchLoader.__index = BatchLoader

function BatchLoader.create(x,y,batch_size,seq_length)
    local self = {}
    setmetatable(self,BatchLoader)
    -- self.batches is a table of tensor
    print('reshaping tensor...')
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.ix = 1
    
    self.x_batches = x:view(BATCH_SIZE,-1):split(SEQ_LENGTH,2) -- #rows = #batches
    self.nbatches = #self.x_batches
    
    self.y_batches = y:view(BATCH_SIZE,-1):split(SEQ_LENGTH,2) -- #rows = #batches
    self.y_nbatches = #self.y_batches
    print(#self.x_batches)
    print(#self.y_batches)
    assert(#self.x_batches == #self.y_batches)
    
    -- lets try to be helpful here
    if self.nbatches < 50 then
        print('WARNING: less than 50 batches in the data in total? Looks like very small dataset. You probably want to use smaller batch_size and/or seq_length.')
    end
    
    collectgarbage()
    return self 
end

## Next Batch Method

In [8]:
function BatchLoader:next_batch()
    if self.nbatches < self.ix then
        self.ix = 1 -- cycling through the batch
    end
    local x = self.x_batches[self.ix]
    local y = self.y_batches[self.ix]
    self.ix = self.ix + 1
    return x,y 
end

In [9]:
B = BatchLoader.create(x,y,BATCH_SIZE,SEQ_LENGTH)

reshaping tensor...	
70	
70	


In [10]:
B.x_batches[1]

Columns 1 to 20
  4  26  25   2  37   9  16  40  38  37   9  13  39  37  28  25  37  33  13  29

Columns 21 to 40
 37  28  25  37  12  13  27  16  25  37   9   4  40  23  37   8  38  37  37  11

Columns 41 to 54
 10  24  15  35  14  41  26  37  37  34   3   1   1   1
[torch.ByteTensor of size 1x54]



### Test for batch reconstruction

for i=1,B.x_batches[1]:size(2) do
    local v = B.x_batches[1][{1,i}]
    print(index[v])
end

## Loading the LSTM

In [11]:
LSTM = require 'models.lstm';
protos = {}
protos.rnn = LSTM.lstm(VOCAB_SIZE, RNN_SIZE, NUM_LAYERS, DROPOUT)
protos.criterion = nn.ClassNLLCriterion()

### Drawing the graph

In [12]:
-- graph.dot(protos.rnn.fg, 'rnn')

## Initializing LSTM

In [14]:
function prepro(x,y)
    local x = x:transpose(1,2):contiguous()
    local y = y:transpose(1,2):contiguous()
    return x,y
end

In [13]:
init_state = {}
for L=1,NUM_LAYERS do
    local h_init = torch.zeros(BATCH_SIZE, RNN_SIZE)
    table.insert(init_state,h_init:clone())
    table.insert(init_state,h_init:clone()) -- because LSTM
end

In [15]:
-- put the above things into one flattened parameters tensor
params, grad_params = model_utils.combine_all_parameters(protos.rnn)
print("initialized parameters")
-- initialization
params:uniform(-0.08,0.08) -- small uniform numbers
print("parameters uniformed")
-- initialize the LSTM forget gates with slightly higher biases to encorage remembering in the beginning
for layer_idx = 1, NUM_LAYERS do
    for _, node in ipairs(protos.rnn.forwardnodes) do
        if node.data.annotations.name  == "i2h".. layer_idx then
            print('setting forget gate biases to 1 in LSTM layer '.. layer_idx) 
            -- the gates are in order i,f,o,g so f is the 2nd block of weights
            node.data.module.bias[{{RNN_SIZE+1,2*RNN_SIZE}}]:fill(1.0)
        end
    end
end

initialized parameters	
parameters uniformed	


In [16]:
-- make a bunch of clones after flattening, as that reallocates memory
clones = {}
for name,proto in pairs(protos) do
    print('cloning ' .. name)
    clones[name] = model_utils.clone_many_times(proto,SEQ_LENGTH, not proto.parameters)
end

cloning rnn	


cloning criterion	


## Training LSTM

### Defining the cloning list utils

In [17]:
function clone_list(tensor_list, zeto_too)
    -- utility function. todo : move away to some utils file?
    -- takes a list of tensors and returns a list of cloned tensors
    local out = {}
    for k,v in pairs(tensor_list) do
        out[k] = v:clone()
        if zero_too then out[k]:zero() end
    end
    return out
end

### Defining the training protocol

In [18]:
local init_state_global = clone_list(init_state)
function feval(opti_params)
    --[[if opti_params ~= params then 
        params:copy(opti_params)
    end ]]--
    grad_params:zero()
    ------------------ get minibatch -------------------
    local x,y = B:next_batch()
    local x,y = prepro(x,y)
    ------------------ forward pass -------------------
    local rnn_state = {[0] = init_state_global}
    local predictions = {} --- softmax outputs
    local loss = 0
    for t = 1,SEQ_LENGTH do
        -- print("iterations : "..t)
        clones.rnn[t]:training()  -- make sure we are in correct mode (this is cheap, sets flag)
        -- print(unpack(rnn_state[t-1]))
        local lst = clones.rnn[t]:forward{x[t],unpack(rnn_state[t-1])}
        rnn_state[t] = {}
        for i=1,#init_state do table.insert(rnn_state[t], lst[i]) end -- extract the state, without output
        predictions[t] = lst[#lst] -- last element is the prediction
        loss = loss + clones.criterion[t]:forward(predictions[t],y[t])
    end
    loss = loss / SEQ_LENGTH
    ------------------ backward pass -------------------
    -- initialize radient at time t to be zeros (there's no influence from future)
    local drnn_state = {[SEQ_LENGTH] = clone_list(init_state,true)} -- true also zeros the clones
    for t = SEQ_LENGTH,1,-1 do
        -- backprop through loss, and softmax / linear
        local doutput_t = clones.criterion[t]:backward(predictions[t],y[t])
        table.insert(drnn_state[t],doutput_t)
        local dlst = clones.rnn[t]:backward({x[t], unpack(rnn_state[t-1])}, drnn_state[t])
        drnn_state[t-1] = {}
        for k,v in pairs(dlst) do
            if k> 1 then --k == 1 is gradient on x, which we don't need
                -- note we do k-1 because first item is dembeddings, and then follow the
                -- derivatives of the state, starting at index 2. 
                drnn_state[t-1][k-1] = v
            end
        end
    end
    ------------------ backward pass -------------------
    -- transfer final state to initial state (BPTT)
    init_state_global = rnn_state[#rnn_state] -- does this need to be a clone ?
    -- grad_params:div(SEQ_LENGTH)
    -- clip gradient element wise
    grad_params:clamp(-GRAD_CLIP,GRAD_CLIP)
    return loss, grad_params
end

## Optimization phase

In [19]:
-- feval()

In [156]:
-- start optimization here
train_losses = {}
val_losses = {}
local optim_state = {learning_rate = LEARNING_RATE, alpha= DECAY_RATE}
local iterations = MAX_EPOCH * B.nbatches
local iterations_per_epoch = MAX_EPOCH*B.nbatches
local loss0 = nil

-- Optimization starts here
for i =1, iterations do
    local epoch = i / B.nbatches
    local timer = torch.Timer()
    local _, loss = optim.rmsprop(feval,params,optim_state)
    local train_loss = loss[1] -- the loss is inside a list, pop it
    train_losses[i] = train_loss
    print(train_losses[i])
    -- exponential learning rate decay
    if i % B.nbatches == 0 and LEARNING_RATE_DECAY < 1 then
        if epoch >= LEARNING_RATE_DECAY_AFTER then
            local decay_factor = LEARNING_RATE_DECAY
            optim_state.learning_rate = optim_state.learning_rate * decay_factor -- decay it
            print('decayed learning rate by a factor ' .. decay_factor .. ' to ' .. optim_state.learning_rate)
        end
    end
    
    -- every now and then or on last iteration
    if i % 10 == 0 then collectgarbage() end
    
    if loss[1] ~= loss[1] then
        print('loss is NaN.  This usually indicates a bug.  Please check the issues page for existing issues, or create a new issue, if none exist.  Ideally, please state: your operating system, 32-bit/64-bit, your blas version, cpu/cuda/cl?')
        break -- halt
    end
    
    if loss0 == nil then loss0 = loss[1] end
    if loss[1] > loss0 * 3 then
        print('loss is exploding, aborting.')
        break -- halt
    end
end

0.13258836967496	


0.084040673723601	


0.16920585759234	


0.086469799143723	


0.068631238992106	


0.072959511411296	


0.066118999836778	


0.062225640826361	


0.077538619112184	


0.17506615396923	


0.1200695069391	


0.13706953691491	


0.092857088087954	


0.11016661367684	


0.097459229197444	


0.088354637738802	


0.11769201342096	


0.083690525129958	


0.092216855321565	


0.084232324048539	


0.086993444270871	


0.084065238758627	


0.083620958196183	


0.07339319393238	


0.075334600741092	


0.072658500873827	


0.11335124121385	


0.12895920742	


0.088955799546304	


0.089072706857331	


0.082326204849858	


0.098717605710195	


0.12769596997179	


0.12312774822947	


0.086958913820905	


0.075526531098258	


0.11929728016562	


0.11341677176335	


0.082946431167885	


0.1089299874797	


0.087948630876958	


0.10325471070235	


0.071907980062832	


0.075294214395047	


0.06506252084982	


0.072330359674822	


0.15190203476029	


0.074414020721545	


0.11074285163162	


0.080906949439374	


0.13304441534541	


0.11530998334529	


0.089880541374038	


0.084205853572048	


0.072022238822058	


0.067735223784802	


0.11160747855919	


0.099946826550448	


0.073248071732723	


0.12569678737938	


0.090177733954071	


0.085290977395189	


0.085265092321248	


0.084574538484222	


0.10264609459002	


0.091612697023837	


0.10337923513847	


0.078742183933969	


0.074205637993926	


0.070761172744517	


0.12915296078175	


0.1017495910754	


0.10072627740384	


0.10323141673374	


0.085383004678035	


0.090517670477118	


0.084919075152314	


0.0795975767504	


0.093112869894822	


0.11170391918272	


0.094082803493578	


0.11799374177129	


0.081630338862937	


0.10015936687711	


0.10291341254549	


0.093763040147185	


0.10643330658313	


0.078325085092431	


0.095338517384488	


0.088079805182718	


0.081642009371554	


0.087210494515267	


0.086609427981382	


0.076675721811993	


0.078315638123445	


0.075384672793187	


0.10800199214477	


0.1215995023903	


0.08513284620631	


0.09083968912658	


0.084551836211603	


0.10175169670381	


0.12139611319005	


0.11969683621883	


0.088460143766199	


0.076703043374719	


0.11432988980728	


0.11186054656508	


0.083494369978288	


0.10776115644306	


0.088651812987237	


0.10482567026816	


0.072503574063353	


0.076493117357602	


0.06558134473483	


0.072991076708819	


0.15362834331237	


0.075092228115693	


0.10886920019856	


0.081702053787525	


0.12971288460971	


0.11592358381702	


0.090243949301918	


0.084716767767916	


0.072066704827044	


0.067861509163367	


0.11019029937943	


0.098404373592154	


0.073072106441647	


0.12381471147481	


0.088885079259023	


0.084475355300937	


0.084649984747082	


0.083311800276753	


0.1002775626998	


0.091361363657311	


0.103538518865	


0.078010140057875	


0.073469488697996	


0.070020450051425	


0.12921752578259	


0.10173949629445	


0.10032345478771	


0.1035796438983	


0.085296763406163	


0.091135165470752	


0.085694944858344	


0.080330807150803	


0.093962855020357	


0.10963628794142	


0.092462268476395	


0.11662277770268	


0.081331982140035	


0.099132660151837	


0.10218229752236	


0.093212755389817	


0.10427305045385	


0.078081352604399	


0.094848473156929	


0.087753568681911	


0.080812957682203	


0.086809775675396	


0.086297545169039	


0.076878981432824	


0.07835251690832	


0.075731885884194	


0.10610158119238	


0.11927710174222	


0.084611986979547	


0.090548675410953	


0.084361413829679	


0.10214196488712	


0.11903744044193	


0.1190789413968	


0.08788396352885	


0.07605236035212	


0.11301492647185	


0.11221886090719	


0.082662603393235	


0.10766699189195	


0.087878958278573	


0.10477470003789	


0.072099785242509	


0.076713131807452	


0.065818965922618	


0.073890670542808	


0.15022560258558	


0.075031457330349	


0.10748984521433	


0.081893841132591	


0.12849735850458	


0.11644833012588	


0.089925098732457	


0.084655658390198	


0.071736424360167	


0.067705014177088	


0.10986588413511	


0.098629923526346	


0.072287097753505	


0.12338878746097	


0.088707367687551	


0.084376093921988	


0.083594291925672	


0.083075499784545	


0.098998943775901	


0.090784970305431	


0.10370658059904	


0.078058346549354	


0.073361854169505	


0.069812087049148	


0.12894053739584	


0.10136012932173	


0.099981284888423	


0.10351752619619	


0.084878139608956	


0.09139784719694	


0.086186519677353	


0.080794186397814	


0.094427958880282	


0.10778111582537	


0.091516685206112	


0.11622172970034	


0.081553372313765	


0.098659154178794	


0.1012996974228	


0.092411967921313	


0.10314383665814	


0.078272109028178	


0.094162319778219	


0.087323475140839	


0.080483797617219	


0.086370214106734	


0.085842547578644	


0.076964120130141	


0.078385985112557	


0.075935951230086	


0.10437546783315	


0.11787057960467	


0.084545110590458	


0.089998550809561	


0.084084091207191	


0.10227934460476	


0.11781153757321	


0.11875232618007	


0.0873277231592	


0.075400689146848	


0.11243613149824	


0.11233478154018	


0.081825247651781	


0.10766965223223	


0.087018965524246	


0.10441857972653	


0.071651005086248	


0.076814340752004	


0.066025765226919	


0.07451072325836	


0.15287981401172	


0.074656235511511	


0.10584204262697	


0.082029810000827	


0.12790717194633	


0.11650754207488	


0.08949857916159	


0.084562415985362	


0.071614852217089	


0.067725384848319	


0.10930402854029	


0.098866668406301	


0.071535465387506	


0.12314138549294	


0.08864195345576	


0.084387105433048	


0.082494939400619	


0.082934653479789	


0.098197787119691	


0.090109705928054	


0.10363141412928	


0.078450011654108	


0.073432251823754	


0.069652493590202	


0.12862513247691	


0.10077919279853	


0.099482018600935	


0.10327502807732	


0.084353816137781	


0.091702236484728	


0.086694627486414	


0.081290062702447	


0.094552193051295	


0.10676987389668	


0.090753700749961	


0.11575263528635	


0.081920130974053	


0.098202757728952	


0.10068468965141	


0.091614482464148	


0.10234788304054	


0.07862449438878	


0.093688915719009	


0.087056255505132	




0.080146639863563	


0.086102492888398	


0.085404443663538	


0.077185783979534	


0.078543946554333	


0.07604438649468	


0.10293076729599	


0.11655127882615	


0.084402914138252	


0.089583054163341	


0.083882241581065	


0.10237486017932	


0.11680524737894	


0.11871079578936	


0.086853832791786	


0.074698733635367	


0.11204524428513	


0.11231885614571	


0.080913934236713	


0.10747728586876	


0.086011940107581	


0.10382006784541	


0.071049268592949	


0.076872531342762	


0.066016751179594	


0.074893640249186	


0.14753638169368	


0.074650030184928	


0.10551026320918	


0.082021063390742	


0.12759244226986	


0.11729481339256	


0.089404742134833	


0.084190653425679	


0.071153949835353	


0.0673029908826	


0.10930148114895	


0.098157081863734	


0.071057793946424	


0.12274155048929	


0.088630894553982	


0.084588873417668	


0.081710929674662	


0.082866352936072	


0.097725040122478	


0.089935389722502	


0.10380299730444	


0.078672711481009	


0.073180965105786	


0.069579427195584	


0.12825853266253	


0.10023826365212	


0.099092196937368	


0.10340478028013	


0.083882437527683	


0.091689511424624	


0.086733089039844	


0.081461201981956	


0.09476949100887	


0.10522254355762	


0.090228879435843	


0.1156153166552	


0.082254185635543	


0.098083767068132	


0.099990425006286	


0.090871109086373	


0.10172931617288	


0.078867465182621	


0.093203008230218	


0.08669996900658	


0.079974050033069	


0.085726181411488	


0.084984587402596	


0.077284832364616	


0.078516953926251	


0.076175986907975	


0.10167530019489	


0.11573998103941	


0.084386085046374	


0.089156495590858	


0.083622005202524	


0.10232117576199	


0.1162868665252	


0.11858607050717	


0.086385767939621	


0.074153898351482	




0.11202509388336	


0.11223330501691	


0.080204099832073	


0.10728012914953	


0.085291733110429	


0.10342266041181	


0.070836492804962	


0.076941767936974	


0.066216628560156	


0.075606639778801	


0.14445307560819	


0.07453573171543	


0.10485620930535	


0.08198565007657	


0.12743042637712	


0.11766078344476	


0.089145332028436	


0.083832609865722	


0.070829602054412	


0.067081624307832	


0.10932037380741	


0.098085222975448	


0.070550802007726	


0.12268316456968	


0.088647355696159	


0.084753674559378	


0.080953777211417	


0.082836239764908	


0.097445815444937	


0.089574327267204	


0.10390602341551	


0.078954372120034	


0.073083801004392	


0.069521858931476	


0.12786070078696	


0.099682599582273	


0.098645661116907	


0.1033604988277	


0.083432957606469	


0.091700149737976	


0.086794284363242	


0.081667484782052	


0.094890152036487	


0.10395086289836	


0.089783242931725	


0.11547084785574	


0.082537619116836	


0.098000942489717	


0.099418154968998	


0.090208254161403	


0.10128683696608	


0.079109425642781	


0.092792068236799	


0.08644680221175	


0.07980426078893	


0.085444982704432	


0.08462851763702	


0.077394111007777	


0.07852009942471	


0.076270144329104	


0.10048961413094	


0.11498568030889	


0.084310418463289	


0.088804001485276	


0.08346129850063	


0.10233876351352	


0.1157879783939	


0.11849263147213	


0.086018435813054	


0.073675986923634	


0.11196656485148	


0.11206454292539	


0.079525720884114	


0.1070919419942	


0.084600154547726	


0.10297946840518	


0.070593337732438	


0.076973692722307	


0.066484440178202	


0.076311232924535	


0.14171262813817	


0.07443264362133	


0.10411118561668	


0.082007176632768	


0.12738668183947	


0.11769144810639	


0.088903036863593	


0.083552909615061	


0.070677021345942	


0.066982509917001	


0.10905648359563	


0.097949693098869	


0.070038823126592	


0.12280689579796	


0.088679597739538	


0.08487444615523	


0.080197186545737	


0.082817131564721	


0.097249778375451	


0.089196992010197	


0.10391476224946	


0.079326964699021	


0.073091289853996	


0.069447517325647	


0.12749508572178	


0.099081620786208	


0.098273441527857	


0.10330300689142	


0.082967940125265	


0.09175730230303	


0.086943962510449	


0.081897197120879	


0.094918710809263	


0.10284855608424	


0.089360655163365	


0.11547536068937	


0.082857224540982	


0.097928915239326	


0.098989693708133	


0.089577067342848	


0.10099972632869	


0.079395276707094	


0.092441227159218	


0.086264329830902	


0.07966457818824	


0.085257340492537	


0.084287536164717	


0.077478629826647	


0.078524786360053	


0.076277630956306	


0.099461502817865	


0.11432740361217	


0.084198349625156	


0.088527995566715	


0.083354145360453	


0.10232700241044	


0.11531363884453	


0.11848083365975	


0.085739625852012	


0.073236656443483	


0.11190233375882	


0.11186590572224	


0.078889390346678	


0.10688453861283	


0.083896051548077	


0.10246235570913	


0.070154133151757	


0.076968911555519	


0.066664935278433	


0.076648435290353	


0.14303019936124	


0.073239259240332	


0.10286340374365	


0.082209283288042	


0.12852398132086	


0.11681662031501	


0.088792214036514	


0.083618206669441	


0.070723743419397	


0.067143828899898	


0.1081620152806	


0.09750621114625	


0.069373496881657	


0.12330741370763	


0.088556809275596	


0.084841841327315	


0.079499298550282	


0.082680050218019	


0.097002371226296	


0.088961575083506	


0.10388877171352	


0.079731735393434	


0.073275001763537	


0.069322377752097	


0.1270934199727	


0.098384786656322	


0.097851702115903	


0.10327464903049	


0.082450592051499	


0.091948109184078	


0.087125065761286	


0.082232160468787	


0.094713223490137	


0.10238195226364	


0.08878138575285	


0.1157235528328	


0.083296370304488	


0.097774369979303	


0.098695993289231	


0.088983492799597	


0.10068847961914	


0.079799687074225	


0.092138030425732	


0.086231524527262	


0.07942110016427	


0.085122923264871	


0.083933626743309	


0.077671649615908	


0.078618693314797	


0.076033107225229	


0.098431682847569	


0.11379772405604	


0.083996437410481	


0.088413122876405	


0.083352628112528	


0.10234204804426	


0.11471914197002	


0.11858355215422	


0.085493119818071	


0.072806704896599	


0.11188805110057	


0.11167653025353	


0.078263215990145	


0.10661664813378	


0.083083167729918	


0.10186627832434	


0.069304179794475	


0.076841958723477	


0.066417716433521	


0.075942966873382	


0.14571096665964	


0.070936682956874	


0.10283413563837	


0.082101315803018	


0.13054960471601	


0.11648810550008	


0.08881240162922	


0.083588470998521	


0.070793391123506	


0.067320410474464	


0.1075650862735	


0.096541458761907	


0.068818601576234	


0.12381323869501	


0.08807305009447	


0.084646992538561	


0.079232611511126	


0.082244573116127	


0.096766435922088	


0.089038725177933	


0.10387997831436	


0.079952634624766	


0.072980952026722	


0.069103922259205	


0.12684813375873	


0.09797587303786	


0.097169777285501	


0.10313104692055	


0.082173709925668	


0.092204267286772	


0.087371344504471	


0.082781512153192	


0.094833667365028	


0.10092029323636	


0.088160551170517	


0.11555047859825	


0.084360608185111	




0.097600813186237	


0.09858705666014	


0.088616620621247	


0.10020490027893	


0.080081268018142	


0.092088058070563	


0.086230101480941	


0.079121259132477	


0.085095743483269	


0.083839087593696	


0.077941128377352	


0.078774422565695	


0.075784950106439	


0.097777194007332	


0.11317700368397	


0.083665809548774	


0.088478887497392	


0.083400228188497	


0.10229387188441	


0.11408885839337	


0.11834222527163	


0.085165422727628	


0.07261500713572	


0.1118353196628	


0.11167961848935	


0.077915706623799	


0.10622151359952	


0.082606726308367	


0.10144726412381	


0.069079155363391	


0.076842751490484	


0.066600562311369	


0.076344606270329	


0.13937119599881	


0.070946439480686	


0.10286674929805	


0.081765432561718	


0.13147612382223	


0.11681021868205	


0.089000847238949	


0.083444362432331	


0.07084440658816	


0.067258353024647	


0.10735280330221	


0.095780008967327	


0.069023653999772	


0.12279128621446	


0.08815026045055	


0.084679104141427	


0.07875256528682	


0.082271782504992	


0.097040157799399	


0.088756898470277	


0.10387856384933	


0.080404446033195	


0.0727526655689	


0.069057036815023	
decayed learning rate by a factor 0.97 to 0.00194	


0.12660988954719	


0.09745532287353	


0.097021949071271	


0.10284087001599	


0.081763577900964	


0.09206337444514	


0.08749765676051	


0.082840394098752	


0.094778610896023	


0.099671850669041	


0.088041070589551	


0.115279924377	


0.087960628176247	


0.097184791446085	


0.098912138284588	


0.088487284198315	


0.099916206166689	


0.080030234905249	


0.092453883591464	


0.086663060035144	


0.07883007363614	


0.085595714692472	


0.083966009972286	


0.078279613016488	


0.079372700735605	


0.07598755323354	


0.096970099424902	


0.11224010307689	


0.083343984624288	


0.088450244677826	


0.08340095101361	


0.10222013518709	


0.11344461895931	


0.11785411832079	


0.084735364180043	


0.072698342683952	


0.11114289971049	


0.1120117188075	


0.077764380407426	


0.10602886113951	


0.08242822037636	


0.10102829054171	


0.069175621462999	


0.076937227621023	


0.066920240192132	


0.076757279555128	


0.13761655434321	


0.071776806957918	


0.10241707094483	


0.081611785493052	


0.12944796048421	


0.1171531536507	


0.088111795031462	


0.082933674456252	


0.070657963069893	


0.067043835288543	


0.10763545647346	


0.096954912377868	


0.06845990922717	


0.12332760564669	


0.088304145359682	


0.08481444550609	


0.078141642966509	


0.082395262306981	


0.097437611220057	


0.08812556603757	


0.10377112818856	


0.080965853608103	


0.072926783214201	


0.069122328692502	
decayed learning rate by a factor 0.97 to 0.0018818	


0.12624179109471	


0.096871743439287	


0.096939624247453	


0.10217383581108	


0.081312259228764	


0.091818875500703	


0.087505486958751	


0.082800581312278	


0.094514643817696	


0.098287455227784	


0.088078350932413	


0.11526609223352	


0.090150706722188	


0.097074200995934	


0.098922109877724	


0.087951151521289	


0.099952446195658	


0.080482728836954	


0.092607776569299	


0.086740232280121	


0.078607584357389	


0.085960608921732	


0.083826981463583	


0.078242065559469	


0.079555633317689	


0.075333367163906	


0.097468036804529	


0.11199617473597	


0.082887257379933	


0.088574334153341	


0.083269529267202	


0.10205594072051	


0.11280313244636	


0.1174850747039	


0.084276592619754	


0.072684351495857	


0.11008837433015	


0.1128971814486	


0.077625344784646	


0.10556957039269	


0.081866915487484	


0.10050539150468	


0.06856471723048	


0.076962771984646	


0.066899245663546	


0.076499965024196	


0.13796723903516	


0.070837440131859	


0.10220454006132	


0.081482328094964	


0.1302109609305	


0.11680239166559	


0.087724344300925	


0.082947338551054	


0.071066976144546	


0.067483683240576	


0.1063338532082	


0.097186088679817	


0.068045148372101	


0.12361455468258	


0.087982998925248	


0.08480238429673	


0.077920405808395	


0.082108816339389	


0.097699390544529	


0.087647742750658	


0.10371427775815	


0.081459216094594	


0.073134948284378	


0.069032361909273	
decayed learning rate by a factor 0.97 to 0.001825346	


0.12601889793738	


0.096256401058087	


0.096658664990824	


0.1008224241421	


0.080893803462504	


0.091726999249077	


0.087606811463144	


0.082924124038473	


0.094315528129518	


0.096664157304199	


0.088444424530037	


0.11511811158572	


0.092818478297465	


0.096942707106899	


0.099054937518866	


0.087384221379987	


0.10013616245096	


0.081298146311003	


0.092944497531803	


0.086734155730476	


0.078258822377957	


0.086329325349591	


0.083759627436005	


0.078220443010305	


0.079508492353643	


0.074118812231034	


0.098350856368303	


0.11212500833718	


0.082233987628021	


0.089048459323095	


0.083218027269731	


0.10193109083708	


0.11220095353895	


0.11700889468716	


0.084200435971618	


0.072573248668762	


0.10889104200128	


0.11306421847062	


0.077648608783058	


0.10453702435833	


0.080827702021127	


0.099896554542651	


0.067434910253676	


0.07682550813358	


0.066588078502341	


0.075851860799055	


0.1395848941197	


0.068758611622041	


0.10214678556722	


0.081264402132687	


0.13237607473499	


0.11665652729763	


0.087860240232109	


0.083078236657057	


0.071741175231411	


0.068129930550425	


0.10471943973407	


0.095725692855659	


0.067565376107516	


0.12439527652078	


0.08715749392299	


0.084801463629909	


0.078266877928064	


0.081458755117601	


0.097956210158321	


0.087487185153396	


0.10389181329267	


0.081964539309883	


0.072810894732377	


0.068814454372859	
decayed learning rate by a factor 0.97 to 0.00177058562	


0.12590213506086	


0.095486863520136	


0.096178846745263	


0.099670775439561	


0.08034280064686	


0.091557399841931	


0.087607746267388	


0.082820966671833	


0.093956859067964	


0.09515490079846	


0.08830962105109	


0.11420472998398	


0.096340911975723	


0.096918409147887	


0.099311386414765	


0.086666214088041	


0.10087866145663	


0.08240575652503	


0.093513855358144	


0.086665590424347	


0.077667516255365	


0.086647462037114	


0.083885323448741	


0.078368698140167	


0.079578898744734	


0.073222636778277	


0.098516472430178	


0.11106785669219	


0.081460273429161	


0.089658594713429	


0.082860218785407	


0.10146401419435	


0.11127806123256	


0.11642851173551	


0.083313018305285	


0.071934331485655	


0.1082419242871	


0.11531124558663	


0.077762076917695	


0.10333789348394	


0.079668104124376	


0.099019182476993	


0.065922953394251	


0.07616315754062	


0.065315211046808	


0.073438020870055	


0.15228317253846	


0.067477707940952	


0.10182952060915	


0.081362965492267	


0.13401393656338	


0.11711322745271	


0.08803867859988	


0.082856611319494	


0.07224188810668	


0.068514285446204	


0.10335656937442	


0.093918331917213	


0.066936377947001	


0.12565395863032	


0.085787173362998	


0.085265238083453	


0.079293256099185	


0.080472294494446	


0.098125561415878	


0.087054494538408	


0.10465428194555	


0.082907372156661	


0.072149129556854	


0.06848582890319	
decayed learning rate by a factor 0.97 to 0.0017174680514	


0.12548124896638	


0.094433248993904	


0.095453474258283	


0.098937230907712	


0.079300074303958	


0.090976656405668	


0.087191823100813	


0.081860420235288	


0.092677170261721	


0.093434833654795	


0.088653985028025	


0.11208707747508	


0.098672086707239	


0.097253604917545	


0.09939200898478	


0.085956419953842	


0.10241028265724	


0.083369545455152	


0.094453806774241	


0.0863125725197	


0.076541454868909	


0.086485721139351	


0.084107312037702	


0.078379087279531	


0.079034559318282	


0.072798689653865	


0.099001959026719	


0.10854603094861	


0.080168227407896	


0.09100522183687	


0.081842077144575	


0.10004027667672	


0.10960350793873	


0.11588100497602	


0.081005958674962	


0.070658559321991	


0.10833157700672	


0.12013009774993	


0.078198161412473	


0.10135897592819	


0.078215443376486	


0.097242116445953	


0.063105652153952	


0.073181916610071	


0.057530354236861	


0.063515401821865	


0.23139780004658	


0.061840313601	


0.10855358222217	


0.08101969912077	


0.14278240964785	


0.11843679906873	


0.088355016280969	


0.082461067525516	


0.072633045595294	


0.06856790798651	


0.10256718069659	


0.091684780292488	


0.065324546136023	


0.12905143957628	


0.082165575936212	


0.086201498313299	


0.081643760732085	


0.07753774027836	


0.097977249834572	


0.087480567417296	


0.10517585383035	


0.084563066393555	


0.069369297201663	


0.067033057187803	


decayed learning rate by a factor 0.97 to 0.001665944009858	


0.12617461698853	


0.093367124525294	


0.093687525508312	


0.099112009043841	


0.078121247512864	


0.091795526642724	


0.086518458280336	


0.081796825545271	


0.090601277079644	


0.09063703178793	


0.092739864302954	


0.10938876219922	


0.098717120630703	


0.097422640713751	


0.099809923995485	


0.08512569814747	


0.10210314329752	


0.085196633552936	


0.096707008447827	


0.086410023398431	


0.073995189180242	


0.086353235160677	


0.083158377244152	


0.078397423714548	


0.077456892313852	


0.070248751224103	


0.10104023413854	


0.10471095627139	


0.077421575849953	


0.094281201126654	


0.080581299557281	


0.097905981728937	


0.10732393437996	


0.12131754842307	


0.079852496714594	


0.068971720167118	


0.10744692712938	


0.12304164683592	


0.077001168871997	


0.097954158228686	


0.076197018484585	


0.094072685005414	


0.059836374055051	


0.067245622190119	


0.051698087140256	


0.062337321756348	


0.2286631406231	


0.063351043857391	


0.10928027125384	


0.083671593997266	


0.14071639124251	


0.11697127885907	


0.091400300018089	


0.084236099545066	


0.072944425057806	


0.067959552884941	


0.10497252730698	


0.091245671921088	


0.06689564187335	


0.12599989981733	


0.079398692360583	


0.084541636873089	


0.08402707361958	


0.075118032618333	


0.096082016435972	


0.090008292301081	


0.10375877586224	


0.083323016427762	


0.067374982745872	


0.065742653940096	
decayed learning rate by a factor 0.97 to 0.0016159656895623	


0.12797860883849	


0.094920830665877	


0.092611264447891	


0.10237042473889	


0.078829505351866	


0.093160117895396	


0.086895876874751	


0.08217337392551	


0.089675218156666	


0.091468828665088	


0.091974954890113	


0.10901776002175	


0.095787881138999	


0.097115265973529	


0.099931887893901	


0.085116162406416	


0.099892326753924	


0.085451897662169	


0.096856143708809	


0.086082262826811	


0.072890435274472	


0.085485904857736	


0.081691051309627	


0.077394473816498	


0.075089072931388	


0.068636945431751	


0.10227960784276	


0.10406424172988	


0.075573373771229	


0.096743427389825	


0.080011593644841	


0.096639975199553	


0.10670394691701	


0.12873633692542	


0.079942767726614	


0.069091955057133	


0.10812415542148	


0.12263920146358	


0.07562693461432	


0.095832312464444	


0.07594532883791	


0.093429781678996	


0.06007095939125	


0.065463633947793	


0.051694489852427	


0.063297060142391	


0.21991323689216	


0.064325431755945	


0.10976801158177	


0.0853963247607	


0.13939484766161	


0.11786480643711	


0.093192016876539	


0.084106408338558	


0.071913916032055	


0.066414372813796	


0.1085926041862	


0.089980687900479	


0.067091148188345	


0.12505713789331	


0.078988248401888	


0.08426207406178	


0.08320378913732	


0.073596870038161	


0.095017405563161	


0.091645752578466	


0.10306408597344	


0.083211617339008	


0.066273676619268	


0.064757377773722	
decayed learning rate by a factor 0.97 to 0.0015674867188754	


0.12914444073421	


0.095344891204733	


0.091480229131337	


0.10472164872343	


0.078722521375718	


0.094340736509803	


0.087250817490426	


0.082980627325441	


0.089187653185795	


0.094108153048501	


0.087757071060037	


0.11170634876313	


0.092328910653093	


0.09627558196567	


0.10055149770824	


0.085424399340306	


0.098011286580426	


0.084923022935402	


0.096149434132174	


0.086139898163726	


0.072564302337814	


0.085374164871803	


0.081241516611652	


0.077232788140758	


0.074486122926464	


0.068746079971584	


0.100484882173	


0.10579018400966	


0.074420633085547	


0.098059014260698	


0.080296727868565	


0.096193829043339	


0.10674175157049	


0.12978141385305	


0.079624040484369	


0.068684397691005	


0.10941542377472	


0.12331491506677	


0.075627629020468	


0.094733695681504	


0.076029981397831	


0.093359209484241	


0.060431829674151	


0.066410717025051	


0.05256746985117	


0.064577299679145	


0.20670454875186	


0.065052910793709	


0.10945637984235	


0.086919922216838	


0.13808546874709	


0.1177513359045	


0.094561522470967	


0.084340240594032	


0.071788471198615	


0.066174868391267	


0.10870982443438	


0.088337576330552	


0.066835984280304	


0.12483662990997	


0.079214499026274	


0.083865507533129	


0.081537574792115	


0.07276824211701	


0.094348533409969	


0.09265619747614	


0.10270838401937	


0.083523841301203	


0.065755143321289	


0.064044864715055	
decayed learning rate by a factor 0.97 to 0.0015204621173091	


0.12943522921788	


0.09528807859226	


0.090692528850406	


0.10610574691054	


0.078332232921896	


0.095099394771826	


0.087583589038716	


0.083696807575057	


0.088726474506159	


0.096146569277237	


0.084757033159015	


0.11380590418552	


0.09085520203679	


0.095885947396986	


0.1008017226057	


0.085572644386449	


0.096671054351862	


0.084611397761283	


0.095451767422458	


0.086289696110141	


0.072374934571351	


0.085420975412554	


0.081032051957931	


0.077469995804662	


0.07464397557116	


0.069249821597376	


0.0983419768601	


0.10766882143536	


0.073853734028086	


0.09854222705575	


0.080713182989603	


0.096041287046944	


0.1069553867344	


0.12893972437281	


0.079437678033511	


0.068358507584279	


0.11104733543024	


0.12243628098857	


0.075653813924069	


0.094482716829884	


0.076302560952556	


0.093479558020772	


0.06084862779497	


0.067515982943984	


0.053515704856794	


0.065081536124017	


0.19740737860232	


0.065339679420173	


0.11045887899687	


0.087911205652749	


0.13695705020857	


0.11741384322385	


0.095350082269572	


0.08442923097921	


0.071783679417272	


0.066260995844223	


0.10807275241641	


0.087193364335435	


0.066651364967042	


0.12461152159014	


0.079543850618581	


0.083554526012923	


0.080353750162772	


0.072265621850534	


0.094052023709082	


0.093199956686843	


0.10255833486174	


0.08369873045873	


0.065432483656354	


0.063573674088297	
decayed learning rate by a factor 0.97 to 0.0014748482537899	


0.12925775816943	


0.095253966337907	


0.090216511660125	


0.10672213470365	


0.078062246590978	


0.095490847108767	


0.087640583891249	


0.083963240303788	


0.088395750337694	


0.09742467462131	


0.083374239392491	


0.1147935504408	


0.090363401556279	


0.095982717449157	


0.10066708697599	


0.085642811462353	


0.09597945460674	


0.08450109803676	


0.094791022960288	


0.086193325665491	


0.072099349506567	


0.085288519366459	


0.080911534034944	


0.077660874523847	


0.074879174772528	


0.069769895607531	


0.097094127064689	


0.10881861594221	


0.073429261906987	


0.098802413585866	


0.08093406732225	


0.095742975629689	


0.10683961719312	


0.12813126005327	


0.079016716396992	


0.068063294015694	


0.11245626706555	


0.12284241258606	


0.075702938510391	


0.094377121171416	


0.07664347655967	


0.093606309957954	


0.061237106877348	


0.068328382140146	


0.054419597593843	


0.06522840696897	


0.19158765722017	


0.065411112602944	


0.11097631938843	


0.088525686119044	


0.13637729798802	


0.11707703278699	


0.095819104378185	


0.084305144960939	


0.071699356673482	


0.066370330863405	


0.10735886920918	


0.086376821025704	


0.066416547888243	


0.12454123992336	


0.079844513514909	


0.083440498860747	


0.079319329387145	


0.071846930812376	


0.093891457023458	


0.093497265503633	


0.10271470710887	


0.083988146909073	


0.06520489676042	


0.063262551139757	
decayed learning rate by a factor 0.97 to 0.0014306028061762	


0.12880496424664	


0.095116977126707	


0.089882363286508	


0.10717096081821	


0.077768105311481	


0.095746349516231	


0.087445745279662	


0.084245363064519	


0.087981101547325	


0.098616438788489	


0.082254010259602	


0.1155599796158	


0.090296512347122	


0.096321407673135	


0.10030329096494	


0.08562925306895	


0.095532267884582	




0.084556330044109	


0.094081678781375	


0.085903222403438	


0.071829404249178	


0.085026295715433	


0.080768590863705	


0.077932783752213	


0.075063835941113	


0.070146056900925	


0.096116852980005	


0.10943253328028	


0.073026166462494	


0.099016973061502	


0.081144477606001	


0.095409405563579	


0.10666239684431	


0.12763909637341	


0.078561923261328	


0.067690059760124	


0.11370740616496	


0.12365058210634	


0.075719361767226	


0.094260257540508	


0.076865562706351	


0.093624550857035	


0.061537521229195	


0.068732196575004	


0.055186169028411	


0.065189505170504	


0.18622850574297	


0.065503780220155	


0.11169475714298	


0.088951114880959	


0.1355513980241	


0.11708219726742	


0.096122370355261	


0.083922900561644	


0.071473999710407	


0.066360615315929	


0.10692690226121	


0.085868180176383	


0.066368802353822	


0.12419737032776	


0.080078344600507	


0.083505052456179	


0.078668430604321	


0.071368165481419	


0.093769528318541	


0.093798150968727	


0.10312568739437	


0.084036850501308	


0.064935316731805	


0.062961801212397	
decayed learning rate by a factor 0.97 to 0.0013876847219909	


0.12829429106445	


0.095113346732456	


0.089576511465666	


0.10768952642224	


0.077650862018435	


0.095978075109493	


0.087040110480172	


0.084528839718912	


0.087714721613297	


0.099628121056158	


0.081405606455057	


0.11594285421119	


0.090253283038902	


0.096800012134325	


0.099810067719076	


0.085640185421313	


0.095233305004667	


0.084583753670717	


0.093382382455769	


0.085468912829346	


0.071552742647929	


0.084664557376283	


0.080606066713138	


0.078135493182674	


0.075099027705895	


0.070410741668059	


0.095312728228229	


0.10975919320963	


0.072582649718132	


0.0992708977597	


0.081313022903442	


0.094955004276043	


0.10646562435324	


0.12731927342723	


0.078051140620113	


0.067292836366713	


0.11463463856951	


0.12484190091944	


0.075730104009142	


0.094073973036266	


0.077016481662849	


0.093608131273611	


0.061860736712929	


0.068827150139531	


0.055839032118295	


0.065209981886753	


0.17994401034555	


0.065690707540834	


0.11307905727728	


0.089289658762507	


0.13446217246204	


0.1173950209731	


0.096360461916309	


0.083411707149705	


0.071185480188593	


0.066215952012525	


0.10680016817631	


0.085544448271808	


0.066504531379708	


0.1236108435267	


0.08023437772479	


0.083665348013434	


0.078369301937884	


0.0708692509097	


0.093659002544987	


0.094176368607885	


0.10365503484604	


0.083830558980682	


0.064607535495922	


0.062644836548248	
decayed learning rate by a factor 0.97 to 0.0013460541803311	


0.12784800248684	


0.09524329928303	


0.089253783904763	


0.10834359812213	


0.077705445387406	


0.096218384185374	


0.086508285558056	


0.084749966763136	


0.087635423751245	




0.10042663313303	


0.080789572369378	


0.11605498212397	


0.090121207127981	


0.097356940225039	


0.099279805106961	


0.085688003392608	


0.094994452676149	


0.084528863703963	


0.092755001756545	


0.084962030660051	


0.07126988126721	


0.084249087582437	


0.080410904707705	


0.078210370639811	


0.074985801118656	


0.070686460691717	


0.094604668052695	


0.10991501116888	


0.072093298271932	


0.09957573729106	


0.081446216296885	


0.094405596600194	


0.10620740009954	


0.12707414118447	


0.077554310178843	


0.066912667128939	


0.11529586512702	


0.12601940213634	


0.075679314361485	


0.09380813638942	


0.077092479679665	


0.093573975959431	


0.062246908833265	


0.06880459686242	


0.056455370005657	


0.06542255010296	


0.17295400604485	


0.065947675794945	


0.11486826066163	


0.08962675029294	


0.13319036838511	


0.11789218485561	


0.096598117331852	


0.082885175604325	


0.070879460957083	


0.065987257405551	


0.10686670029213	


0.085301553961612	


0.06671274830179	


0.12295253473252	


0.080364772005327	


0.083862111105529	


0.078224525590594	


0.070380239984114	


0.093507655774727	


0.094647166496316	


0.10415259929101	


0.083542475052913	


0.064238361748183	


0.06231635374208	
decayed learning rate by a factor 0.97 to 0.0013056725549212	


0.12754826020465	


0.095395353257307	


0.088910757247489	


0.10917237469835	


0.077840736278287	


0.096517708383874	


0.086010759923522	


0.084995761170616	


0.087769017665759	


0.10093720951412	


0.080289909982026	


0.11612707778103	


0.089893947275125	


0.097970133058121	


0.098757764429453	


0.085699380091504	


0.09475102989256	


0.084460362064863	


0.092183414518417	


0.084427767221902	


0.07100913088851	


0.083778255087514	


0.080149848136725	


0.078238639764393	


0.074802864356852	


0.071074109943992	


0.093528217253655	


0.10998654836203	


0.071607906683315	


0.099881113592888	


0.081842235898485	


0.094127710345282	


0.10588907276748	


0.12674447723482	


0.077227668354098	


0.066511793136165	


0.11598009278639	


0.12676478538242	


0.07543255308052	


0.093566329528481	


0.077043809798195	


0.093490690103456	


0.062700867851	


0.068881479063046	


0.057369359296715	


0.066375355258809	


0.16353999412481	


0.066345255785851	


0.11614851097881	


0.090049915415725	


0.13192819263981	


0.11838741007202	


0.096844019634189	


0.082383465645263	


0.070469481539121	


0.065701152006639	


0.10717008106792	


0.085134801116998	


0.066870561226395	


0.1223927359218	


0.080669339630926	


0.084071781321374	


0.077992347015187	


0.070019984411857	


0.093225096321483	


0.095163238143331	


0.1044816879839	


0.083370921463998	


0.06391872555227	


0.062013960656635	
decayed learning rate by a factor 0.97 to 0.0012665023782736	


0.12735206126958	


0.095431538927949	


0.088626807974481	


0.1101157974902	


0.077940448809459	


0.096903031989162	


0.085766774821709	


0.085505015148964	


0.088245568460901	


0.10119378847922	


0.079655373914414	


0.11628910280412	


0.08953250652598	


0.098639428232037	


0.098189354426652	


0.085613258804751	


0.094462977906751	


0.084467147906343	


0.091500172315861	


0.08384879121791	


0.070902187712246	


0.083170487448862	


0.079811284259633	


0.078342974576315	


0.074662439325751	


0.071661982683699	


0.091146812530484	


0.11039186653851	


0.071267018398868	


0.10008134278158	


0.082833768104085	


0.09500836127765	


0.10557030619824	


0.12589743671444	


0.077507158571097	


0.066129331893317	


0.11693744033998	


0.12635754884588	


0.074836990471618	


0.093609336691297	


0.076789955384935	


0.093280190717235	


0.063021074096705	


0.0689012103553	


0.058259020110697	


0.068206922584741	


0.15476722208258	


0.066555542962368	


0.11711487477529	


0.090324938610689	


0.13156964294081	


0.1193718547035	


0.0967627306166	


0.081493761249008	


0.069660766050438	


0.065104448292152	


0.10815138536668	


0.08522930514171	


0.066754329090492	


0.12212395185156	


0.081398799546641	


0.084589296997423	


0.077538745203232	


0.069811942416987	


0.093024385594948	


0.095547778550408	


0.10491105466336	


0.083500402722124	


0.063683043717094	


0.061754360788889	
decayed learning rate by a factor 0.97 to 0.0012285073069254	


0.12709165023446	


0.095193936528073	


0.088446567480406	


0.11078797378995	


0.077824398035711	


0.097245683811032	


0.085503090498948	


0.086032156782555	


0.088817122906165	


0.10150975454147	


0.078977127246128	


0.11632236701751	


0.08941350924287	


0.099317060296915	


0.097605905473462	


0.085454156887234	


0.094225178064697	


0.084641863411555	


0.090730842683969	


0.083168976484992	


0.070827132235174	


0.082440566153044	


0.079495812784133	


0.078400330524911	


0.074521587214295	


0.072196608491789	


0.088598111587187	


0.11124643115854	


0.070911662919927	


0.10037275615554	


0.083573339534072	


0.096222160645905	


0.10511332484511	


0.12449393324666	


0.078377110759623	


0.065966321740491	


0.11776937409395	


0.12452090609063	


0.074106806613678	


0.093782450668773	


0.076368734118377	


0.092903053123604	


0.062918757739643	


0.06856461977072	


0.05839976166672	


0.069879075207155	


0.15012774278121	


0.066510698711984	


0.11754593448362	


0.090465461374732	


0.1322353972376	


0.12087118750775	


0.09648152231579	


0.080332639342529	


0.068645060288926	


0.064292574739627	


0.10951422816118	


0.085455805451938	


0.066384432385532	


0.12210242296451	


0.082234531556045	


0.085389112901331	


0.076985193398415	


0.069537762196073	


0.092944959323512	


0.095882368511548	


0.10555062513072	


0.083924020985555	


0.063411747147966	


0.061436085328172	
decayed learning rate by a factor 0.97 to 0.0011916520877176	


0.12672009363832	


0.094725486205215	


0.08825024605982	


0.11125439836743	


0.07745920540495	


0.097575677813042	


0.085046607390918	


0.086425614759456	


0.089125002452673	


0.10185876391394	


0.0785782491077	


0.11620363684689	


0.089678332374505	


0.09993422719403	


0.097100114495813	


0.085273679701386	


0.094016115277033	


0.084956103647007	


0.090064482301544	


0.082491510696523	


0.070421366476513	


0.081726870750012	


0.079259252386483	


0.078484765358411	


0.074347079708762	


0.072456355291762	


0.08701014146087	


0.11172065881539	


0.070324093729462	


0.10094636140218	


0.083931024280109	


0.096631669940873	


0.10448185334151	


0.12345038156474	


0.078889727757155	


0.065801686633881	


0.118427849912	


0.12285945291731	


0.07347211442647	


0.093682065731271	


0.075885796119769	


0.092395478418985	


0.062484532722394	


0.068010288781724	


0.05798471529932	


0.070493922422859	


0.14811016137321	


0.066321108911195	


0.11739911114535	


0.090905456504833	


0.13336933531169	


0.12226751926877	


0.096524310646196	


0.079370198031537	


0.067804489974331	


0.06357378170022	


0.11070125977351	


0.085331455182131	


0.065925461102812	


0.1221675455313	


0.082646306087001	


0.086137625304018	


0.076537132645793	


0.068916619500709	


0.092815288069161	


0.096483888922592	


0.10607455155062	


0.084426828780269	


0.062893213842415	


0.06090306038062	
decayed learning rate by a factor 0.97 to 0.0011559025250861	


0.12648303673507	


0.094202235104631	


0.087882365666203	


0.11190906128602	


0.076978313606595	


0.098073140048112	


0.08458597177657	


0.086794365198496	


0.089121861738303	


0.10208700690357	


0.078269056741603	


0.11618668043322	


0.09019241249092	


0.10049340081792	


0.096730916500582	


0.085046869040023	


0.093694312433017	


0.085386678838994	


0.089601619124425	


0.081880421349754	


0.069662592091411	


0.081087489910597	


0.079021749824754	


0.078646386599671	


0.074124849993778	


0.072450472635949	


0.085834804018828	


0.11172707469029	


0.069496176981572	


0.10178694772044	


0.084102807431335	


0.096451246301534	


0.10365292861921	


0.12303042160218	


0.078823795858609	


0.065388847205678	


0.11914573491812	


0.12219529647789	


0.072877334338746	


0.093163112586515	


0.075353400592468	


0.091751335827126	


0.061796423865376	


0.067328741771912	


0.057168899494165	


0.070302409466226	


0.14827542832361	


0.065883578653618	


0.11722384534012	


0.091813040608261	


0.13487729027078	


0.12334894438503	


0.097184030840025	


0.078699082309138	


0.067175393849055	


0.063004477180321	


0.1116123100567	


0.084523859823357	


0.065395676402128	


0.12238844047928	


0.082383642886943	


0.08673910429132	


0.076083963721914	


0.067859610308778	


0.092461950849906	


0.097509604218662	


0.10637081537228	


0.085047374525435	


0.062032123528689	


0.060088121455645	
decayed learning rate by a factor 0.97 to 0.0011212254493335	


0.12644027679496	


0.093611328447485	


0.08720305399908	


0.11291124703705	


0.076385008395043	


0.098887569625695	


0.084202443895846	


0.087327783974761	


0.088770164705395	


0.10234228661062	


0.077627391408979	


0.11646188687367	


0.0910781215228	


0.10116288565192	


0.096350243689755	


0.084644111443217	


0.093159122407447	


0.08598951801261	


0.089165654528458	


0.081281118261122	


0.068611097386589	


0.080452479042636	


0.078679388026888	


0.078852504661115	


0.0738073995252	


0.072200628186324	


0.084647132552135	


0.11149844381126	


0.068453745919106	


0.1028871381543	


0.084078053824681	


0.095874713504833	


0.10262019865895	


0.12324154078247	


0.078406347205264	


0.064652011419954	


0.12009133011917	


0.12204463933842	


0.072140581193346	


0.092206037829498	


0.074648370690108	


0.09085750695466	


0.060737869661862	


0.066429794492682	


0.055809016993175	


0.069403453188999	


0.15219059945651	


0.065046135979952	


0.11675071249571	


0.093321583437156	


0.13749922377693	


0.12399613256081	


0.098646971604662	


0.078319595187093	


0.066718391770801	


0.062567558080637	


0.11227280046149	


0.082731123012987	


0.064657797189622	


0.12298138176067	


0.081339622050628	


0.087156776654787	


0.075252365403903	


0.066373902888351	


0.091644131237095	


0.099027773653604	


0.10656852516185	


0.086183914605905	


0.060936427456766	


0.058938261965937	
decayed learning rate by a factor 0.97 to 0.0010875886858535	


0.12628289969014	


0.092807266424108	


0.086040213757572	


0.11429373147972	


0.075510182021112	


0.10021474389012	


0.083887192919067	


0.088371418168556	


0.087971560026675	


0.10309910772194	


0.0763894262311	


0.1171312119568	


0.092343150785525	


0.10193456862504	


0.095995746800823	


0.084043027538911	


0.092375591477214	


0.086862813711908	


0.088433651260406	


0.080766200968942	


0.067380251594423	


0.080007899689551	


0.078211585132681	


0.079381375879234	


0.073418249813194	


0.07158359478083	


0.083336744378444	


0.11158350636172	


0.06709285140725	


0.10428755722506	


0.084133199457184	


0.095347849792494	


0.10152370123103	


0.1241442018589	


0.07812053757748	


0.063730412805563	


0.12095675963247	


0.12172341704483	


0.071336989631507	


0.090869609604216	


0.073792126701281	


0.089762603671847	


0.059290540946925	


0.065665783634996	


0.05445153002552	


0.068996427894038	


0.15701358141595	


0.064203423750073	


0.1145857833435	


0.095528902794589	


0.14067124054929	


0.12426317194441	


0.10077475643902	


0.078278956101407	


0.066582951192012	


0.062398239802628	


0.11227754923859	


0.079827977670405	


0.063637895150273	


0.12404756066875	


0.07997884877854	


0.087489910935968	




0.074508938718053	


0.064702685639085	


0.090602504183466	


0.10077960140843	


0.10685553903955	


0.087615035049004	


0.059624423065544	


0.057664442717157	
decayed learning rate by a factor 0.97 to 0.0010549610252779	


0.12601617293411	


0.09199447725471	


0.084652401782745	


0.11584426664404	


0.074436447446527	


0.10171648146407	


0.083552971587682	


0.089651599305611	


0.086852799807744	


0.10336000058367	


0.074626555355103	


0.11826203333292	


0.094058957986687	


0.10264451481966	


0.095906657807388	


0.083322993044659	


0.091396869120658	


0.087990422648233	


0.088385841537332	


0.080363498881746	


0.065865089557627	


0.07964041725209	


0.077587740088818	


0.079854258654167	


0.072926821490759	


0.070404604417348	


0.082114198970926	


0.11239247803871	


0.065582089684332	


0.10574925914967	


0.084351977910791	


0.095326088127732	


0.099938380043042	


0.12598139669053	


0.078055338501398	


0.062740957741469	


0.12114125859111	


0.1207805373999	


0.070247793152216	


0.08949872722991	


0.072589662136593	


0.088290335842698	


0.057335622054166	


0.065074311673944	




0.053307853968528	


0.069339633115355	


0.16043076933549	


0.063139215023315	


0.11238935347213	


0.097749124391057	


0.14563803242977	


0.12395841646182	


0.1029005106511	


0.078458302507977	


0.066594640230511	


0.062314011385907	


0.1118871628979	


0.077699662844803	


0.062647268965048	


0.12525715333555	


0.078562895010674	




0.08780423772217	


0.074153862311223	


0.063129535811978	


0.090060320217826	


0.10268838546176	


0.10672734341124	


0.088849675294676	


0.058262573943173	


0.056465663094849	
decayed learning rate by a factor 0.97 to 0.0010233121945196	


0.12626316331368	


0.091446070243418	


0.083510379864633	


0.11749791704246	


0.073592747382925	


0.10308932790938	


0.083511911748013	


0.090574283225446	


0.085848381627287	


0.10262402152044	


0.073047463207305	


0.11924995528602	


0.095996073509219	


0.10309634743372	


0.095866188415645	


0.08261419510588	


0.090432833702756	


0.089177971088269	


0.089006896769786	


0.080140956829784	


0.064365990134324	


0.079468422860635	


0.076999843507591	


0.079740629008522	


0.072764231149247	


0.069620008898943	


0.080474290079064	


0.11254401000241	


0.064207000221433	


0.10714404206674	


0.083851889485804	


0.094745893197287	


0.098387081358947	


0.12777881906421	


0.0777053159025	


0.061768051681729	


0.12088568185622	


0.12055434231379	


0.069296896377926	


0.088260651696766	


0.071412303529989	


0.086795017044852	


0.055287139684907	


0.064864435593597	


0.052480256517738	


0.069788436357184	


0.16246588873408	


0.06201238288678	


0.10985723371908	


0.1000174007109	


0.15131459366466	


0.12286326789007	


0.10522311548423	


0.079122753973587	


0.067008071954323	


0.062396317433895	


0.11125679421526	


0.075817552539725	


0.061935031695627	


0.12589986601275	


0.0764897722443	


0.087763616712069	


0.073996495982552	


0.061738391480917	


0.089574371082347	


0.10461952613269	


0.10611405388417	


0.089873548020194	


0.057097981896269	


0.055294039101137	
decayed learning rate by a factor 0.97 to 0.00099261282868397	


0.12758879845704	


0.091065227390878	


0.08258386391681	


0.11906345368324	


0.072877402048603	


0.10423980345672	


0.083674522309998	


0.091243028685586	


0.08480127585997	


0.10161420536332	


0.071650737140967	


0.1202557211561	


0.097981498547173	


0.1035203064425	


0.096038899001073	


0.081922906035495	


0.08958439302787	


0.090319447989799	


0.089812397213893	


0.080085114548553	


0.063072422825544	


0.07958442566617	


0.076412595446907	


0.079611845783008	


0.072871789382985	


0.06922881532816	


0.078593324263805	


0.11067970408841	


0.063215503856701	


0.10813243456677	


0.082643448743164	


0.093571340802576	


0.097169615112263	


0.13019667547627	


0.077138747011324	


0.060806021434659	


0.12052897427355	


0.12092315534745	


0.06854695856998	


0.087304795022069	


0.070380078352989	


0.085433494411694	


0.053570909846219	


0.064700532251255	


0.051671539399412	


0.069646100974805	


0.16434454851033	


0.060957332702672	


0.10826014797338	


0.10211794387954	


0.15560596749842	


0.12122605842066	


0.10731390072107	


0.079891254350712	


0.067617834336334	


0.062578501109772	


0.11017635768961	


0.074040607325476	


0.061276969461596	


0.12663378341538	


0.074381462419049	


0.087616783861434	


0.073936313999778	


0.06062948141962	


0.089294637477769	


0.1062227578274	


0.10550516037694	


0.090891783005216	


0.056237929828066	


0.054323568447428	
decayed learning rate by a factor 0.97 to 0.00096283444382345	


0.12953654333287	


0.090767791985967	


0.081781489239673	


0.12024819047714	


0.072168504060542	


0.10512743117683	


0.083687834965876	


0.091658908959828	


0.083794956696754	


0.10074293829608	


0.070505871822942	


0.12128018456866	


0.099897633443915	


0.10401191876849	


0.096272665495172	


0.08132404842584	


0.088889177325037	


0.091283676519001	


0.090472673305971	


0.079975256651486	


0.062078356719695	


0.07965165498797	


0.075839337512366	


0.079390515662549	


0.072921859935214	


0.068953499222319	


0.077025616639349	


0.10966911621831	


0.06249896075829	


0.10878971722941	


0.081486859031752	


0.092883916779393	


0.096194111820909	


0.13184079807867	


0.076734166674662	


0.059861837286887	


0.12026376192252	


0.12138140999162	


0.067795611095299	


0.086547601457569	


0.069403793526131	


0.084248334757776	


0.052103152477982	


0.064139363260473	


0.050323756823071	


0.068320005167978	


0.17079600285753	


0.060143119300649	


0.10790443192534	


0.10381837136236	


0.1577474539615	


0.12026034324343	


0.10898014001687	


0.080454368217837	


0.068246274160079	


0.062706658674384	


0.10901107285306	


0.072657122954555	


0.060595797029375	


0.12757651356129	


0.073054096782013	


0.08740383542953	


0.073795578271936	


0.059698111092376	




0.089126574612374	


0.10767667941276	


0.10477882169508	


0.091867957339681	


0.055514257963804	


0.053553095913967	
decayed learning rate by a factor 0.97 to 0.00093394941050874	


0.13143922092814	


0.090552027981483	


0.08110200904217	


0.12121223545614	


0.071603908854808	


0.10580977784164	


0.083796060435	


0.091947677813988	


0.082943382039836	


0.099831188321671	


0.069532911456618	


0.1221672663794	


0.10156685447567	


0.10434223398409	


0.096491494671606	


0.080833203830216	


0.088223847152669	


0.092046891475551	


0.091078498612992	


0.079953315046219	


0.061237357972355	


0.079790223190781	


0.075327542428653	


0.07910986872212	


0.072923317513792	


0.068635505187756	


0.075832696361994	


0.10934121800638	


0.061922698012244	


0.1092822099412	


0.080678917560606	


0.092476832341167	


0.095431373428714	


0.13323140167696	


0.076455951162765	


0.059127703414045	


0.12000955707931	


0.12167693832719	


0.067192958130921	


0.085918083284539	


0.068685060780215	


0.083476532884415	


0.051125373852438	


0.063601552321155	


0.049172654469308	


0.067168929031827	


0.17594468211223	


0.059617729050131	


0.10844794099173	


0.10508060544212	


0.15843285144451	


0.11981838116573	


0.11026385465929	


0.080863522751389	


0.068803725430092	


0.062764430711986	


0.1081467632526	


0.071551588273086	


0.060033931644285	


0.12823216696677	


0.072288058128913	


0.087152968776491	


0.07353411512724	


0.05897037403468	


0.088883149705328	


0.10891414353358	


0.10400456499843	


0.092578236913688	


0.054946281223124	


0.052954248847024	
decayed learning rate by a factor 0.97 to 0.00090593092819348	


0.13327018923305	


0.090482176296871	


0.080513911636469	


0.12194591629129	


0.071248007451745	


0.10633713379668	


0.0839785645206	


0.092198751137509	


0.082374395122502	


0.099081410058858	


0.068773219544007	


0.12292583432241	


0.10282483800393	


0.10454875063759	


0.096735261186362	


0.080477609208442	


0.087632853302823	


0.092571049869085	


0.091573502430668	


0.079958261606665	


0.060567840007424	


0.079923796575207	


0.074923030837515	


0.078795489692382	


0.07285083268082	


0.068370721906404	


0.074925920869063	


0.10937343526773	


0.061510160889115	


0.10959905110112	


0.080018610680235	


0.092115971458772	


0.094829597790802	


0.13424368441145	


0.07618346707455	


0.058605662960701	


0.11986605088588	


0.12193366672723	


0.06678279109984	


0.085395774348547	


0.068277213879272	


0.083132525080839	


0.050645874532942	


0.063309430922347	


0.04844643357366	


0.066381205302584	


0.1777323868637	


0.059390560558921	


0.10959652047237	


0.10590814937048	


0.15820691174636	


0.11977805978475	


0.11111451053384	


0.081059747011317	


0.069152753155109	


0.062685739383146	


0.10773427664689	


0.070748490792783	


0.059668185928014	


0.12851743760401	


0.071952152509218	


0.086966426955113	


0.073267395526438	


0.058438684062669	


0.088574568318666	


0.10986116925211	


0.10327468183212	


0.09293825086031	


0.054529297698811	


0.052501722211145	


decayed learning rate by a factor 0.97 to 0.00087875300034768	


0.13503379447665	


0.090552287340876	


0.079997902291204	


0.12242843864406	


0.071106430726395	


0.10673641851214	


0.084204920689084	


0.092457103921982	


0.082083054790152	


0.098555777839119	


0.068220796212935	


0.12348699566715	


0.10371247121727	


0.10470663108579	


0.097007563661257	


0.080238998083134	


0.087162423309173	


0.092868663498655	


0.091973046805204	


0.079927152191447	


0.060082463293526	


0.079989042031156	


0.074645386318814	


0.078454846813831	


0.072673548380416	


0.068181671829703	


0.074250691294176	


0.10963645731706	


0.061244995315387	


0.10973773091781	


0.079423603040364	


0.09179928655859	


0.094348446943377	


0.13465489189634	


0.075890410846947	


0.058297014442777	


0.11978757658792	


0.12223854800063	


0.066525927282079	


0.084959293995176	


0.068085663106504	


0.083080630601179	


0.050489351058392	


0.063237680630844	


0.048081704506822	


0.065954276894019	


0.17659485261105	


0.05935660465074	




0.1108041236667	


0.10641212287241	


0.15779433671812	


0.11992371724192	


0.11161651597436	


0.081150480757444	


0.069353165595524	


0.062579058644445	


0.10759656243381	


0.070184944437844	


0.059488887066154	


0.12846646427013	


0.071749079039143	


0.086841419516014	


0.073084755687465	


0.058062704158762	


0.088128711668194	


0.11052754415826	


0.10261368938825	


0.093143400218988	


0.054224979193783	


0.052160894202491	
decayed learning rate by a factor 0.97 to 0.00085239041033725	


0.13672588735278	


0.090653694635576	


0.079532029469996	


0.12268382245739	


0.071088056545608	


0.1070532157455	


0.084483900928372	


0.092781337376953	


0.081956056423769	


0.098142149135193	


0.067855644646217	


0.12383576423297	


0.10434788351968	


0.10480910944139	


0.09739347687629	


0.080063533356614	


0.086836736831578	


0.093032136309773	


0.092360303799179	


0.07985466165075	


0.059776602109213	


0.079993138621266	


0.074450044116741	


0.078198732096887	


0.072441413763075	


0.068072830214319	


0.073756615833311	


0.10995085757197	


0.061113174213952	


0.10969107210348	


0.078903177643594	


0.091551086031172	


0.094004345695521	


0.13456780417474	


0.075586231999783	


0.058112820331524	


0.11973096196191	


0.12256572830444	


0.066340264762588	


0.084623956202871	


0.067969639316394	


0.083134847169108	


0.05044758942726	


0.063243631676691	


0.047889142473438	


0.065799906333906	


0.17416551990592	


0.059364850682118	


0.11180368605156	


0.10667553778073	


0.15771667699545	


0.12012435170972	


0.11186008300479	


0.081227920798289	


0.069489507476144	


0.062510044930033	


0.10756011922535	


0.069795482301303	


0.05938315015477	


0.1282521121188	


0.071438732274308	


0.086767647892449	


0.072964663281201	


0.057806764025343	


0.087538723324204	


0.11096507486232	


0.10206195345734	


0.093459792370993	


0.053988274056943	


0.051907792619949	
decayed learning rate by a factor 0.97 to 0.00082681869802713	


0.13824057216446	


0.090672671130884	


0.079119647614351	


0.12275305919629	


0.071068389718086	


0.10733061833902	


0.08483066580388	


0.093181575492413	


0.081891940122997	


0.097695132754729	


0.067653372837362	


0.12403315557206	


0.10481596855611	


0.10479901809846	


0.098025547074088	


0.079892383646443	


0.08665636115769	


0.093181154948972	


0.092836212556562	


0.079748724600953	


0.059631710185944	


0.079960592981036	


0.074271203291533	


0.078114062180804	


0.072207372956828	


0.068010764263169	


0.073415086453658	


0.11017259725615	


0.061099248408806	


0.10947301945723	


0.078441064025591	


0.091370592278082	


0.09379508697675	


0.13419151462651	


0.07528277171126	


0.057945650451559	


0.11967742917728	


0.12285132685583	


0.066125796380036	


0.084388764044811	


0.06781098647691	


0.083131299899965	


0.05032999689516	


0.063211100231099	


0.047678498614321	


0.065808748840143	


0.17200305807329	


0.059320491852942	


0.11245338625105	


0.10676830805343	


0.15819583137777	


0.1202945619932	


0.11191495636018	


0.081329178195815	


0.069620311422624	


0.062476453683309	


0.10751645025933	


0.06953442709971	


0.059235039860011	


0.1280788365188	


0.070941210368967	


0.086755879123913	


0.072838221343513	


0.057628975239137	


0.086940774816938	


0.11124853255871	


0.10167408329728	


0.094047531026342	


0.053780349481578	


0.051715494633551	
decayed learning rate by a factor 0.97 to 0.00080201413708631	


0.13939144837599	


0.090525023254672	


0.078778629192479	


0.12268946144001	


0.07094448804929	


0.10756936468871	


0.085205345502601	


0.093543267721453	


0.081812839060741	


0.097148508843299	


0.067562250998824	


0.12413173640644	


0.1052440252055	


0.10467215416146	


0.098918751658504	


0.079680017381975	


0.086581423578222	


0.093429728724379	


0.09345466177827	


0.079614469702966	


0.059597553570468	


0.079915419951389	


0.074067356076193	


0.078183341737529	


0.071988001029025	


0.067941581471298	


0.073182303324573	


0.11024161981331	


0.06115019968878	


0.10916115550453	


0.078008119902609	


0.091213961530142	


0.093674167619369	


0.13379789794347	


0.074979995015001	


0.057721655979864	


0.11963003336764	


0.12309141570906	


0.06582642646679	


0.08421491018768	


0.067565847783648	


0.082989025700609	


0.050041917954196	


0.063100334709277	


0.047336983402634	


0.065876991846276	


0.17081751266088	


0.059189085784071	


0.11270560563641	


0.10677456034139	


0.15916099212299	


0.12039057885039	


0.11184990008357	


0.081445862775709	


0.069769839579117	


0.062452104820807	


0.10743205779978	


0.069360089493233	


0.059001452296213	


0.12808261792352	


0.070316204386044	


0.086821640425084	


0.07264911892826	


0.057475062998821	


0.086504754398981	


0.11146362871652	


0.10144443406402	


0.094882026485815	


0.053577034074941	


0.051547097204812	
decayed learning rate by a factor 0.97 to 0.00077795371297373	


0.14013093757298	


0.090216992299268	


0.078491229935798	


0.12255589823668	


0.070690066160211	


0.10777350143606	


0.085530821284641	


0.093774944722425	


0.081670948451898	


0.096526691667405	


0.067496353888961	


0.12412548012055	


0.10574011976512	


0.10450062162758	


0.099891941090245	


0.079409316027238	


0.086531603497274	


0.09377319072033	


0.09420617224197	


0.079459626242023	


0.059605455645832	


0.079879562008937	


0.073818790057231	


0.078313631606438	


0.071773089976745	


0.067832670399503	


0.072988974286325	


0.11012866800716	


0.061191864973895	


0.10885851386345	


0.077601309737483	


0.091014497385112	


0.0935736386878	


0.1335880829021	


0.074641085028196	


0.057386097687764	


0.11962357458627	


0.12337454803914	


0.065430802187328	


0.084036907304299	


0.067230678773589	


0.082695680580213	


0.049584376759592	


0.062924839580234	


0.046845956523232	


0.065936354091955	


0.17063834862782	


0.058986809955159	


0.11259675942121	


0.10678671226055	


0.16038921062642	


0.12038246431942	


0.11174209263397	


0.081595078988014	


0.069980148824742	


0.062438187515698	


0.10727455978977	


0.06922103600149	


0.058696718442934	


0.12827558132044	


0.069631901527867	


0.086948232123209	


0.072382892288862	


0.057292936683542	


0.086290126424574	


0.11169091902335	


0.101289885803	


0.095852893538433	


0.053358392429866	


0.051362795776033	
decayed learning rate by a factor 0.97 to 0.00075461510158451	


0.14059362066836	


0.08984099656362	


0.078211805740869	


0.1224165472716	


0.070356364247067	


0.10798974059398	


0.085790594747149	


0.093936032715828	


0.081463760420452	


0.09581976102037	


0.067373291647727	


0.12396705673358	


0.10635173428446	


0.10435223981188	


0.10075296271711	


0.079092210000457	


0.086436816307663	


0.094247602794812	


0.095044683932274	


0.079310244553684	


0.059608406178488	


0.079889647312722	


0.073529496914539	


0.078422490189179	


0.071536667548556	


0.06766267895379	


0.07277463023893	


0.10988419890146	


0.061169097880828	


0.10862765783956	


0.077256413031209	


0.090772440969769	


0.093438086016969	


0.13358766565822	


0.074288032790359	


0.056972635886684	


0.11960992816624	


0.12367921462054	


0.064977968471696	


0.083806886872991	


0.066851335068956	


0.082302658401937	


0.049038330026785	


0.062733791134978	


0.046271822939957	


0.065981365491442	


0.17101235543584	


0.058768295401724	


0.1122503522435	


0.10682943812939	


0.16170140924218	


0.12025643286951	


0.1116394797719	


0.081814203968401	


0.070294003102202	


0.062442538312746	


0.1070218718688	


0.069058508506569	


0.058353456121845	


0.12857553210177	


0.06896287903117	


0.087089740964002	


0.072031297045912	


0.057069813844482	


0.086239070034651	


0.1119717329389	


0.10111057665083	


0.096855802146831	


0.053113245832299	


0.051139709004091	
decayed learning rate by a factor 0.97 to 0.00073197664853698	


0.14093656691814	


0.089490907917491	


0.07791043204969	


0.12232204812349	


0.070021979378255	


0.10825270486145	


0.08602679509632	


0.094130424247774	


0.0812196140978	


0.094996838651198	


0.067168299877717	


0.12376285774824	


0.10705624880001	


0.10425237932185	


0.10141910876133	


0.078778671124818	


0.086277508011565	


0.094762096699454	


0.095931356901608	


0.079209353591453	


0.059580498727967	


0.079992282547241	


0.073243922042755	


0.078474354386997	


0.071289024115873	


0.067470345059634	


0.072514549243932	


0.10957454053847	


0.0610734222425	


0.1084741401308	


0.076930691182301	




0.090499186663923	


0.093252639801269	


0.1336977477723	


0.073938399947321	


0.05653490352528	


0.1195495008077	


0.12397944171635	


0.064527308017431	


0.083538262794281	


0.066483585545228	


0.081876261439116	


0.048490916521076	


0.062570128216126	


0.045692867575888	


0.066021151843658	


0.17142654693977	


0.058577309852252	


0.11181123454193	


0.10690458290661	


0.16300653468585	


0.12002770563993	


0.11159630950018	


0.082129104462328	


0.070729004086971	


0.062466219585766	


0.10667866852316	


0.068834146493766	


0.057996562738281	


0.12887793265106	


0.068378142698115	


0.08721405944343	


0.071606290277723	


0.056830591673401	


0.086250137243298	


0.1123129930186	


0.10084645881641	


0.09784527788018	


0.052845554770009	


0.050888278571448	
decayed learning rate by a factor 0.97 to 0.00071001734908087	


0.14124881506423	


0.089231708248095	


0.077594280086487	


0.12228606812052	


0.069744960880659	


0.10853531052008	


0.086284417653265	


0.094369291182884	


0.080974638592398	


0.094040036012408	


0.066912171382756	


0.12365994586803	


0.10778301642766	


0.10419738608856	


0.10192227957391	


0.078513901210852	


0.086088961041035	


0.095239838970604	


0.096840109083126	


0.079208777891015	


0.059509587888933	


0.080242754685248	


0.073024071939963	


0.078484781982571	


0.071068748240714	


0.067323590745657	


0.072251038100004	


0.10923828455638	


0.060911542075877	


0.10840434268962	


0.07651622436351	


0.090234952087628	


0.093020898150505	


0.13375874344461	


0.073625221312671	


0.056137814535921	


0.11941361173814	


0.1242038807676	


0.064124696449732	


0.083290565813894	


0.066163298575364	


0.081478602321761	


0.047986376921146	


0.062452414017933	


0.045143160699143	


0.066074295015954	


0.17167952760279	


0.05842685174304	


0.11135555999991	


0.10700214747318	


0.16426660557461	


0.1197275544152	


0.11161124804875	


0.082553509928284	


0.071286642575519	


0.062499092467576	


0.10626995796543	


0.068585774275255	


0.057636505377188	


0.12912051853479	


0.067906837533955	


0.087341950289574	


0.071128159852586	


0.05662808223813	


0.086278369007448	


0.11266589098636	


0.10049726459298	


0.098835270794348	


0.052588323942276	


0.05065299033574	
decayed learning rate by a factor 0.97 to 0.00068871682860844	


0.14156076156007	


0.08911598132188	


0.077298928827149	


0.12225796345942	


0.069557185290369	


0.10877618704187	


0.086571857854086	


0.094564126058459	


0.080756180917291	


0.092935866690093	


0.066651586158737	


0.12361886687556	


0.10850001859559	


0.10418871090733	


0.10233042527356	


0.078309468937418	


0.085916040348087	


0.095683853893209	


0.097711838003179	


0.079332192974012	


0.059406781947721	


0.08066912940909	


0.072917198143552	


0.078484742236136	


0.07092324217904	


0.067268764282287	


0.072031983522821	


0.10881335032532	


0.060706148849838	


0.1084670932147	


0.075943890392763	


0.090024672974433	


0.092760327464019	


0.13367304918314	


0.073353112239787	


0.055803354510098	


0.1192539908008	


0.12427176193056	


0.063766228713908	


0.083112569214343	


0.065880814248503	


0.08117137315167	


0.047512048403899	


0.062375170501623	


0.044600663818535	


0.066191045630944	


0.17199948429102	


0.058323735955539	


0.11073687020831	


0.10710399107693	


0.16543574250461	


0.11936279814782	


0.11164568376433	


0.083117081393495	


0.071966964671833	


0.062541664592831	




0.10580762462399	


0.068370551781413	


0.05730933465168	


0.12923363297114	


0.067542615540739	


0.087509620847642	


0.07058817567147	


0.056505352338386	


0.086335879154106	


0.11295619884602	


0.1000915538438	


0.099848663293179	


0.052386163356433	


0.050493055036704	
decayed learning rate by a factor 0.97 to 0.00066805532375019	


0.14186850150635	


0.08916035986442	


0.077078019632757	


0.12216276209361	


0.069478775112408	


0.1089248534449	


0.086871773558629	


0.094662762872896	


0.080589512074329	


0.091686826867336	


0.066427530427966	


0.12363082559098	


0.10919888950805	


0.10424912487105	


0.10266745127665	


0.078203394564906	


0.085801191851477	


0.096131934405157	


0.098487467683312	


0.079603455952332	


0.059292076733334	


0.081235224974993	


0.07292201510897	


0.078531761334036	


0.070922353306129	


0.067356314853849	


0.071842547422334	


0.10811557598484	


0.060661530299098	


0.10865775696779	


0.075461418806877	


0.089936753362074	


0.092468382140832	


0.13333468298937	


0.073121717543241	


0.055581468019513	


0.11915306312317	


0.12408016242764	


0.063422309516196	


0.083028908921859	


0.065670110012572	


0.08105205774615	


0.047162305601528	


0.062366681525773	


0.044111674228095	


0.066510631499883	


0.17232816653591	


0.058298187083424	


0.10977575787439	


0.10708670361054	


0.16649276294966	


0.11903659224901	


0.11155470623711	


0.083652704377332	


0.072606746047206	


0.062552104181226	


0.10550505385144	


0.068286911567574	


0.057058140814832	


0.12922181808113	


0.067267992752764	


0.087708439348817	


0.070049098417571	


0.056507112897765	


0.086413689272829	


0.11306983308251	


0.099723148352423	


0.10068696531537	


0.052287658471902	


0.050468349272926	
decayed learning rate by a factor 0.97 to 0.00064801366403768	


0.14204227436745	


0.089119901502093	


0.076964896240778	


0.12192750312654	


0.069412049704837	


0.10894725179	


0.087069545954834	


0.094664856682582	


0.080440590285503	


0.090543176380828	


0.066286581506169	


0.12403349359569	


0.10972426533116	


0.10443642559543	


0.10277557314237	


0.07819367209569	


0.08575186972885	


0.096585790541078	


0.099031356950248	


0.079937969564805	


0.059133997816204	


0.081749972475921	


0.07290785912778	


0.078547626802798	


0.070980357098404	


0.067576217937796	


0.071763915494157	


0.10740137058345	


0.060772717710624	


0.10869173240242	


0.075281104275746	


0.089855789502594	


0.092249793172056	


0.13277082740343	


0.072879207682356	


0.055457207085259	


0.11895429301047	


0.12401671955706	


0.063219550321443	


0.082987192917944	


0.065630427398129	


0.081043615625656	


0.047176671012331	


0.062514916784624	


0.043915891397432	


0.06688140813449	


0.17118916403054	


0.058358326066052	


0.1095064367259	


0.10678205283842	


0.16709531540767	




0.11912303206597	


0.11117922479332	


0.083841110300861	


0.073038632048031	


0.062517746492816	


0.10538080480644	


0.068376386954671	


0.056788140502916	


0.12918351692895	


0.067113730643004	


0.087966434734471	


0.069526792638622	


0.056588709131881	


0.086317588118377	


0.11299596873494	


0.099482630863949	


0.10158422277305	


0.052228692107434	


0.050459919080826	
decayed learning rate by a factor 0.97 to 0.00062857325411655	


0.14202285121734	


0.088760564376484	


0.076841015968946	


0.12157352618866	


0.069228614326665	


0.10892354146353	


0.087032148029518	


0.0945897341315	


0.080278645404031	


0.089854488194196	


0.066275298135088	


0.12463895290958	


0.11006876607216	


0.104727836729	


0.10258276687569	


0.078151608645203	


0.085666272224156	


0.097039123433312	


0.099180455137313	


0.080125070190166	


0.058855219794051	


0.082055207973527	


0.072890172466796	


0.078563721346615	


0.070989630831426	


0.067857511440405	


0.071805754358477	


0.10674990184905	


0.060827628404203	


0.10864440196881	


0.075241938184067	


0.089748429805774	


0.092146773260159	


0.13198589081804	


0.072632643088924	


0.055352165502405	


0.11868613333721	


0.12432944705253	


0.063108318254248	


0.082877275768497	


0.065631269051294	


0.081028774802648	


0.047313169775412	


0.062660264775866	


0.043903972606254	


0.067221943779361	


0.16917408372279	


0.058523907232992	


0.10974761049944	


0.10652553744185	


0.16743351637447	


0.1194012256661	


0.11081557375899	


0.083802538159432	


0.073302894036867	


0.062532257026149	


0.10515101628485	


0.068454093297329	


0.056534495277889	


0.12921413052337	


0.067007767970629	


0.088174135587	


0.069070241655271	


0.056617570977147	


0.086057859195192	


0.11290179827442	


0.099265127799485	


0.1024171814154	


0.05213481682309	


0.050354239935038	
decayed learning rate by a factor 0.97 to 0.00060971605649306	


0.14197421515858	


0.088213191166619	


0.076650306127872	


0.12123207977707	


0.068955062843418	


0.10892712992207	


0.086859099697379	


0.09447001227577	


0.080079798025437	


0.089583607727979	


0.066292050577409	


0.12518785379263	


0.11040915054978	


0.10503443593435	


0.10236152009025	


0.078045000791684	


0.085503843296867	


0.097350320012416	


0.099160558629327	


0.080129553501703	


0.058580508099464	


0.08218336597912	


0.07279130608806	


0.078607768753241	


0.070844501528143	


0.067938051048928	


0.07184584776575	


0.10615085744472	


0.060731136299438	


0.10863502739103	


0.075208808199342	


0.089561005225392	


0.092028410898364	


0.13158198641052	


0.072278851440187	


0.0551496817668	


0.11861934823715	


0.12481191170179	


0.06294985542289	


0.082644954344223	


0.065568077197332	


0.080911796414863	


0.047354566516948	


0.062715003545784	


0.043879378919103	


0.067589695448581	


0.16718918870573	


0.058778333203029	


0.10993191603586	


0.10640392387172	


0.16786518118352	


0.11969071200708	


0.11058221958741	


0.083636781012673	


0.073434713188207	


0.062620710159466	


0.10477954236822	


0.068432315900627	


0.056320497486503	


0.12952105228876	


0.066869279493547	


0.08832612064355	


0.068673735229168	


0.056492384446678	


0.085707903988443	


0.11297458472294	


0.099062053372845	


0.10305716156977	


0.051937273925109	


0.050114941624919	
decayed learning rate by a factor 0.97 to 0.00059142457479826	


0.14205033180483	


0.087528359896643	


0.076376041360337	


0.12095525501288	


0.068653174231887	


0.10906363484657	


0.086759916724536	


0.09454857609122	


0.079843849337191	


0.089455237864289	


0.066306517143056	


0.12571589114718	


0.11086574152472	


0.10531731082095	


0.10240246558371	


0.077742555218319	


0.085398922692064	


0.09739925061604	


0.099260447881111	


0.079974498931298	


0.058372212052285	


0.082220841676388	


0.072494708227607	


0.078701185486299	


0.070470713652389	


0.06764701151245	


0.071704820872272	


0.1056625095926	


0.060491691566791	


0.10875917746207	


0.075113646966082	


0.089189591804979	


0.091853746323517	


0.13205375277447	


0.071768766243932	


0.054716661998196	


0.118880011308	


0.12528885821494	


0.062566714881534	


0.082277810174693	


0.065290338714994	


0.080448440603214	


0.0471097209236	


0.062607231940822	


0.043692897078062	


0.067977939042263	


0.16581210783984	


0.059055506575183	


0.10980178434722	


0.10651812322868	


0.16855148832691	


0.11984068898822	


0.11053531975775	


0.083398113106623	


0.073547472624617	


0.062880070676174	


0.10433229149637	


0.068253960425577	


0.056093571035805	


0.13000607604685	


0.06655602120961	


0.088398605419814	


0.068352021003656	


0.05616329235531	


0.08525351394101	


0.11329569100678	


0.098842675948258	


0.10359537824357	


0.051601925671201	


0.049748161249202	
decayed learning rate by a factor 0.97 to 0.00057368183755432	


# Interpreting model training and parameters

# Sampling from trained model

## Reconstructing the first input

In [119]:
function tensor_to_table(tens,ind)
    ind = ind or 1
    local sentence = {}
    for i=1,tens:size(2) do
        table.insert(sentence,index[tens[{ind,i}]])
    end
    return sentence
end

In [None]:
function table_to_string(sentence)
    local s = ''
    for k,w in pairs(sentence) do
        s = s..' '.. w
    end
    return s
end

In [123]:
local tt = tensor_to_table(x)
local ttt = table_to_string(tt)
print(#tt)
print(ttt)

51	
 ashley is in  the bathroom. cassidy went to the bathroom. billy took the lamp in the porch. billy left the lamp in the bedsit. billy joined cassidy in the bathroom. ashley went from the bathroom to the the bedsit because he can't stand billy. where is the the radio ?	


In [67]:
TEMP_SAMPLING = 0

In [157]:
------------------ loading the first element of the bash  -------------------
-- Using the first element of training set to test empirically if the model is working

local tt = tensor_to_table(x)
local ttt = table_to_string(tt)
seed_text = tt
len = #tt


------------------ Computing predictions (log probabilities at each timestep) -------------------
-- 

protos.rnn:evaluate()
-- local current_state
current_state = {}
for L = 1, NUM_LAYERS do
    -- c and h for all layers
    local h_init = torch.zeros(1,RNN_SIZE):double()
    table.insert(current_state,h_init:clone())
    table.insert(current_state,h_init:clone())
end
state_size = #current_state

-- do a few seeded timesteps
if len > 0 then
    -- print('seeding with '.. seed_text)
    print('-----------------------')
    for k,w in pairs(tt) do
        print('"'..w..'"')
        prev_word = torch.Tensor{voc[w]}
        local lst = protos.rnn:forward{prev_word, unpack(current_state)}
        -- lst is a list of [state1,state2,..stateN,output]. We want everything but last piece
        current_state = {}
        for i=1, state_size do table.insert(current_state, lst[i]) end
        prediction = lst[#lst] -- last element holds the log probabilities
    end    
else
    print('please add some seeding text')
end

------------------ samapling / argmaxing over the log probabilities at each timestep -------------------
--

-- start sampling / argmaxing
for i=1, 5 do
    -- log probabilities from the previous timestep
    if TEMP_SAMPLING == 0 then
        -- use argmax 
        local _, prev_word_ = prediction:max(2)
        prev_word = prev_word_:resize(1)
    else
        -- use sampling
        prediction:div(TEMP) -- scale by temperatrue
        local probs = torch.exp(prediction):squeeze()
        probs:div(torch.sum(probs)) -- renormalize so probs sum to one
        prev_word = torch.multinomial(probs:float(), 1):resize(1):float()
    end
    -- forward the nn for next word
    local lst = protos.rnn:forward{prev_word, unpack(current_state)}
    current_state = {}
    for i=1,state_size do table.insert(current_state, lst[i]) end
    prediction = lst[#lst] -- last element holds the log probs
    print(index[prev_word[1]])
end

-----------------------	
"ashley"	
"is"	
"in"	


""	
"the"	
"bathroom."	


"cassidy"	
"went"	
"to"	


"the"	
"bathroom."	
"billy"	


"took"	
"the"	
"lamp"	


"in"	
"the"	
"porch."	
"billy"	
"left"	


"the"	
"lamp"	
"in"	


"the"	
"bedsit."	
"billy"	
"joined"	


"cassidy"	
"in"	
"the"	
"bathroom."	
"ashley"	


"went"	
"from"	


"the"	
"bathroom"	
"to"	
"the"	
"the"	
"bedsit"	
"because"	
"he"	


"can't"	
"stand"	


"billy."	
"where"	
"is"	


"the"	
"the"	


"radio"	
"?"	


### Little debugging

In [90]:
torch.Tensor{voc['billy.']}
print(voc['billy.'])

14	


# Scrap