Patch summary (free text before the first "diff --git" header; see the
patch tool's documentation on leading text):

  * models/Atari2013.lua (new file): defines the classic class `Body`.
    `_init` copies `recurrent`, `histLen` and `stateSpec` from `opts`;
    `createBody` returns an nn.Sequential made of a View, a
    SpatialConvolution to 16 planes (8x8 kernel, stride 4, padding 1), a
    ReLU, a SpatialConvolution 16 -> 32 (4x4 kernel, stride 2) and a ReLU.
  * run.sh: the "async-nstep" and "async-a3c" presets replace
    `-modelBody models.Atari` with `-modelBody models.Atari2013 -hiddenSize 256`;
    every other flag is unchanged.

NOTE(review): this text arrived with its line breaks collapsed. The record
boundaries below follow the +/-/context markers and match the hunk counts
(28 added lines; 9 old / 9 new lines), but indentation inside each line
(2 spaces assumed) and the blank context line before "# Examples" are
reconstructed -- confirm against the real files before applying.

diff --git a/models/Atari2013.lua b/models/Atari2013.lua
new file mode 100644
index 0000000..0c2200a
--- /dev/null
+++ b/models/Atari2013.lua
@@ -0,0 +1,28 @@
+local nn = require 'nn'
+require 'classic.torch' -- Enables serialisation
+
+local Body = classic.class('Body')
+
+-- Constructor
+function Body:_init(opts)
+  opts = opts or {}
+
+  self.recurrent = opts.recurrent
+  self.histLen = opts.histLen
+  self.stateSpec = opts.stateSpec
+end
+
+function Body:createBody()
+  -- Number of input frames for recurrent networks is always 1
+  local histLen = self.recurrent and 1 or self.histLen
+  local net = nn.Sequential()
+  net:add(nn.View(histLen*self.stateSpec[2][1], self.stateSpec[2][2], self.stateSpec[2][3])) -- Concatenate history in channel dimension
+  net:add(nn.SpatialConvolution(histLen*self.stateSpec[2][1], 16, 8, 8, 4, 4, 1, 1))
+  net:add(nn.ReLU(true))
+  net:add(nn.SpatialConvolution(16, 32, 4, 4, 2, 2))
+  net:add(nn.ReLU(true))
+
+  return net
+end
+
+return Body
diff --git a/run.sh b/run.sh
index ab2e408..4f457bd 100755
--- a/run.sh
+++ b/run.sh
@@ -69,9 +69,9 @@ elif [ "$PAPER" == "demo-async-a3c" ]; then
   th main.lua -zoom 4 -async A3C -entropyBeta 0.001 -eta 0.0007 -momentum 0.99 -bootstraps 0 -batchSize 5 -hiddenSize 32 -doubleQ false -duel false -optimiser adam -steps 15000000 -tau 4 -memSize 20000 -epsilonSteps 10000 -valFreq 10000 -valSteps 6000 -bootstraps 0 -PALpha 0 "$@"
 elif [ "$PAPER" == "async-nstep" ]; then
   # Steps for "1 day" = 80 * 1e6; for "4 days" = 1e9
-  th main.lua -env rlenvs.Atari -modelBody models.Atari -game $GAME -height 84 -width 84 -colorSpace y -async NStepQ -bootstraps 0 -batchSize 5 -momentum 0.99 -rmsEpsilon 0.1 -steps 80000000 -duel false -tau 40000 -optimiser sharedRmsProp -epsilonSteps 4000000 -doubleQ false -PALpha 0 -eta 0.0007 -gradClip 0 "$@"
+  th main.lua -env rlenvs.Atari -modelBody models.Atari2013 -hiddenSize 256 -game $GAME -height 84 -width 84 -colorSpace y -async NStepQ -bootstraps 0 -batchSize 5 -momentum 0.99 -rmsEpsilon 0.1 -steps 80000000 -duel false -tau 40000 -optimiser sharedRmsProp -epsilonSteps 4000000 -doubleQ false -PALpha 0 -eta 0.0007 -gradClip 0 "$@"
 elif [ "$PAPER" == "async-a3c" ]; then
-  th main.lua -env rlenvs.Atari -modelBody models.Atari -game $GAME -height 84 -width 84 -colorSpace y -async A3C -bootstraps 0 -batchSize 5 -momentum 0.99 -rmsEpsilon 0.1 -steps 80000000 -duel false -tau 40000 -optimiser sharedRmsProp -epsilonSteps 4000000 -doubleQ false -PALpha 0 -eta 0.0007 -gradClip 0 "$@"
+  th main.lua -env rlenvs.Atari -modelBody models.Atari2013 -hiddenSize 256 -game $GAME -height 84 -width 84 -colorSpace y -async A3C -bootstraps 0 -batchSize 5 -momentum 0.99 -rmsEpsilon 0.1 -steps 80000000 -duel false -tau 40000 -optimiser sharedRmsProp -epsilonSteps 4000000 -doubleQ false -PALpha 0 -eta 0.0007 -gradClip 0 "$@"
 
 # Examples
 elif [ "$PAPER" == "demo-grid" ]; then