Commit e11e369

remove doom memory

Drops the separate `doomMemory` buffer and its dedicated training stage: replays collected while exploring after a movement loop are now spliced into the main episode replays and stored in the single prioritized memory. The maze wrapper's loop penalty is also removed and the success threshold raised from 0.95 to 0.99.
1 parent 9977f7f commit e11e369

File tree: 6 files changed (+37, −83 lines)


Core/MazeRLWrapper.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -42,11 +42,11 @@ def apply(self, actionIndex):
     if self._env.dead: # unreachable due to actions masking
       return nextState, -10, True, prevState
 
-    if 0.95 <= self._env.score:
+    if 0.99 <= self._env.score:
       return nextState, 0, True, prevState
 
     if self._movingLoop():
-      return nextState, -5, True, prevState
+      return nextState, 0, True, prevState
 
     self._done = False
     reward = 0.3 if isNewCell else 0 # small reward for visiting new cell
```
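After this change, the wrapper's per-step scheme is: -10 on death, 0 (terminal) once the score reaches 0.99, 0 (terminal) on a detected movement loop (previously -5), and a small +0.3 bonus for visiting a new cell. A minimal self-contained sketch of that scheme; the flag names mirror the diff, but this standalone function is an illustration, not the project's `MazeRLWrapper`:

```python
# Sketch of the post-commit reward rules; not the project's verbatim code.
def step_outcome(dead: bool, score: float, in_moving_loop: bool, is_new_cell: bool):
  """Return (reward, done) for one step, following the rules in the diff above."""
  if dead:                 # unreachable in practice due to action masking
    return -10, True
  if 0.99 <= score:        # maze solved
    return 0, True
  if in_moving_loop:       # loop detected: episode ends, but no penalty anymore
    return 0, True
  return (0.3 if is_new_cell else 0), False  # small reward for a new cell

print(step_outcome(dead=False, score=0.5, in_moving_loop=False, is_new_cell=True))    # (0.3, False)
print(step_outcome(dead=False, score=0.995, in_moving_loop=False, is_new_cell=False)) # (0, True)
```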

README.md

Lines changed: 0 additions & 1 deletion
```diff
@@ -64,7 +64,6 @@
 
 # Ideas and experiments
 
-- [ ] Replace the split of memory/episodes into a main set and a post-loop set.
 - [ ] Implement distillation of several policies, using extra rewards or other methods.
 - [ ] Compare an agent trained without a teacher to one trained with a teacher. (500 epochs)
 - [ ] Train an agent that receives no information about its own movements (only data about its surroundings).
```

distillation.py

Lines changed: 17 additions & 38 deletions
```diff
@@ -106,10 +106,6 @@ def learn_environment(teacher, model, params):
   ]
 
   memory = CebPrioritized(maxSize=5000, sampleWeight='abs')
-  doomMemory = CebLinear(
-    maxSize=params.get('max steps after loop', 16) * 10000,
-    sampleWeight='abs'
-  )
   trainableModel, teacherPower = wrapStudentModel(model)
   ######################################################
   def withTeacherPredictions(replay):
@@ -119,24 +115,20 @@ def withTeacherPredictions(replay):
 
   def testModel(EXPLORE_RATE):
     for e in environments: e.reset()
-    replays = Utils.emulateBatch(
-      environments,
-      DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
-      maxSteps=params.get('max test steps')
-    )
-    for replay, _ in replays:
-      if params.get('clip replay', False):
-        replay = Utils.clipReplay(replay, loopLimit=LOOP_LIMIT)
-      if BOOTSTRAPPED_STEPS < len(replay):
-        memory.addEpisode(withTeacherPredictions(replay), terminated=True)
+    replays = [replay for replay, _ in Utils.emulateBatch(
+        environments,
+        DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
+        maxSteps=params.get('max test steps')
+      )
+    ]
 
-    scores = [x.score for x in environments]
     ################
-    # collect bad experience
-    envs = [e for e in environments if e.hitTheLoop]
-    if envs:
+    # explore if hit the loop
+    envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
+    if envsIndexes:
+      envs = [environments[i] for i in envsIndexes]
       for e in envs: e.Continue()
-      replays = Utils.emulateBatch(
+      exploreReplays = Utils.emulateBatch(
         envs,
         DQNAgent(
           model,
@@ -145,11 +137,13 @@ def testModel(EXPLORE_RATE):
         ),
         maxSteps=params.get('max steps after loop', 16)
       )
-      for replay, _ in replays:
-        if BOOTSTRAPPED_STEPS < len(replay):
-          doomMemory.addEpisode(withTeacherPredictions(replay), terminated=True)
+      for ind, (replay, _) in zip(envsIndexes, exploreReplays):
+        replays[ind] += replay[1:]
     ################
-    return scores
+    for replay in replays:
+      if BOOTSTRAPPED_STEPS < len(replay):
+        memory.addEpisode(withTeacherPredictions(replay), terminated=True)
+    return [x.score for x in environments]
   ######################################################
   # collect some experience
   for _ in range(2):
@@ -181,19 +175,6 @@ def testModel(EXPLORE_RATE):
       }
     )
     print('Avg. train loss: %.4f' % trainLoss)
-
-    if BATCH_SIZE < len(doomMemory):
-      trainLoss = train(
-        model, trainableModel, doomMemory,
-        {
-          'gamma': GAMMA,
-          'batchSize': BATCH_SIZE,
-          'steps': BOOTSTRAPPED_STEPS,
-          'episodes': params['train doom episodes'](epoch),
-          'alpha': params.get('doom alpha', lambda _: alpha)(epoch)
-        }
-      )
-      print('Avg. train doom loss: %.4f' % trainLoss)
     ##################
     # test
     print('Testing...')
@@ -249,13 +230,11 @@ def testModel(EXPLORE_RATE):
   'warm up epochs': 0,
   'test episodes': 128,
   'train episodes': lambda _: 128,
-  'train doom episodes': lambda _: 32,
 
   'alpha': lambda _: 1,
   'explore rate': lambda _: 0,
 
   'agent noise': 0.01,
-  'clip replay': True,
 
   'explore rate after loop': 0.2,
   'agent noise after loop': 0.1,
```
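The structural change in both training scripts is the same: instead of routing post-loop exploration into a separate `doomMemory`, each exploration replay is spliced back into the replay of the environment it came from (via `zip(envsIndexes, exploreReplays)`), and only the merged replays go into the single prioritized memory. A small self-contained illustration of that splice, with made-up placeholder transitions; dropping the first element via `replay[1:]` is presumably to avoid duplicating the entry at the point where the episode was continued:

```python
# Toy illustration of the splice used above; strings stand in for transitions.
replays = [
  ['a0', 'a1', 'a2'],        # env 0: finished normally
  ['b0', 'b1'],              # env 1: hit a movement loop
  ['c0', 'c1', 'c2', 'c3'],  # env 2: hit a movement loop
]
hitTheLoop = [False, True, True]

# indexes of environments that get continued with a noisier, more exploratory agent
envsIndexes = [i for i, hit in enumerate(hitTheLoop) if hit]
# their continuation replays; the first entry overlaps the point of continuation
exploreReplays = [
  ['b1', 'b2', 'b3'],
  ['c3', 'c4'],
]

for ind, replay in zip(envsIndexes, exploreReplays):
  replays[ind] += replay[1:]  # skip the overlapping first entry, append the rest

print(replays)
# [['a0', 'a1', 'a2'], ['b0', 'b1', 'b2', 'b3'], ['c0', 'c1', 'c2', 'c3', 'c4']]
```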

learn_environment.py

Lines changed: 17 additions & 39 deletions
```diff
@@ -5,47 +5,36 @@
 import Utils
 import fit_stage
 import os
-from Utils.ExperienceBuffers.CebLinear import CebLinear
 
 def learn_environment(model, params):
   NAME = params['name']
   BATCH_SIZE = params['batch size']
   GAMMA = params['gamma']
   BOOTSTRAPPED_STEPS = params['bootstrapped steps']
-  LOOP_LIMIT = params['maze']['loop limit']
   metrics = {}
 
   environments = [
     MazeRLWrapper(params['maze']) for _ in range(params['test episodes'])
   ]
 
   memory = CebPrioritized(maxSize=5000, sampleWeight='abs')
-  doomMemory = CebLinear(
-    maxSize=params.get('max steps after loop', 16) * 1000,
-    sampleWeight='abs'
-  )
-
   ######################################################
   def testModel(EXPLORE_RATE):
     for e in environments: e.reset()
-    replays = Utils.emulateBatch(
-      environments,
-      DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
-      maxSteps=params.get('max test steps')
-    )
-    for replay, _ in replays:
-      if params.get('clip replay', False):
-        replay = Utils.clipReplay(replay, loopLimit=LOOP_LIMIT)
-      if BOOTSTRAPPED_STEPS < len(replay):
-        memory.addEpisode(replay, terminated=True)
+    replays = [replay for replay, _ in Utils.emulateBatch(
+        environments,
+        DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
+        maxSteps=params.get('max test steps')
+      )
+    ]
 
-    scores = [x.score for x in environments]
     ################
-    # collect bad experience
-    envs = [e for e in environments if e.hitTheLoop]
-    if envs:
+    # explore if hit the loop
+    envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
+    if envsIndexes:
+      envs = [environments[i] for i in envsIndexes]
       for e in envs: e.Continue()
-      replays = Utils.emulateBatch(
+      exploreReplays = Utils.emulateBatch(
         envs,
         DQNAgent(
           model,
@@ -54,11 +43,13 @@ def testModel(EXPLORE_RATE):
         ),
         maxSteps=params.get('max steps after loop', 16)
       )
-      for replay, _ in replays:
-        if BOOTSTRAPPED_STEPS < len(replay):
-          doomMemory.addEpisode(replay, terminated=True)
+      for ind, (replay, _) in zip(envsIndexes, exploreReplays):
+        replays[ind] += replay[1:]
     ################
-    return scores
+    for replay in replays:
+      if BOOTSTRAPPED_STEPS < len(replay):
+        memory.addEpisode(replay, terminated=True)
+    return [x.score for x in environments]
   ######################################################
   # collect some experience
   for _ in range(2):
@@ -86,19 +77,6 @@ def testModel(EXPLORE_RATE):
       }
     )
     print('Avg. train loss: %.4f' % trainLoss)
-
-    if BATCH_SIZE < len(doomMemory):
-      trainLoss = fit_stage.train(
-        model, doomMemory,
-        {
-          'gamma': GAMMA,
-          'batchSize': BATCH_SIZE,
-          'steps': BOOTSTRAPPED_STEPS,
-          'episodes': params['train doom episodes'](epoch),
-          'alpha': params.get('doom alpha', lambda _: alpha)(epoch)
-        }
-      )
-      print('Avg. train doom loss: %.4f' % trainLoss)
     ##################
     # test
     print('Testing...')
```
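One detail both scripts keep is the `BOOTSTRAPPED_STEPS < len(replay)` guard before `memory.addEpisode(...)`: a replay has to be longer than the bootstrap horizon to yield even one n-step target. A toy, self-contained sketch of an n-step bootstrapped return; the numbers and the helper below are illustrative assumptions, not the project's `fit_stage` code:

```python
# Illustrative n-step bootstrapped return; not the project's actual target computation.
BOOTSTRAPPED_STEPS = 3
GAMMA = 0.95

def n_step_return(rewards, n, gamma, bootstrap_value):
  """Discounted sum of the first n rewards plus a discounted bootstrap value."""
  return sum(gamma ** k * r for k, r in enumerate(rewards[:n])) + gamma ** n * bootstrap_value

rewards = [0.3, 0.0, 0.3, 0.0, 0.3]   # made-up per-step rewards from one replay
if BOOTSTRAPPED_STEPS < len(rewards): # same guard shape as in the diff above
  print(n_step_return(rewards, BOOTSTRAPPED_STEPS, GAMMA, bootstrap_value=1.0))
  # 0.3 + 0.95*0.0 + 0.95**2*0.3 + 0.95**3*1.0 ≈ 1.4281
```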

train.py

Lines changed: 0 additions & 2 deletions
```diff
@@ -52,13 +52,11 @@ def getModel(shape):
   'warm up epochs': 0,
   'test episodes': 128,
   'train episodes': lambda _: 128,
-  'train doom episodes': lambda _: 32,
 
   'alpha': lambda _: 1,
   'explore rate': lambda _: 0,
 
   'agent noise': 0.01,
-  'clip replay': True,
 
   'explore rate after loop': 0.2,
   'agent noise after loop': 0.1
```

view_maze.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -20,7 +20,7 @@
 from model import createModel
 
 def createMaze():
-  sz = 16 * 4
+  sz = 64
   maze = (0.8 < np.random.rand(sz, sz)).astype(np.float32)
   res = CMazeEnvironment(
     maze=maze,
```
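This change only folds `16 * 4` into the literal `64`. For reference, the surrounding context lines generate the maze grid by thresholding uniform noise, so roughly 20% of cells end up set to 1.0 (a quick check, assuming NumPy imported as `np` as in the file):

```python
import numpy as np

sz = 64
maze = (0.8 < np.random.rand(sz, sz)).astype(np.float32)
print(maze.shape)   # (64, 64)
print(maze.mean())  # close to 0.2: about a fifth of the cells are 1.0
```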
