-
Notifications
You must be signed in to change notification settings - Fork 384
/
8_3_GridWorld.py
125 lines (115 loc) · 4.38 KB
/
8_3_GridWorld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#%%
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import random
import itertools
import scipy.misc
import matplotlib.pyplot as plt
class gameOb():
def __init__(self,coordinates,size,intensity,channel,reward,name):
self.x = coordinates[0]
self.y = coordinates[1]
self.size = size
self.intensity = intensity
self.channel = channel
self.reward = reward
self.name = name
class gameEnv():
def __init__(self,size):
self.sizeX = size
self.sizeY = size
self.actions = 4
self.objects = []
a = self.reset()
plt.imshow(a,interpolation="nearest")
def reset(self):
self.objects = []
hero = gameOb(self.newPosition(),1,1,2,None,'hero')
self.objects.append(hero)
goal = gameOb(self.newPosition(),1,1,1,1,'goal')
self.objects.append(goal)
hole = gameOb(self.newPosition(),1,1,0,-1,'fire')
self.objects.append(hole)
goal2 = gameOb(self.newPosition(),1,1,1,1,'goal')
self.objects.append(goal2)
hole2 = gameOb(self.newPosition(),1,1,0,-1,'fire')
self.objects.append(hole2)
goal3 = gameOb(self.newPosition(),1,1,1,1,'goal')
self.objects.append(goal3)
goal4 = gameOb(self.newPosition(),1,1,1,1,'goal')
self.objects.append(goal4)
state = self.renderEnv()
self.state = state
return state
def moveChar(self,direction):
# 0 - up, 1 - down, 2 - left, 3 - right
hero = self.objects[0]
heroX = hero.x
heroY = hero.y
if direction == 0 and hero.y >= 1:
hero.y -= 1
if direction == 1 and hero.y <= self.sizeY-2:
hero.y += 1
if direction == 2 and hero.x >= 1:
hero.x -= 1
if direction == 3 and hero.x <= self.sizeX-2:
hero.x += 1
self.objects[0] = hero
def newPosition(self):
iterables = [ range(self.sizeX), range(self.sizeY)]
points = []
for t in itertools.product(*iterables):
points.append(t)
currentPositions = []
for objectA in self.objects:
if (objectA.x,objectA.y) not in currentPositions:
currentPositions.append((objectA.x,objectA.y))
for pos in currentPositions:
points.remove(pos)
location = np.random.choice(range(len(points)),replace=False)
return points[location]
def checkGoal(self):
others = []
for obj in self.objects:
if obj.name == 'hero':
hero = obj
else:
others.append(obj)
for other in others:
if hero.x == other.x and hero.y == other.y:
self.objects.remove(other)
if other.reward == 1:
self.objects.append(gameOb(self.newPosition(),1,1,1,1,'goal'))
else:
self.objects.append(gameOb(self.newPosition(),1,1,0,-1,'fire'))
return other.reward,False
return 0.0,False
def renderEnv(self):
#a = np.zeros([self.sizeY,self.sizeX,3])
a = np.ones([self.sizeY+2,self.sizeX+2,3])
a[1:-1,1:-1,:] = 0
for item in self.objects:
a[item.y+1:item.y+item.size+1,item.x+1:item.x+item.size+1,item.channel] = item.intensity
b = scipy.misc.imresize(a[:,:,0],[84,84,1],interp='nearest')
c = scipy.misc.imresize(a[:,:,1],[84,84,1],interp='nearest')
d = scipy.misc.imresize(a[:,:,2],[84,84,1],interp='nearest')
a = np.stack([b,c,d],axis=2)
return a
def step(self,action):
self.moveChar(action)
reward,done = self.checkGoal()
state = self.renderEnv()
return state,reward,done