-
Notifications
You must be signed in to change notification settings - Fork 8
/
transit_simulator.py
146 lines (108 loc) · 5.07 KB
/
transit_simulator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import random
import time
from selenium.common.exceptions import NoSuchElementException
from pyfiction.games.Transit.transit import Transit
from pyfiction.simulators.html_simulator import HTMLSimulator
from pyfiction.simulators.simulator import UnknownEndingException
class TransitSimulator(HTMLSimulator):
    """Simulator for the HTML text game Transit.

    The game state is read from the last ``passage`` element of the page
    exposed through ``self.driver`` (presumably set up by ``HTMLSimulator`` --
    verify against the base class); actions are the non-empty links inside
    that passage.
    """

    # the maximum number of steps the agent should take before we interrupt him to break infinite cycles
    max_steps = 100

    # the recommended number of random game walkthroughs for vocabulary initialization
    # should ideally cover all possible states and used words
    initialization_iterations = 256

    # if the game rewards are in e.g. [-30, 30], set the reward scale to 30 so that the result is in [-1, 1]
    reward_scale = 20

    # Known endings as (string test, lower-cased text fragment, reward).
    # The entries are checked in this order and the first match wins.
    _ENDING_REWARDS = (
        # tackled a man, found the friend
        (str.startswith, 'if anyone can help you', 10),
        # death by poisoning
        (str.startswith, 'you buy one more can', -20),
        # tackled the correct man
        (str.startswith, 'even though it was just in-passing', 20),
        # shot by the security
        (str.startswith, 'you make swift use of', -20),
        # jail
        (str.startswith, 'the guards know', -10),
        # jail
        (str.startswith, 'as you predicted', -10),
        # death in a foreign country
        (str.endswith, 'you close your eyes and submit to death.', -20),
        # jail in a foreign country
        (str.startswith, "you're in a country", -10),
        # escaped to the plane with the help of energy drinks
        (str.startswith, 'through the haze of the drinks', 10),
        # ended in a jail with the help of energy drinks
        (str.startswith, 'while the last parts of your mind untouched', -10),
    )

    def __init__(self, shuffle_actions=True):
        """Create the simulator for the Transit game.

        :param shuffle_actions: forwarded to the base class; when the resulting
            ``self.shuffle_actions`` is truthy, ``read`` shuffles the actions.
        """
        super().__init__(Transit, shuffle_actions=shuffle_actions)
        # (link text, link element) pairs available in the current state
        self.actions = []
        # the game loops infinitely if a same action is selected twice, we remove these actions to avoid infinite cycles
        self.actions_history = []

    def restart(self):
        """Restart the game and forget which actions were already used."""
        super().restart()
        self.actions_history = []

    def write(self, action_index):
        """Click the action at ``action_index`` of the last ``read`` result.

        The text of the chosen action is remembered so that ``read`` can
        filter it out of later states.
        """
        action_text, action = self.actions[action_index]
        self.actions_history.append(action_text)
        action.click()

    @classmethod
    def _ending_reward(cls, ending):
        """Return the reward for a lower-cased ending text.

        :raises UnknownEndingException: if no known ending matches.
        """
        for matches, fragment, reward in cls._ENDING_REWARDS:
            if matches(ending, fragment):
                return reward
        raise UnknownEndingException('Unknown ending text, cannot assign reward: ', ending)

    def read(self, tries=0, max_tries=10):
        """Read the current game state.

        :param tries: number of retries already performed (used by the
            internal retry; callers normally leave it at 0).
        :param max_tries: how many times a failed read is retried (after a
            0.1 s wait each) before the exception is re-raised.
        :return: ``(text, actions, reward)`` where ``actions`` is the list of
            action texts. The reward is -0.1 for a non-terminal state and the
            ending-specific reward for a terminal one. If every available
            action was already used, ``('', [], 0)`` is returned.
        :raises UnknownEndingException, NoSuchElementException, IndexError:
            the last error, once ``max_tries`` retries are exhausted.
        """
        try:
            # text is always in the last passage div:
            # NOTE(review): the find_elements_by_* helpers are deprecated in
            # Selenium 4 -- this code assumes an older Selenium; confirm.
            last_state = self.driver.find_elements_by_class_name("passage")[-1]
            text = last_state.text

            self.actions = []
            self.actions += [(action.text, action) for action in last_state.find_elements_by_tag_name("a") if
                             action.text]
            had_actions = bool(self.actions)

            # filter the already used actions (workaround for the game bug that causes infinite loops)
            self.actions = [action for action in self.actions if action[0] not in self.actions_history]

            if had_actions and not self.actions:
                print('WARNING: Transit simulator removed all actions because of a game bug! Ending with a 0 reward.')
                return '', [], 0

            # small penalty for every non-terminal step
            reward = -0.1
            if not self.actions:
                # no links at all: this is an ending, score it by its text
                reward = self._ending_reward(text.lower())
            elif self.shuffle_actions:
                random.shuffle(self.actions)

        except (UnknownEndingException, NoSuchElementException, IndexError) as e:
            if tries == 0:
                print('WARNING, simulator exception:', e)
            if tries < max_tries:
                print('Trying to read again after a short wait, try', tries + 1, 'out of', max_tries)
                time.sleep(0.1)
                # forward max_tries so that a caller-supplied limit is honoured on every retry
                return self.read(tries=tries + 1, max_tries=max_tries)
            raise

        return text, [action[0] for action in self.actions], reward
if __name__ == '__main__':
    # Demo: play 16 random walkthroughs, printing every state along the way.
    simulator = TransitSimulator()
    try:
        for _ in range(16):
            while True:
                state, actions, reward = simulator.read()
                print(state)
                print('actions: ', actions)
                print(reward)
                print('-----------------------------')
                # an empty action list marks the end of the episode
                if not actions:
                    break
                action = random.randint(0, len(actions) - 1)
                simulator.write(action)
            simulator.restart()
    finally:
        # always release the simulator, even if a read/write raised
        simulator.close()