import numpy as np
import gym
from gym import Env
from gym.envs.registration import EnvSpec
import pickle
import codecs
import logging
import torch
from models.model import StateEncodeNetwork, StateEncodeNetworkFixed
from models.model_baseline import RNN4RecNetwork
from run import Parameter
from copy import deepcopy

d = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class MobilityEnv(Env):
    """
    Environment for human mobility prediction simulated by static data
    """
    def __init__(self, l2_dist_scale=Parameter.l2_dist_scale):
        super(MobilityEnv, self).__init__()
        self.action_space = Action.space
        if Parameter.algorithm == 'SAC':
            self.action_space = ContinuousAction.space
        self.observation_space = State.space
        self.spec = EnvSpec('MobilityEnv-v0')
        self.dataset = Dataset(Parameter.dataset,
                               Parameter.loc_emb_name,
                               Parameter.tim_emb_name,
                               Parameter.uid_emb_name)
        self.ground_truth_pid = []
        self.ground_truth_district = []
        self.ground_truth_tid = []
        self.history_pids = []
        self.history_pids_emb = []
        self.baseline_history = []
        self.uid = None
        dim_emb = Parameter.loc_emb_size
        self.state_encode_net_fixed = StateEncodeNetworkFixed(dim_emb).to(d)
        self.baseline_net = RNN4RecNetwork().to(d)
        pretrained_model_dict = torch.load('data/' + Parameter.dataset + '/cp/' + Parameter.prt_model_name)
        baseline_model_dict = torch.load('data/' + Parameter.dataset + '/cp/' + Parameter.bsl_model_name)
        fixed_net_dict = self.state_encode_net_fixed.state_dict()
        fixed_net_new_dict = {k: v for k, v in pretrained_model_dict.items()
                              if k in fixed_net_dict}
        baseline_net_dict = self.baseline_net.state_dict()
        baseline_net_new_dict = {k: v for k, v in baseline_model_dict.items()
                                 if k in baseline_net_dict}
        self.state_encode_net_fixed.load_state_dict(fixed_net_new_dict)
        self.state_encode_net_fixed.eval()
        self.baseline_net.load_state_dict(baseline_net_new_dict)
        self.baseline_net.eval()  # for disabling bn and dropout
        self.l2_dist_scale = l2_dist_scale
        self.step_num = 0
        self.last_pid = 0
        self.batch_size = 50
        self.count = 0
        self.is_eval = False
        self.batch_baseline = []

    def set_eval(self):
        self.dataset.set_eval()
        self.is_eval = True

    def set_train(self):
        self.is_eval = False

    def step(self, action):
        # step and init
        self.step_num += 1
        this_step_target_pid = self.ground_truth_pid[self.step_num - 1]
        this_step_target_pid_emb = np.array(self.dataset.id_to_loc_emb(this_step_target_pid))
        this_step_target_dist_id = self.ground_truth_district[self.step_num - 1]
        info = {'ccorrect_bsl': 0,
                'ccorrect_ours': 0,
                'pcorrect_bsl': 0,
                'pcorrect_ours': 0,
                'l2dist_bsl': 0,
                'l2dist_ours': 0,
                'action_bsl': 0,
                'action_ours': 0,
                'grdtruth': 0,
                }
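        # 'pcorrect_*' marks an exact pid hit, 'ccorrect_*' a district-level
        # hit, 'l2dist_*' the scaled embedding distance to the ground truth;
        # '_bsl' entries track the pretrained baseline, '_ours' the RL agent.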
        # done once every ground-truth pid in the episode has been predicted
        done = self.step_num == len(self.ground_truth_pid)
        # state encode: append the chosen pid and re-encode the trajectory
        self.history_pids_emb.append(self.dataset.id_to_loc_emb(action))
        self.history_pids.append(action)
        hx, cx = self.state_encode_net_fixed.init_hidden_states()
        self.history_pids_emb_v = torch.FloatTensor([self.history_pids_emb]).to(d)  # unsqueeze to a batch of one
        traj_emb, _ = self.state_encode_net_fixed(self.history_pids_emb_v, hx, cx)
        traj_emb = traj_emb[0]
        if not done:
            next_target_time = self.ground_truth_tid[self.step_num]
            next_target_time_emb = self.dataset.id_to_time_emb(next_target_time)
            next_target_time_emb = torch.FloatTensor(next_target_time_emb).to(d)
        else:
            # terminal step: pad the time slot with zeros
            next_target_time_emb = torch.zeros(Parameter.loc_emb_size).to(d)
        state = torch.cat((traj_emb, next_target_time_emb), 0)
        state = torch.cat((state, self.uid_emb), 0) if self.uid is not None else state
        # reward definition (shared by the RL agent and the baseline)
        def reward_calculation(a, obj):
            """r: reward, a: action, obj: 'ours' (RL agent) or 'bsl' (baseline)"""
            # precision score for an exact pid hit
            pid_hit = this_step_target_pid == a
            r = Parameter.precision_score if pid_hit else 0
            info['pcorrect_' + obj] = 1 if pid_hit else 0
            # category score for a district-level hit (only when precision missed)
            if r == 0:
                district_hit = this_step_target_dist_id == self.dataset.get_district_id(a)
                r = Parameter.category_score if district_hit else 0
                info['ccorrect_' + obj] = 1 if district_hit else 0
            # l2 distance punishment
            a_emb = np.array(self.dataset.id_to_loc_emb(a))
            l2_dist = np.linalg.norm(a_emb - this_step_target_pid_emb)  # l2 distance in embedding space
            scaled_l2_dist = self.l2_dist_scale * l2_dist
            info['l2dist_' + obj] = scaled_l2_dist
            r -= scaled_l2_dist
            # record the chosen action
            if obj == 'bsl':
                info['action_bsl'] = a
            else:
                info['action_ours'] = a
            return r
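
        # Worked example with illustrative values (not the configured ones):
        # with precision_score = 1.0, category_score = 0.5, l2_dist_scale = 0.1,
        # an exact pid hit at embedding distance 0 scores 1.0, while a miss in
        # the correct district at distance 2.0 scores 0.5 - 0.2 = 0.3.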
        # RL reward
        rl_reward = reward_calculation(action, 'ours')
        # Baseline reward: query the pretrained baseline for its top-1 pid
        baseline_history_v = torch.LongTensor([self.baseline_history]).to(d)
        target_time = self.ground_truth_tid[self.step_num - 1]
        target_time_v = torch.LongTensor([target_time]).to(d)
        with torch.no_grad():
            dl_pid_logits = self.baseline_net(baseline_history_v, target_time_v, self.uid)
        _, idx = dl_pid_logits.data.topk(1)
        baseline_action = int(idx.cpu().numpy()[0][0])
        baseline_reward = reward_calculation(baseline_action, 'bsl')
        self.baseline_history.append(baseline_action)
        self.count += 1
        # final reward
        reward = rl_reward - baseline_reward if self.is_eval or Parameter.reward_use_bsl else rl_reward
        self.last_pid = action
        info['grdtruth'] = this_step_target_pid
        state = state.cpu().numpy() if Parameter.debug['mode'] != 'end2end' else state
        return state, reward, done, info

    def reset(self):
        # init
        self.step_num = 0
        init_data = self.dataset.get(self.is_eval)
        self.ground_truth_pid = deepcopy(init_data['pid_label'][0])
        self.ground_truth_district = deepcopy(init_data['districts_label'][0])
        self.ground_truth_tid = deepcopy(init_data['tid_label'][0])
        self.history_pids = deepcopy(init_data['present_pid'][0])
        self.history_pids_emb = []
        self.baseline_history = deepcopy(init_data['present_pid'][0])
        self.uid = deepcopy(init_data['uids']) if Parameter.use_user else None
        self.uid_emb = self.dataset.id_to_uid_emb(self.uid[0]) if self.uid is not None else None
        self.uid = torch.LongTensor(self.uid).to(d) if Parameter.use_user else None
        self.uid_emb = torch.FloatTensor(self.uid_emb).to(d) if self.uid_emb is not None else None
        self.last_pid = self.history_pids[-1]
        for h_pid in self.history_pids:
            self.history_pids_emb.append(self.dataset.id_to_loc_emb(h_pid))
        # state encode
        hx, cx = self.state_encode_net_fixed.init_hidden_states()
        self.history_pids_emb_v = torch.FloatTensor([self.history_pids_emb]).to(d)  # unsqueeze to a batch of one
        traj_emb, _ = self.state_encode_net_fixed(self.history_pids_emb_v, hx, cx)
        traj_emb = traj_emb[0]
        target_time_emb = self.dataset.id_to_time_emb(self.ground_truth_tid[self.step_num])
        target_time_emb = torch.FloatTensor(target_time_emb).to(d)
        # state layout: [traj_emb | target time emb | (optional uid emb)]
        init_state = torch.cat((traj_emb, target_time_emb), 0)
        init_state = torch.cat((init_state, self.uid_emb), 0) if self.uid is not None else init_state
        init_state = init_state.cpu().numpy() if Parameter.debug['mode'] != 'end2end' else init_state
        return init_state

    def render(self, mode='human'):
        """no screen for the human mobility env to render"""
        raise NotImplementedError("[not supporting rendering]")

    def rank_and_select_loc(self, action):
        """rank the scores of pids and select the argmax (or by other strategies)"""
        max_pid = 0
        max_score = -1e6
        score_list = []
        for pid, loc_emb in enumerate(self.dataset.loc_emb_data):
            score = np.dot(action, loc_emb)
            if score > max_score:
                max_pid = pid
                max_score = score
            score_list.append(score)
        return max_pid
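
    # Note: step() consumes a discrete pid. In the continuous (SAC) setting,
    # the caller presumably maps the emitted action embedding to a pid via
    # rank_and_select_loc before stepping (an inference, not stated here).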


class Dataset:
    def __init__(self, dataset_name: str, loc_emb_name: str, time_emb_name: str, uid_emb_name: str):
        self.data_train = pickle.load(open('data/' + dataset_name + '/bsl/train.pk', 'rb'), encoding='iso-8859-1')
        self.data_test = pickle.load(open('data/' + dataset_name + '/bsl/test.pk', 'rb'), encoding='iso-8859-1')
        print('{} train/test size: {}, {}'.format(dataset_name, len(self.data_train), len(self.data_test)))
        # location/time/user embeddings come from the baseline checkpoint
        self.emb_data = torch.load('data/' + dataset_name + '/cp/' + Parameter.bsl_model_name)
        self.time_emb_data = self.emb_data['emb_time.weight'].cpu().numpy()
        self.loc_emb_data = self.emb_data['emb_loc.weight'].cpu().numpy()
        self.pid_district_map = pickle.load(open('data/' + dataset_name + '/pid_district_map.pk', 'rb'), encoding='iso-8859-1')
        if Parameter.use_user:
            self.uid_emb_data = self.emb_data['emb_uid.weight'].cpu().numpy()
        self.i = 0  # current sample index

    def get(self, is_eval: bool = False):
        """randomly sample from the training set; step sequentially through the test set in eval"""
        self.i = np.random.randint(low=0, high=len(self.data_train)) if not is_eval else self.i + 1
        return self.data_test[self.i] if is_eval else self.data_train[self.i]
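
    # Each sample (inferred from reset() above) is expected to provide
    # 'pid_label', 'districts_label', 'tid_label', 'present_pid', and 'uids'
    # entries holding the label/history sequences for one trajectory.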

    def set_eval(self):
        self.i = 0

    def id_to_loc_emb(self, id: int):
        return self.loc_emb_data[id]

    def id_to_time_emb(self, id: int):
        return self.time_emb_data[id]

    def id_to_uid_emb(self, id: int):
        return self.uid_emb_data[id]

    def get_district_id(self, pid: int):
        return self.pid_district_map[pid]

    def is_eval_finished(self):
        return self.i >= len(self.data_test) - 1


class Action:
    """discrete action space: all pids"""
    num_action = Parameter.loc_size
    space = gym.spaces.Discrete(n=num_action)


class ContinuousAction:
    """continuous action space: dim == loc_emb_size"""
    dim_action = Parameter.loc_emb_size
    space = gym.spaces.Box(low=-np.ones(dim_action), high=np.ones(dim_action))


class State:
    """trajectory representation + target tid emb (+ uid emb when Parameter.use_user)"""
    dim_state = Parameter.traj_emb_size + Parameter.loc_emb_size  # traj_emb + time_emb
    dim_state += Parameter.uid_emb_size if Parameter.use_user else 0
    space = gym.spaces.Box(low=-np.ones(dim_state) * np.inf,
                           high=np.ones(dim_state) * np.inf)
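

# A minimal usage sketch (illustrative, not part of the original file): run one
# episode with random discrete actions. It assumes the default discrete-action
# setting and that the data/checkpoint files referenced by Parameter exist.
if __name__ == '__main__':
    env = MobilityEnv()
    state = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        action = env.action_space.sample()  # stand-in for a trained policy
        state, reward, done, info = env.step(action)
        episode_reward += reward
    print('episode reward: {:.3f}'.format(episode_reward))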