-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
85 lines (70 loc) · 2.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#coding=utf-8
from rl import DDPG
from env import Trans
import os
import time
import csv
import numpy as np
import matplotlib.pyplot as plt
from lib.logger import get_logger
# Let the process continue when more than one OpenMP runtime ends up loaded
# (KMP_DUPLICATE_LIB_OK is the Intel OpenMP opt-out for that abort).
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# --- Problem size and training hyper-parameters -----------------------------
V = 6  # train() derives its third env.step argument as step // V
NUM_VEHICLES = 20
NUM_MEC = 5  # RSU+BS
MAX_EP_STEPS = V * (NUM_MEC - 1)  # number of steps in one training episode
MEMORY_CAPACITY = 2000  # train() starts decaying exploration once rl.pointer reaches this
MAX_EPISODES = 1000

# --- Environment and agent ----------------------------------------------------
env = Trans(NUM_VEHICLES, NUM_MEC)
s_dim = env.states.shape[0]  # length of the first axis of env.states
a_dim = env.action.shape[0]  # length of the first axis of env.action
print(env.states.shape, env.action.shape)
print(s_dim, a_dim)

rl = DDPG(a_dim, s_dim)
logger = get_logger('./log', debug=False)
def train():
    """Run MAX_EPISODES training episodes and return the per-episode rewards.

    Each episode resets ``env`` and runs MAX_EP_STEPS steps. On every step
    the agent picks an action using the current exploration value, the
    environment is advanced, and the transition is pushed to ``rl``. Once
    ``rl.pointer`` reaches MEMORY_CAPACITY, every step multiplies the
    exploration value by 0.9998 and calls ``rl.sample()`` (presumably the
    learning update -- confirm against the DDPG class).

    Returns:
        list: accumulated reward of each episode, in episode order.
    """
    explore_var = 3
    episode_rewards = []
    final_step = MAX_EP_STEPS - 1

    for episode in range(MAX_EPISODES):
        state = env.reset()
        ep_reward = 0

        for step in range(MAX_EP_STEPS):
            # Index passed as the third argument of env.step; it advances
            # by one every V steps.
            k_index = step // V

            action = rl.choose_action(state, explore_var)
            next_state, reward = env.step(state, action, k_index)
            rl.push(state, action, reward, next_state)

            if rl.pointer >= MEMORY_CAPACITY:
                explore_var *= 0.9998
                rl.sample()

            state = next_state
            ep_reward += reward

            # Report and record the episode total on its last step.
            if step == final_step:
                logger.info(
                    'Episode:{0}, Reward:{1} ,Explore:{2}'.format(
                        episode + 1, float(ep_reward), float(explore_var)
                    )
                )
                episode_rewards.append(ep_reward)

    return episode_rewards
def eval():
    """Load the saved agent and roll it out in the environment forever.

    Calls ``rl.load()`` once, then repeats: reset the environment and take
    200 steps with the agent's chosen actions. The loop has no exit
    condition, so this function never returns.

    NOTE(review): train() calls ``rl.choose_action(s, var)`` and
    ``env.step(s, a, K)``, while this function omits the last argument of
    each -- this only works if both have defaults; confirm against the
    DDPG and Trans classes.
    NOTE(review): the name shadows the builtin ``eval``; it is kept because
    the script entry point calls it by this name.
    """
    steps_per_rollout = 200

    rl.load()
    while True:
        state = env.reset()
        for _ in range(steps_per_rollout):
            action = rl.choose_action(state)
            # The reward returned by the environment is not used here.
            state, _reward = env.step(state, action)
if __name__ == '__main__':
    # Script entry point: train (or evaluate) the agent, then write the
    # per-episode rewards to a CSV file, plot them, and log the elapsed time.
    ON_TRAIN = True
    plot_re = []
    t1 = time.time()
    if ON_TRAIN:
        plot_re = train()
    else:
        # eval() loops forever, so nothing below runs in this mode.
        eval()

    # Make sure the output directory exists; otherwise open() raises
    # FileNotFoundError and the results of the whole run are lost.
    os.makedirs('result', exist_ok=True)
    with open('result/result_DCRA.csv', 'w', newline='') as csvfile:
        result = csv.writer(csvfile, dialect='excel')
        result.writerow(['num_vehicles', 'episodes', 'reward', 'Speed'])
        for episode, reward in enumerate(plot_re, start=1):
            # Use the configured vehicle count so the column tracks
            # NUM_VEHICLES; the Speed value 30 is a fixed literal, as no
            # matching constant exists in this file.
            result.writerow([NUM_VEHICLES, episode, reward, 30])

    plt.plot(np.arange(len(plot_re)), plot_re)
    plt.ylabel('Reward')
    plt.xlabel('Episodes')
    plt.savefig("DCRA.png")
    plt.show()
    # Logged after plt.show(), so the reported time includes however long
    # the plot window stays open on blocking backends.
    logger.info('Running time:{}'.format(time.time() - t1))