In [1]:
"""
Deep Deterministic Policy Gradient (DDPG), Reinforcement Learning.
P2P network, net bit rate, energy harvesting example for training.
Thanks to : https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/tree/master/contents/9_Deep_Deterministic_Policy_Gradient_DDPG
Using:
tensorflow 1.0
"""
import math
import tensorflow as tf
import numpy as np
import gym
import time
import EH_P2P
import DDPG_CLASS

# Seed NumPy's global RNG and TensorFlow's graph-level RNG so that runs are
# repeatable.
# NOTE: tf.set_random_seed applies to the graph that is the default at the time
# of the call; a graph created after tf.reset_default_graph() needs the seed to
# be set again.
np.random.seed(1)
tf.set_random_seed(1)



#####################  hyper parameters  ####################

# --- training schedule ---
MAX_EPISODES = 2000        # number of training episodes
MAX_EP_STEPS = 120         # environment steps taken in each episode

# --- optimisation ---
LR_A = 2e-4                # actor learning rate
LR_C = 2e-4                # critic learning rate
GAMMA = 0.999              # reward discount

# Candidate target replacement strategies; the trailing index picks the one
# in use (0 -> 'soft', 1 -> 'hard').
REPLACEMENT = (
    {'name': 'soft', 'tau': 0.01},
    {'name': 'hard', 'rep_iter_a': 600, 'rep_iter_c': 500},
)[0]

# --- replay memory ---
MEMORY_CAPACITY = 40000    # number of transitions the replay memory holds
BATCH_SIZE = 80            # number of transitions sampled per learning step

# When True, the TensorFlow graph is written to "logs/".
OUTPUT_GRAPH = False
    
# Create the EH_P2P environment and run its two setup calls before training.
# NOTE(review): Chanpower() / Solarread() presumably prepare the channel-power
# and solar data the environment steps through — confirm in EH_P2P.
env=EH_P2P.EH_P2P()
env.Chanpower()
env.Solarread()
    
# Problem dimensions handed to the DDPG actor, critic and replay memory.
# The state holds three quantities: channel, battery and solar.
# NOTE(review): the order of the three components is not established here —
# confirm against EH_P2P.
state_dim = 3
action_dim = 1  # single action: transmission power
# Scalar action bound; actions are kept within the range [0, 1].
# This used to read `1 [0,1]`, which subscripts the int literal 1 and raises
# TypeError ('int' object is not subscriptable) as soon as the cell runs.
action_bound = 1

# One-shot flags: each is cleared once its progress message has been printed.
tip = 1    # "memory full" message still pending
tip2 = 1   # "var zero" message still pending
snr = -10  # SNR setting passed to env.reset_P2P and used in the checkpoint name

# Train one DDPG agent for each (snr, modulation) setting and write a checkpoint
# when its training finishes.  Both outer loops currently run exactly once.
for temp in range(1):
    # To sweep several SNR values, loop here with: for snr in range(-10,10,2):
    for modulation in range(1):
        # Standard deviation of the Gaussian exploration noise added to actions.
        var = 10

        # Start from an empty default graph for this setting.
        tf.reset_default_graph()
        # The graph-level seed set at the top of the file belonged to the graph
        # that was just discarded; set it again on the new graph so that
        # variable initialisation is reproducible.
        tf.set_random_seed(1)
        sess = tf.Session()

        # Placeholders shared with DDPG_CLASS through module attributes:
        # current state, reward and next state.
        with tf.name_scope('S'):
            S = tf.placeholder(tf.float32, shape=[None, state_dim], name='s')
        with tf.name_scope('R'):
            R = tf.placeholder(tf.float32, [None, 1], name='r')
        with tf.name_scope('S_'):
            S_ = tf.placeholder(tf.float32, shape=[None, state_dim], name='s_')
        DDPG_CLASS.S = S
        DDPG_CLASS.R = R
        DDPG_CLASS.S_ = S_

        actor = DDPG_CLASS.Actor(sess, action_dim, action_bound, LR_A, REPLACEMENT)
        critic = DDPG_CLASS.Critic(sess, state_dim, action_dim, LR_C, GAMMA, REPLACEMENT, actor.a, actor.a_)
        actor.add_grad_to_graph(critic.a_grads)
        # Each stored row is [s, a, r, s_], hence 2 * state_dim + action_dim + 1 columns.
        M = DDPG_CLASS.Memory(MEMORY_CAPACITY, dims=2 * state_dim + action_dim + 1)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=100)

        if OUTPUT_GRAPH:
            tf.summary.FileWriter("logs/", sess.graph)

        print("modulation=",modulation,"snr=",snr)

        for i in range(MAX_EPISODES):

            s = env.reset_P2P(snr)

            ep_reward = 0
            for j in range(MAX_EP_STEPS):
                # Policy action plus Gaussian exploration noise, clipped to [0, 1].
                a = actor.choose_action(s)
                a = np.random.normal(a, var)
                a = np.clip(a, 0, 1)
                # input modulation 0:qpsk,1:8psk,2:16qam
                s_, r, info = env.step_P2P([a,modulation])

                M.store_transition(s, a, r, s_)

                # Learning only starts once more transitions than the memory
                # capacity have been stored.
                if M.pointer > MEMORY_CAPACITY:
                    # tip and tip2 are only used to print each message once
                    if tip == 1:
                        print("memory full",j,i)
                        tip = 0
                    var *= 0.9995  # decay the action randomness
                    if tip2 == 1 and var < 0.00000001:
                        print("var zero",j,i)
                        tip2 = 0

                    # Split the sampled rows back into their [s, a, r, s_] columns.
                    b_M = M.sample(BATCH_SIZE)
                    b_s = b_M[:, :state_dim]
                    b_a = b_M[:, state_dim: state_dim + action_dim]
                    b_r = b_M[:, -state_dim - 1: -state_dim]
                    b_s_ = b_M[:, -state_dim:]

                    critic.learn(b_s, b_a, b_r, b_s_)
                    actor.learn(b_s)

                s = s_

                ep_reward += r

            # Every 30 episodes, report the last step of the episode and the
            # episode's mean reward per step.
            if i % 30 == 0:
                print("net bit rate=",r,"action",a, "solar,channel,battery",s,"epoch",i)
                print("ave_reward",ep_reward/(j+1))

        # Checkpoint the trained networks; i is the index of the last episode.
        save_path = saver.save(sess, "folder_for_nn_noise"+"/EH_save_net_snr="+str(snr)+str(modulation)+"epoch="+str(i)+"_P2P.ckpt")
        print("Save to path: ", save_path)

print("----------------------------END--------------------------------")

modulation= 0 snr= -10
net bit rate= 70.61435492552498 action [1.] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([0.193472])]
ave_reward 10494.200864050617
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=0_1way.ckpt
10582.387425933395 0

net bit rate= 167221.96207069396 action [1.] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([0.193472])]
ave_reward 8965.952927497052
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=30_1way.ckpt
9041.297069744927 30

net bit rate= 0.0 action [0.] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([0.649872])]
ave_reward 13271.149015774807
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=60_1way.ckpt
13382.67127641157 60

net bit rate= 10.269756120306216 action [0.945073] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([0.20963328])]
ave_reward 5678.144522168757
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=90_1way.ckpt
5725.86002

Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=960_1way.ckpt
15505.497182828178 960

net bit rate= 0.0 action [3.33861044e-08] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([1.91429806])]
ave_reward 13802.418081894568
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=990_1way.ckpt
13918.40478846511 990

net bit rate= 0.0 action [1.63068861e-11] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([1.91554045])]
ave_reward 13485.683795123145
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=1020_1way.ckpt
13599.00886903174 1020

net bit rate= 0.0 action [8.8798157e-11] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([1.91564956])]
ave_reward 13901.223029812045
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=1050_1way.ckpt
14018.040030062566 1050

net bit rate= 0.0 action [3.80107966e-13] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([1.91572262])]
ave_reward 13849.2519

Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=1950_1way.ckpt
12365.752072173298 1950

net bit rate= 0.0 action [1.40744446e-42] solar,channel,battery [array([-0.031192]) 1.281355781629867 array([1.9187159])]
ave_reward 13219.6861141693
Save to path:  folder_for_nn_noise/EH_save_net_snr=-100epoch=1980_1way.ckpt
13330.775913448033 1980

net bit rate= 0.0 action [1.49245698e-43] solar,channel,battery [array([0.035152]) -0.42827632965626994 array([0.245816])] 0.0
net bit rate= 0.0 action [1.40262856e-12] solar,channel,battery [array([0.050768]) -0.6728105373876961 array([0.380968])] 0.0
net bit rate= 1.691062904128862e-118 action [1.] solar,channel,battery [array([0.063544]) -0.8609668415980516 array([0.150768])] 5.636876347096206e-119
net bit rate= 0.0 action [1.] solar,channel,battery [array([0.0804]) -0.9538684419873621 array([0.163544])] 4.227657260322155e-119
net bit rate= 0.0 action [1.] solar,channel,battery [array([0.12696]) -0.940146197845513 array([0.1804])] 3.3821

net bit rate= 0.0 action [1.63935587e-10] solar,channel,battery [array([0.608032]) 0.28186701229864086 array([2.583328])] 12590.535717190172
net bit rate= 199998.26253519946 action [1.] solar,channel,battery [array([0.79216]) 0.16644346233101426 array([0.708032])] 15267.788957447448
net bit rate= 0.0 action [2.7429523e-43] solar,channel,battery [array([0.687136]) 0.015391263909440678 array([1.600192])] 15052.74967635664
net bit rate= 0.0 action [1.70205514e-17] solar,channel,battery [array([0.61024]) -0.15933629465604893 array([2.387328])] 14843.683708629464
net bit rate= 198511.91921287775 action [1.] solar,channel,battery [array([0.680232]) -0.34469881757705934 array([0.71024])] 17359.686934715057
net bit rate= 0.0 action [2.55374013e-34] solar,channel,battery [array([0.43556]) -0.5246418837900655 array([1.490472])] 17125.09657073242
net bit rate= 82.93793644838571 action [1.] solar,channel,battery [array([0.426896]) -0.6794080484964884 array([0.53556])] 16897.86778894197
net bit rat