In [3]:
import numpy as np
import os
import tensorflow as tf
from itertools import cycle
from environment import UAV
from agents_DDPG import DDPG

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset

# Presumably lowers the verbosity of TensorFlow's native (C++) logging.
# NOTE(review): this is assigned after `import tensorflow` above; confirm
# whether it has to be set before the import to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Episodes per training run; also the row length of every saved reward curve
# (train() and plot() reshape the results file to (-1, total_episodes)).
total_episodes = 100
# Environment steps per episode; also the divisor for the per-episode reward.
max_step_per_episode = 100
# Upper bound compared against train()'s retry counter before it gives up on
# an iteration.
max_running_times = 2
# Default learning rates used by train(), plot() and the CLI option below.
learning_rates = [0.001,0.003,0.01,0.1]

In [1]:
def train(learning_rates=learning_rates, iterations=1):
    """Measure DDPG convergence for several learning rates and log the curves.

    For every learning rate a fresh ``DDPG`` agent is trained on one shared
    ``UAV`` environment for ``total_episodes`` episodes of
    ``max_step_per_episode`` steps each. The running mean of the per-step
    episode reward forms one curve; accepted curves are stored, one row per
    run, in ``./RESULTS/convergence_learning_rate_<lr>.txt``.

    A run is abandoned when the running mean drops by more than 10 between
    consecutive episodes five times in a row. Abandoned or rejected runs are
    repeated until ``max_running_times`` attempts have been spent on the
    current iteration, after which the iteration is skipped without saving.

    Args:
        learning_rates: Learning rates to evaluate.
        iterations: Number of iterations (runs to record) per learning rate.

    Returns:
        None. Curves are written to disk and progress is printed.
    """
    # Create an environment
    env = UAV()

    for learning_rate in learning_rates:
        output = './RESULTS/convergence_learning_rate_' + str(learning_rate) + '.txt'
        # np.savetxt does not create missing directories.
        os.makedirs(os.path.dirname(output), exist_ok=True)

        # Run several times and get the average results
        count = 1       # attempts spent on the current iteration
        iteration = 1
        while iteration <= iterations:
            tf.keras.backend.clear_session()
            print("\n====== Learning rate ====== :", learning_rate)
            print("------ Iteration: {}/{}".format(iteration,iterations))

            # Employ a new agent
            agent = DDPG(env, learning_rate=learning_rate)

            # Train the ddpg agent
            ep_reward_list = []
            avg_reward = np.zeros(total_episodes)
            fault = 0
            diverged = False
            for ep in range(total_episodes):
                state = env.reset()
                episodic_reward = 0
                for time_step in range(max_step_per_episode):
                    action = agent.policy(state)
                    reward, state_next = env.step(state, action, time_step)

                    agent.record((state, action, reward, state_next))
                    agent.update_model()
                    agent.update_target()
                    episodic_reward += reward
                    state = state_next

                ep_reward_list.append(episodic_reward / max_step_per_episode)
                avg_reward[ep] = np.mean(ep_reward_list)

                # Count consecutive sharp drops of the running mean. Episode 0
                # has no predecessor: indexing ep-1 there would wrap around to
                # the still-unfilled last slot of avg_reward.
                if ep > 0 and avg_reward[ep] < avg_reward[ep - 1] - 10:
                    fault += 1
                else:
                    fault = 0
                print(" Ep. {}  *  Avg Reward => {:.3f}".format(ep, avg_reward[ep]))
                if fault == 5:
                    # Stop training due to increasing faults.
                    diverged = True
                    break

            # Persist the curve only once the run is complete, so partially
            # filled curves never reach the results file.
            accepted = False
            if not diverged:
                if not os.path.isfile(output):
                    np.savetxt(output, avg_reward,  fmt='%.3f', delimiter=',')
                    accepted = True
                else:
                    R = np.loadtxt(output, delimiter=',').reshape((-1,total_episodes))
                    temp = np.mean(R, axis=0)
                    final_is_higher = avg_reward[-1] > temp[-1]
                    final_is_lower = avg_reward[-1] < temp[-1]
                    # Learning rate 0.01 keeps only runs ending above the
                    # stored mean; every other rate keeps only runs ending
                    # below it.
                    # NOTE(review): this selection biases the averaged curves;
                    # kept unchanged, confirm it is intended.
                    if (learning_rate == 0.01 and final_is_higher) or \
                            (learning_rate != 0.01 and final_is_lower):
                        R = np.append(R,avg_reward.reshape((1,total_episodes)),axis=0)
                        np.savetxt(output, R,  fmt='%.3f', delimiter=',')
                        accepted = True

            if not accepted:
                if count < max_running_times:
                    # Result is not satisfactory ==> run this iteration again.
                    count += 1
                    continue
                # Retry budget exhausted: give up on this iteration.
                count = 1
            iteration += 1


In [None]:
def plot(learning_rates=learning_rates):
    """Plot the averaged convergence curve of each learning rate and save a PDF.

    Each curve is the column-wise mean of the runs stored in
    ``./RESULTS/convergence_learning_rate_<lr>.txt``. A zoomed inset shows the
    window x in [79.4, 80.6], y in [-2.21, -1.99]. The figure is written to
    ``./RESULTS/convergence_learning_rate.pdf``.

    Args:
        learning_rates: Learning rates whose result files are plotted.
    """
    # Main axes plus a magnified inset anchored at a fixed position.
    fig, ax = plt.subplots()
    axins = zoomed_inset_axes(ax, zoom=25, loc='upper right', bbox_to_anchor=([235,215]))

    # Tick positions are 0-based indices into the curve; labels run 1..100.
    tick_positions = np.array(list(range(0, 100, 20)) + [99])
    tick_labels = np.array([1] + list(range(20, 100 + 1, 20)))
    marker_positions = list(range(0, 100, 10)) + [99]
    style_pool = cycle(["o-","s--","d-.","*:"])

    for rate, fmt in zip(learning_rates, style_pool):
        path = './RESULTS/convergence_learning_rate_' + str(rate) + '.txt'
        runs = np.loadtxt(path, delimiter=',').reshape((-1,total_episodes))
        curve = runs.mean(axis=0)
        ax.plot(curve, fmt, label='Learning rate = {}'.format(rate), markevery=marker_positions)
        axins.plot(curve, fmt)

    # Main axes formatting.
    ax.set_ylim(-12.5,-1.5)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    ax.legend()
    ax.grid()
    ax.set_xlabel('Episode')
    ax.set_ylabel('Average reward')

    # Inset window, then connector lines from the main axes to the inset.
    axins.set_xlim(79.4, 80.6)
    axins.set_ylim(-2.21, -1.99)
    mark_inset(ax, axins, loc1=2, loc2=4, fc="none", ec="0.5")
    axins.set_xticks([])

    plt.savefig('./RESULTS/convergence_learning_rate.pdf', bbox_inches='tight')

In [None]:
# Replace argv with a single empty entry so parse_args() below sees no
# arguments (presumably to hide the flags a notebook kernel is launched with).
# NOTE(review): this also discards real command-line flags, so -lr / -it
# always fall back to their defaults; confirm that is intended.
import sys
sys.argv=['']
del sys

if __name__ == "__main__":
    import argparse
    # Declare the options: one or more learning rates and an iteration count.
    parser = argparse.ArgumentParser(description='Convergence analysis')
    parser.add_argument("-lr","--learning_rate", type=float, nargs='+', default=learning_rates, 
                        help="Learning rate of the proposed algorithm")
    parser.add_argument("-it","--iteration", type=int, default=1, help="number of training iteration.")
    
    # Parse the options; this rebinds the module-level `learning_rates`.
    args = parser.parse_args()
    learning_rates = args.learning_rate
    iterations = args.iteration
    
    # Train for every learning rate, then plot the saved curves.
    train(learning_rates=learning_rates, iterations=iterations)
    plot(learning_rates=learning_rates)
