In [16]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import seaborn as sns
sns.set_color_codes()
import pandas as pd
import numpy as np
import os
from matplotlib.ticker import FuncFormatter

In [15]:
base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/data/training/'
plot_dir = base_dir + 'plots/'
if not os.path.exists(plot_dir):
    os.mkdir(plot_dir)
algo_names = ['IA2C', 'ConseNet', 'FPrint', 'DIAL', 'CommNet', 'NeurComm']
algo_labels = ['ia2c', 'ia2c_cu', 'ia2c_fp', 'ma2c_dial', 'ma2c_cnet', 'ma2c_nc']
color_cycle = sns.color_palette()
TRAIN_STEP = 1e6

In [10]:
machine_map = {'ia2c': {0.9: 'gamma', 1.0: 'alpha'}}
window = 100
def millions(x, pos):
    return '%1.1fM' % (x*1e-6)
def plot_single_train_curve(algo_i):
    data = []
    alphas = [0.9, 1.0]
    for alpha in alphas:
        data_dir = base_dir + ('%s_%.1f.csv' % (algo_labels[algo_i], alpha))
#         if algo_i == 5 and alpha == 1.0:
#             data_dir = data_dir[:-4] + '_b.csv'
        data.append(pd.read_csv(data_dir))
    plt.figure()

    colors = 'br'
    for i, alpha in enumerate(alphas):
        df = data[i]
        print('%s, alpha=%.1f, avg R_bar of last 50 episodes: %.2f' % (algo_names[algo_i], alpha, np.mean(df.avg_reward.values[-50:])))
        # use moving avg to make plot smoother
        x_mean = df.avg_reward.rolling(window).mean().values
        x_std = df.std_reward.rolling(window).mean().values
        plt.plot(df.step.values, x_mean, color=colors[i], linewidth=3, label=r'$\alpha$=%.1f' % (alpha))
        plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=colors[i], edgecolor='none', alpha=0.1)

    plt.xlim([0, TRAIN_STEP])
    plt.ylim([-375, -125])

    formatter = FuncFormatter(millions)
    plt.gca().xaxis.set_major_formatter(formatter)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.grid(True, which='both')
#     plt.xlabel('Training step', fontsize=20)
#     plt.ylabel('Average episode reward', fontsize=20)
    plt.legend(loc='lower right', fontsize=18)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_train_new.pdf' % algo_labels[algo_i]))
    plt.close()
    
for i in range(6):
    plot_single_train_curve(i)

IA2C, alpha=0.9, avg R_bar of last 50 episodes: -163.47
IA2C, alpha=1.0, avg R_bar of last 50 episodes: -202.42
ConseNet, alpha=0.9, avg R_bar of last 50 episodes: -188.64
ConseNet, alpha=1.0, avg R_bar of last 50 episodes: -199.54
FPrint, alpha=0.9, avg R_bar of last 50 episodes: -169.98
FPrint, alpha=1.0, avg R_bar of last 50 episodes: -178.53
DIAL, alpha=0.9, avg R_bar of last 50 episodes: -251.94
DIAL, alpha=1.0, avg R_bar of last 50 episodes: -214.48
CommNet, alpha=0.9, avg R_bar of last 50 episodes: -247.62
CommNet, alpha=1.0, avg R_bar of last 50 episodes: -169.78
NeurComm, alpha=0.9, avg R_bar of last 50 episodes: -228.49
NeurComm, alpha=1.0, avg R_bar of last 50 episodes: -145.93


In [45]:
comp_data = []
# algos = ['ia2c', 'ma2c_cnet']
algos = ['ia2c', 'ia2c_cu']
# alphas = [0.9, 1.0]
alphas = [0.9, 0.95]
alpha_labels = {0.8:'0.8', 0.6:'0.6', 0.9:'0.9', 0.95:'0.95'}
for algo in algos:
    data = []
    for alpha in alphas:
        data_dir = base_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
        data.append(pd.read_csv(data_dir))
    comp_data.append(data)

# plt.figure(figsize=(8,6.5))

for j, alpha in enumerate(alphas):
    for i, algo in enumerate(algos):
        df = comp_data[i][j]
        print('%s, alpha=%.1f, avg R_bar of last 50 episodes: %.2f' % (algo, alpha, np.mean(df.avg_reward.values[-50:])))
        x_mean = df.avg_reward.rolling(window).mean().values
        x_std = df.std_reward.rolling(window).mean().values
#         print('{},{}: {:.2f}, {:.2f}'.format(alpha, algo, np.nanmean(x_mean), np.nanmean(x_std)))
        if algo == 'ia2c':
            color = color_cycle[0]
        else:
            color = color_cycle[1]
        if alpha == 0.95:
            sty = '-'    
        elif alpha == 0.9:
            sty = '--'
        elif alpha == 0.8:
            sty = '-.'
        else:
            sty = ':'
#         plt.plot(df.step.values, x_mean, sty, color=color, linewidth=3, label='%s,'% algo_names[algo_labels.index(algo)] + \
#                  alpha_labels[alpha])
#         plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color, edgecolor='none', alpha=0.1)
# plt.xlim([0, TRAIN_STEP])
# plt.ylim([-375, -125])
# formatter = FuncFormatter(millions)
# plt.gca().xaxis.set_major_formatter(formatter)
# plt.xticks(fontsize=18)
# plt.yticks(fontsize=18)
# plt.grid(True, which='both')
# plt.xlabel('Training step', fontsize=18)
# plt.ylabel('Average episode reward', fontsize=18)
# # ax = plt.gca()
# # box = ax.get_position()
# # ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# # plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), fontsize=18)
# plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=4, fontsize=13)
# plt.tight_layout()
# plt.savefig(plot_dir + ('/new_alpha_comp.pdf'))
# plt.close()

ia2c, alpha=0.9, avg R_bar of last 50 episodes: -163.47
ia2c_cu, alpha=0.9, avg R_bar of last 50 episodes: -188.64
ia2c, alpha=0.9, avg R_bar of last 50 episodes: -211.79
ia2c_cu, alpha=0.9, avg R_bar of last 50 episodes: -188.49


In [22]:
comp_data = []
algos = ['ma2c_nc_1.0_nop_agg', 'ma2c_nc_1.0_nop', 'ma2c_nc_1.0_agg', 'ma2c_nc_1.0_mf', 'ma2c_nc_1.0']
algos = [algos[i] for i in [0, 3, 1, 4, 2]]
labels = ['Baseline', 'Concat Only', 'FPrint Only', 'Mean FPrint', 'NeurComm']
labels = [labels[i] for i in [0, 3, 1, 4, 2]]
for algo in algos:
    data_dir = base_dir + ('%s.csv' % (algo))
    comp_data.append(pd.read_csv(data_dir))

plt.figure(figsize=(8,6.5))
colors = [1, 6, 5, 0, 3]
colors = [colors[i] for i in [0, 3, 1, 4, 2]]
stys = [':', '-.', '-.', '-', '-']
stys = [stys[i] for i in [0, 3, 1, 4, 2]]
for i, algo in enumerate(algos):
    df = comp_data[i]
    x_mean = df.avg_reward.rolling(window).mean().values
    x_std = df.std_reward.rolling(window).mean().values
    color = color_cycle[colors[i]]
    sty = stys[i]
    plt.plot(df.step.values, x_mean, sty, color=color, linewidth=3, label=labels[i])
#         plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color, edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
plt.ylim([-375, -125])
formatter = FuncFormatter(millions)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
# ax = plt.gca()
# box = ax.get_position()
# ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), fontsize=18)
plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=3, fontsize=16)
plt.tight_layout()
plt.savefig(plot_dir + ('/comm_comp.pdf'))
plt.close()

In [42]:
colors = [0, 5, 2, 6, 1, 3]
alphas = [0.9, 0.9, 0.95] + [1.0] * 3
alpha_labels = {0.8:'0.8', 0.6:'0.6', 0.9:'0.9', 0.95:'0.95', 1.0:'1.0'}
comp_data = []
for alpha, algo in zip(alphas, algo_labels):
#     if algo in ['ma2c_nc', 'ia2c']:
#         machine = 'alpha'
#     elif algo in ['ma2c_cnet', 'ma2c_dial']:
#         machine = 'beta'
#     else:
#         machine = 'gamma'
#     print(algo, alpha, machine)
    data_dir = base_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
    comp_data.append(pd.read_csv(data_dir))
    
plt.figure(figsize=(8,6.5))
for i in [0, 3, 1, 4, 2, 5]:
    df = comp_data[i]
    algo = algo_names[i]
    x_mean = df.avg_reward.rolling(window).mean().values
    x_std = df.std_reward.rolling(window).mean().values
    if i < 3:
        sty = ':'
    else:
        sty = '-'
    plt.plot(df.step.values, x_mean, sty, color=color_cycle[colors[i]], linewidth=3, label=algo)
#     plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color_cycle[i], edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
plt.ylim([-375, -125])
formatter = FuncFormatter(millions)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=3, fontsize=18)
plt.tight_layout()
plt.savefig(plot_dir + ('/algo_comp_new.pdf'))
plt.close()

In [34]:
base1_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/data/execution/'
table_names = ['traffic', 'trip', 'control']
EPISODE_SEC = 3600

def load_summarize_data():
    data = {}
    for algo in ['ia2c_fp', 'ia2c', 'ma2c_nc', 'ma2c_cnet']:
        print(algo)
        data[algo] = {}
        for tab in table_names:
            data_dir = base1_dir + ('large_grid_%s_%s.csv' % (algo, tab))
            df = pd.read_csv(data_dir)
            if tab == 'control':
                col_names = ['reward']
            elif tab == 'traffic':
                col_names = ['avg_queue', 'avg_speed_mps', 'avg_wait_sec', 'number_arrived_car']
            else:
                col_names = ['wait_sec']
            print_metrics(df, col_names, tab)
            data[algo][tab] = df
    return data  
            
def print_metrics(df, col_names, tab_name):
    if tab_name == 'control':
        rewards = df.groupby(['episode']).reward.mean()
        mean_reward = np.mean(rewards.values)
        std_reward = np.std(rewards.values)
        print('R_bar: mean %.2f, std %.2f' % (mean_reward, std_reward))
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    for name in col_names:
        if tab_name != 'trip':
            res = df.loc[df.episode == episodes[0], name].values
            for episode in episodes[1:]:
                res += df.loc[df.episode == episode, name].values
            res = res / num_episode
            print('%s: mean %.2f, min %.2f, max: %.2f' % (name, np.mean(res), np.min(res), np.max(res)))
        else:
            res = []
            for episode in episodes:
                res += list(df.loc[df.episode == episode, name].values)
            print('%s: mean %d, max %d' % (name, np.mean(res), np.max(res)))
            
data = load_summarize_data()

ia2c_fp
avg_queue: mean 1.62, min 0.00, max: 2.56
avg_speed_mps: mean 0.23, min 0.00, max: 7.52
avg_wait_sec: mean 414.81, min 0.00, max: 661.10
number_arrived_car: mean 0.01, min 0.00, max: 0.16
wait_sec: mean 1949, max 3427
R_bar: mean -155.85, std 5.74
reward: mean -155.85, min -236.48, max: -2.10
ia2c
avg_queue: mean 1.63, min 0.00, max: 2.55
avg_speed_mps: mean 0.26, min 0.00, max: 8.81
avg_wait_sec: mean 376.23, min 0.00, max: 568.29
number_arrived_car: mean 0.01, min 0.00, max: 0.20
wait_sec: mean 1949, max 3252
R_bar: mean -160.19, std 9.69
reward: mean -160.19, min -236.24, max: -1.36
ma2c_nc
avg_queue: mean 1.16, min 0.00, max: 1.91
avg_speed_mps: mean 2.28, min 0.00, max: 7.65
avg_wait_sec: mean 67.86, min 0.00, max: 118.51
number_arrived_car: mean 0.40, min 0.00, max: 1.26
wait_sec: mean 293, max 3374
R_bar: mean -136.09, std 6.09
reward: mean -136.09, min -206.58, max: -2.00
ma2c_cnet
avg_queue: mean 1.44, min 0.00, max: 2.15
avg_speed_mps: mean 1.82, min 0.00, max: 7.94
a

In [44]:
def plot_comp_series(algos, col_name, tab_name, ylabel, window=60):
    plt.figure(figsize=(8,6))
    for algo in algos:
        df = data[algo][tab_name]
        plot_series(df, algo, col_name, tab_name, window)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim([0, 3600])
    plt.grid(True, which='both')
    plt.xlabel('Simulation time (sec)', fontsize=18)
    plt.ylabel(ylabel, fontsize=18)
    plt.legend(loc='upper left', fontsize=18)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_comp.pdf' % col_name))
    plt.close()
    
def plot_series(df, algo, col_name, tab_name, window):
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    x = np.zeros((num_episode, EPISODE_SEC))
    for i, episode in enumerate(episodes):
        t_col = 'arrival_sec' if  tab_name == 'trip' else 'time_sec' 
        cur_df = df[df.episode == episode].sort_values(t_col)
        if window > 0:
            cur_x = cur_df[col_name].rolling(window, min_periods=1).mean().values
        else:
            cur_x = cur_df[col_name].values    
        x[i] = cur_x
    x_mean = np.mean(x, axis=0)
    x_std = np.std(x, axis=0)
    t = np.arange(1, EPISODE_SEC + 1)
    algo_i = algo_labels.index(algo)
#     print(algo_i)
    color = color_cycle[colors[algo_i]]
    plt.plot(t, x_mean, color=color, linewidth=3, label=algo_names[algo_i])
    x_lo = x_mean - x_std
    x_lo = np.maximum(x_lo, 0)
    x_hi = x_mean + x_std
    plt.fill_between(t, x_lo, x_hi, facecolor=color, edgecolor='none', alpha=0.1)

cur_algos = ['ma2c_nc', 'ma2c_cnet', 'ia2c', 'ia2c_fp']
plot_comp_series(cur_algos, 'avg_queue', 'traffic', 'Average queue length (veh)')
plot_comp_series(cur_algos, 'avg_wait_sec', 'traffic', 'Average intersection delay (s/veh)')

In [38]:
colors = [0, 2, 5, 6, 1, 3]