In [1]:
import itertools
import pickle
import numpy as np
# Print NumPy floats with 3 digits of precision to keep array output compact.
np.set_printoptions(precision=3)

In [2]:
from IPython.display import clear_output
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import seaborn as sns

# Global seaborn look for every figure produced in this notebook:
# white background with grid lines, "notebook" context with enlarged fonts
# and thicker lines.
sns.set_style(style="whitegrid")
sns.set_context(
    context="notebook",
    font_scale=1.5,
    rc={"lines.linewidth": 2.5},
)

# Current seaborn colour cycle; the plotting functions index into it
# (palette[0], palette[1], palette[3]) to pick their colours.
palette = sns.color_palette()

In [3]:
import os
import imageio
import natsort

In [4]:
# Axis limits used by the density plots below.
# Both ranges are symmetric around zero, so each lower bound is simply the
# negated upper bound.
cart_pos_max = 2.7
cart_pos_min = -cart_pos_max

# 15 degrees expressed in radians (deg * 2*pi / 360).
pole_ang_max = 15 * 2 * np.pi / 360
pole_ang_min = -pole_ang_max

In [5]:
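# Recorded run names (see `exp1` below for the one currently analysed).
# Presumably obtained with os.listdir('./100K_run/') -- TODO confirm the source directory.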

# 'D1QN_D1QN_Naive_1000_freq_100_324267_04171400'
# 'D1QN-PER_D1QN_NaivePER_1000_freq_100_324267_04164957'

# 'DQN_DQN_Naive_1000_freq_100_324267_04171233'
# 'DQN-PER_DQN_NaivePER_1000_freq_100_324267_04164826'
# 'DQN-PER-original_DQN_NaivePER_100000_freq_1000_324267_04163756'

# 'D2QN_D2QN_Naive_1000_freq_100_324267_04170846'
# 'D2QN-PER_D2QN_NaivePER_1000_freq_100_324267_04165444'

# 'DuDQN_DuDQN_Naive_1000_freq_100_324267_04170649'
# 'DuDQN-PER_DuDQN_NaivePER_1000_freq_100_324267_04165300'

# 'DuD2QN_DuD2QN_Naive_1000_freq_100_324267_04170223'
# 'DuD2QN-PER_DuD2QN_NaivePER_1000_freq_100_324267_04165910'


In [6]:
def single_kde(log_name):
    """Save a grid of overlapping cart-position / pole-angle density plots.

    Loads the pickled memories from ``./memories/<log_name>.mpk`` and draws a
    10x10 grid of panels. Panel ``i`` covers memories ``i*1000`` up to (not
    including) ``(i+1)*1000`` and overlays two kernel density estimates: the
    cart position on the bottom x-axis and the pole angle on a twin top
    x-axis. Both share the density y-axis, which is clipped to ``[0, 5]``.

    The figure is written to
    ``./images/<log_name>/single_kdeplot_<log_name>.png`` (the folder is
    created when missing) and then closed without being displayed.

    Args:
        log_name (str): Run name; used as the stem of the memory file and as
            the name of the output folder under ``./images/``.

    Returns:
        None.

    Raises:
        FileNotFoundError: If the memory file does not exist.
    """
    mem_file = './memories/' + log_name + '.mpk'

    # NOTE(review): pickle.load can execute arbitrary code -- only open memory
    # files produced by your own training runs.
    with open(mem_file, 'rb') as fpr:
        # Each memory mixes arrays (states) with scalars, so the records are
        # ragged; dtype=object is required for NumPy >= 1.24 to accept them.
        memories = np.array(list(pickle.load(fpr)), dtype=object)

    # Column 0 of a memory holds the visited state, whose components are
    # [cart position, cart velocity, pole angle, pole velocity].
    visited_states = np.stack(memories[:, 0]).squeeze()
    cart_pos = visited_states[:, 0]
    pole_ang = visited_states[:, 2]

    # Canvas layout
    initial_index = 0    # first memory to plot
    block_size = 1000    # memories summarised by one panel
    subplot_span = 10    # number of subplot columns
    subplot_depth = 10   # number of subplot rows
    num_plots = subplot_span * subplot_depth
    ymax = 5             # upper limit of the density axis

    fig, axes = plt.subplots(subplot_depth, subplot_span, figsize=(20, 20))

    # Tick labels would be unreadable at this panel size, so hide both axes.
    for ax in axes.flat:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    for i in range(num_plots):
        start = initial_index + i * block_size
        cart_block = cart_pos[start:start + block_size]
        pole_block = pole_ang[start:start + block_size]
        if cart_block.size == 0:
            # Fewer memories than the grid can hold: leave the rest blank.
            break

        ax_cart_pos = axes[i // subplot_span, i % subplot_span]
        ax_cart_pos.set_ylim([0, ymax])

        # The pole angle gets its own x-axis (different range) but shares
        # the density y-axis with the cart position.
        ax_pole_ang = ax_cart_pos.twiny()
        ax_pole_ang.get_xaxis().set_visible(False)

        sns.kdeplot(cart_block, ax=ax_cart_pos, color=palette[0],
                    label='Cart Position', legend=False)
        ax_cart_pos.set_xlim(cart_pos_min, cart_pos_max)

        sns.kdeplot(pole_block, ax=ax_pole_ang, color=palette[1],
                    label='Pole Angle', legend=False)
        ax_pole_ang.set_xlim(pole_ang_min, pole_ang_max)

        # Negative pad pulls the title inside the panel.
        ax_pole_ang.set_title(str(i + 1) + 'K', loc='left', pad=-20)

    # One shared legend for the whole figure (separate names so the data
    # arrays above are not shadowed).
    cart_pos_patch = mpatches.Patch(color=palette[0], label='Cart Position')
    pole_ang_patch = mpatches.Patch(color=palette[1], label='Pole Angle')
    fig.legend(handles=[cart_pos_patch, pole_ang_patch], loc='upper right',
               bbox_to_anchor=(0.22, 0.75), ncol=2)

    # makedirs also creates './images' itself on a fresh checkout.
    image_dir = './images/' + log_name
    os.makedirs(image_dir, exist_ok=True)

    fig_file = image_dir + '/' + 'single_kdeplot_' + log_name + '.png'
    fig.savefig(fig_file, dpi=300, format='png')
    plt.close(fig)  # don't display figure

In [7]:
# Run to analyse: this name is the stem of its memory file
# (./memories/<name>.mpk) and names the output folders/files written below.
exp1 = 'D1QN-PER_D1QN_NaivePER_1000_freq_100_324267_04164957'

In [8]:
# Write the overlapping cart-position / pole-angle KDE grid for the selected run
# (saved under ./images/<run>/; the figure is closed, not displayed).
single_kde(exp1)

In [9]:
def joint_kde(log_name):
    """Save a grid of joint cart-position / pole-angle density plots.

    Loads the pickled memories from ``./memories/<log_name>.mpk`` and draws a
    10x10 grid of panels. Panel ``i`` covers memories ``i*1000`` up to (not
    including) ``(i+1)*1000`` and shows the bivariate kernel density estimate
    of cart position (x) against pole angle (y) as filled contours.

    The figure is written to
    ``./images/<log_name>/joint_kdeplot_<log_name>.png`` (the folder is
    created when missing) and then closed without being displayed.

    Args:
        log_name (str): Run name; used as the stem of the memory file and as
            the name of the output folder under ``./images/``.

    Returns:
        None.

    Raises:
        FileNotFoundError: If the memory file does not exist.
    """
    mem_file = './memories/' + log_name + '.mpk'

    # NOTE(review): pickle.load can execute arbitrary code -- only open memory
    # files produced by your own training runs.
    with open(mem_file, 'rb') as fpr:
        # Each memory mixes arrays (states) with scalars, so the records are
        # ragged; dtype=object is required for NumPy >= 1.24 to accept them.
        memories = np.array(list(pickle.load(fpr)), dtype=object)

    # Column 0 of a memory holds the visited state, whose components are
    # [cart position, cart velocity, pole angle, pole velocity].
    visited_states = np.stack(memories[:, 0]).squeeze()
    cart_pos = visited_states[:, 0]
    pole_ang = visited_states[:, 2]

    # Canvas layout
    initial_index = 0    # first memory to plot
    block_size = 1000    # memories summarised by one panel
    subplot_span = 10    # number of subplot columns
    subplot_depth = 10   # number of subplot rows
    num_plots = subplot_span * subplot_depth

    fig, axes = plt.subplots(subplot_depth, subplot_span, figsize=(20, 20))

    # Hide ticks and pin every panel to the same fixed state-space window so
    # the panels are directly comparable.
    for ax in axes.flat:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_xlim(cart_pos_min, cart_pos_max)
        ax.set_ylim(pole_ang_min, pole_ang_max)

    for i in range(num_plots):
        start = initial_index + i * block_size
        cart_block = cart_pos[start:start + block_size]
        pole_block = pole_ang[start:start + block_size]
        if cart_block.size == 0:
            # Fewer memories than the grid can hold: leave the rest blank.
            break

        ax_cur = axes[i // subplot_span, i % subplot_span]

        # x/y must be keywords and `fill` replaces the deprecated `shade`
        # on current seaborn releases.
        sns.kdeplot(x=cart_block,
                    y=pole_block,
                    ax=ax_cur,
                    fill=True,
                    color=palette[3],
                    legend=False)

        # Negative pad pulls the title inside the panel.
        ax_cur.set_title(str(i + 1) + 'K', loc='left', pad=-20)

    # makedirs also creates './images' itself on a fresh checkout.
    image_dir = './images/' + log_name
    os.makedirs(image_dir, exist_ok=True)

    fig_file = image_dir + '/' + 'joint_kdeplot_' + log_name + '.png'
    fig.savefig(fig_file, dpi=300, format='png')
    plt.close(fig)  # don't display figure

In [10]:
# Write the joint (cart position vs. pole angle) KDE grid for the selected run
# (saved under ./images/<run>/; the figure is closed, not displayed).
joint_kde(exp1)

In [11]:
def make_gif(log_name):
    """Render per-block joint KDE frames and assemble them into a GIF.

    Loads the pickled memories from ``./memories/<log_name>.mpk``. For every
    block of 1000 memories (up to the first 100,000, or fewer when the file
    holds less) a seaborn joint KDE plot of cart position against pole angle
    is saved as ``./images/<log_name>/gif_maker/<start>.png``. The frames
    written by this call are then combined, in block order, into
    ``./movies/<log_name>.gif`` with a duration setting of 0.5 per frame.

    Only frames produced by this call go into the GIF, so PNG files left in
    the frame folder by earlier runs are ignored.

    Args:
        log_name (str): Run name; used as the stem of the memory file, the
            frame folder name and the GIF file name.

    Returns:
        None.

    Raises:
        FileNotFoundError: If the memory file does not exist.
    """
    mem_file = './memories/' + log_name + '.mpk'

    # NOTE(review): pickle.load can execute arbitrary code -- only open memory
    # files produced by your own training runs.
    with open(mem_file, 'rb') as fpr:
        # Each memory mixes arrays (states) with scalars, so the records are
        # ragged; dtype=object is required for NumPy >= 1.24 to accept them.
        memories = np.array(list(pickle.load(fpr)), dtype=object)

    # Column 0 of a memory holds the visited state, whose components are
    # [cart position, cart velocity, pole angle, pole velocity].
    visited_states = np.stack(memories[:, 0]).squeeze()
    cart_pos = visited_states[:, 0]
    pole_ang = visited_states[:, 2]

    # Folder that receives one PNG per block
    frame_dir = './images/' + log_name + '/gif_maker/'
    os.makedirs(frame_dir, exist_ok=True)

    block_size = 1000
    total_exp_no = 100_000
    # Never start a block beyond the recorded memories (would plot no data).
    last_index = min(total_exp_no, len(cart_pos))

    # Draw one frame per block, remembering the files in creation order.
    frame_paths = []
    for start in range(0, last_index, block_size):
        # x/y must be passed by keyword on current seaborn releases.
        sns.jointplot(x=cart_pos[start:start + block_size],
                      y=pole_ang[start:start + block_size],
                      kind='kde',
                      space=0,
                      ratio=9,
                      color=palette[3],
                      xlim=[cart_pos_min, cart_pos_max],
                      ylim=[pole_ang_min, pole_ang_max])
        plt.title(str(start/1000)+'K iterations')
        frame_path = os.path.join(frame_dir, str(start) + '.png')
        plt.savefig(fname=frame_path, format='png')
        plt.close()  # don't display figure
        frame_paths.append(frame_path)

    # Assemble the GIF from exactly the frames written above.
    images = [imageio.imread(frame_path) for frame_path in frame_paths]

    # The output folder is not guaranteed to exist on a fresh checkout.
    os.makedirs('./movies', exist_ok=True)
    imageio.mimsave('./movies/' + log_name + '.gif',
                    images,
                    'GIF',
                    duration=0.5)

In [12]:
# Render the per-block joint KDE frames for the selected run and combine them
# into ./movies/<run>.gif.
make_gif(exp1)