# Dashboard for DRL

In [None]:
import numpy as np
import pandas as pd
from os.path import join as pjoin

import bokeh

from __future__ import print_function
from ipywidgets import interact
import ipywidgets as widgets

In [None]:
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_notebook, show, output_file, reset_output, gridplot, save

# Clear any Bokeh output configuration left over from a previous run, then
# direct subsequent show() calls to render inline in the notebook.
reset_output()
output_notebook()

Please change the working directory in the code cell below so that it points to your own `callbacks` folder.

If you want to have more learners represented, increase the variable n_actors.

In [None]:
# Root folder that contains the "actor<N>" and "tester" sub-folders read by the
# cells below. Edit this path for your machine.
working_directory = "/home/alex/Documents/Projet_info/Deep-Reinforcement-Learning/callbacks"
#working_directory = "/Users/nicolashennetier/Deep-Reinforcement-Learning/callbacks"

n_actors = 1  # number of "actor<N>" folders to display (one tab each)
n_plot = 6    # only the first n_plot rows of the table below are plotted per actor

# One row per available plot: (csv file, title, x-axis label, y-axis label, line colour).
# NOTE(review): with n_plot = 6 the last row (action.csv) is never drawn; its
# title also looks like it carries a stray "gn" prefix -- confirm before enabling it.
_plot_specs = [
    ("rpe.csv", "Reward per Environment", "Environments", "Reward", "orange"),
    ("epsilon.csv", "Epsilon per Environment", "Environments", "Epsilon", "orange"),
    ("lr.csv", "Learning Rate per Environment", "Environments", "Learning Rate", "orange"),
    ("rewards.csv", "Reward per Iteration", "Iterations", "Reward", "lime"),
    ("random.csv", "Randomness", "Iterations", "Random ?", "navy"),
    ("diff.csv", "Norm of applied gradients", "Iterations", "Norm", "darkred"),
    ("action.csv", "gnActions taken", "Iterations", "Action", "purple"),
]

# Unzip the table into the parallel lists that the plotting cells index by position.
list_files, list_titles, list_x_labels, list_y_labels, colors = map(list, zip(*_plot_specs))

### Principal Information

In [None]:
reset_output()
output_notebook()

# Positions in list_files whose curve is replaced by a moving average before
# plotting (with the configured list order: lr.csv and random.csv).
SMOOTHED_PLOTS = (2, 4)
# Maximum width, in samples, of the moving-average window.
SMOOTHING_WINDOW = 100


def _moving_average(values, window=SMOOTHING_WINDOW):
    """Return a centred moving average of `values`, same length as the input.

    The window is capped at len(values): np.convolve(mode="same") returns
    max(len(values), window) samples, so an uncapped window would produce an
    array that no longer fits the DataFrame column when a run logged fewer
    than `window` points. An empty input is returned unchanged because
    np.convolve rejects empty arrays.
    """
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values
    width = min(window, values.size)
    return np.convolve(values, np.ones((width,)) / width, mode="same")


def _read_series(source, file_name):
    """Read <working_directory>/<source>/<file_name> as a header-less CSV into a "to_plot" column."""
    return pd.read_csv(pjoin(working_directory, source, file_name), header=None, names=["to_plot"])


def _line_figure(frame, i):
    """Build the styled line figure for plot number `i` from frame["to_plot"].

    Title, axis labels and colour are looked up at position `i` in the
    configuration lists; the x values are the frame's index.
    """
    fig = figure(title=list_titles[i], width=900, height=350)
    fig.grid.grid_line_alpha = 0
    fig.xaxis.axis_label = list_x_labels[i]
    fig.yaxis.axis_label = list_y_labels[i]
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1
    fig.line(frame.index, frame["to_plot"], color=colors[i])
    return fig


tabs = []
p = []  # p[j] holds the figures of actor j; the last entry holds the tester's figure

# One tab per actor, stacking the first n_plot figures vertically.
for j in range(n_actors):
    actor = "actor" + str(j)
    p.append([])
    for i in range(n_plot):
        data = _read_series(actor, list_files[i])
        if i in SMOOTHED_PLOTS:
            # Keep the raw values in "data" and plot the smoothed curve instead.
            data["data"] = data["to_plot"].values
            data["to_plot"] = _moving_average(data["data"].values)
        p[j].append(_line_figure(data, i))

    tabs.append(Panel(child=column(p[j]), title=actor))

# Final tab: the tester, for which only the first plot (list_files[0]) is shown.
p.append([])
data = _read_series("tester", list_files[0])
p[-1].append(_line_figure(data, 0))

tabs.append(Panel(child=column(p[-1]), title="tester"))

tabs_f = Tabs(tabs=tabs)
show(tabs_f)

### Study of one learner

In [None]:
# Discount factor used when accumulating per-step rewards into returns.
DISCOUNT = 0.9


def _discounted_returns(rewards, gamma=DISCOUNT):
    """Return G[t] = rewards[t] + gamma * G[t+1] for one run, with G past the end equal to 0.

    `rewards` is a 1-D array in chronological order; the result is a float
    array of the same length.
    """
    returns = np.empty(len(rewards), dtype=float)
    running = 0.0
    # Walk backwards so that each step can reuse the return of its successor.
    for t in range(len(rewards) - 1, -1, -1):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns


# history.csv is ';'-separated and header-less; the fourth column gets an empty name.
# Further reward columns were previously listed after "reward1":
# "reward2", "reward3", "reward4", "reward5", "reward6", "reward7", "reward8"
data = pd.read_csv(
    pjoin(working_directory, "actor0", "history.csv"),
    header=None,
    names=["times", "true", "estimated", "", "reward0", "reward1"],
    sep=";",
    index_col=False,
)

# Every row with times == 0 increments the counter, so consecutive rows share
# the same "indic" value until the next times == 0 row.
# NOTE(review): presumably one "indic" value per environment run -- confirm.
data["indic"] = (data["times"] == 0).astype(int).cumsum()

# Replace the logged per-step value in "true" by its discounted return, run by
# run. A single groupby pass replaces re-filtering the whole frame for every
# run; groups come out in increasing "indic" order, which is also row order
# because "indic" never decreases.
true = []
for _, run_rewards in data.groupby("indic")["true"]:
    true.extend(_discounted_returns(run_rewards.values))

data["true"] = true
nb_env = data["indic"].max()  # highest run id; upper bound of the sliders below

#### Differences between true and estimated reward

In [None]:
# Shared x axis of the history plots: 200 time steps.
plot_x = np.arange(200)


def plot_rewards(x):
    """Show the "true" (lime) and "estimated" (red) columns of the rows whose indic equals x.

    Each curve is laid over 200 points: shorter runs are padded with zeros,
    longer ones are cut after their first 200 rows.
    """
    run = data[data.indic == x]
    n = len(run["true"].values)

    def _padded(column_name):
        # Zero-filled buffer whose head is overwritten with the run's values.
        curve = np.zeros(200)
        curve[:n] = run[column_name].values[0:200]
        return curve

    fig = figure(title="History", width=900, height=350)
    fig.grid.grid_line_alpha = 0
    fig.xaxis.axis_label = "times"
    fig.yaxis.axis_label = "Rewards"
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1

    for column_name, line_color in (("true", "lime"), ("estimated", "red")):
        fig.line(plot_x, _padded(column_name), color=line_color)

    show(fig)


# Slider over the run ids; each move redraws the figure for the selected run.
interact(
    plot_rewards,
    x=widgets.IntSlider(min=1, max=nb_env, step=1, value=200),
)


#### Probabilities/rewards of actions

In [None]:
def plot_proba(x):
    """Show the "reward0" (navy) and "reward1" (red) columns of the rows whose indic equals x.

    Each curve is laid over 200 points: shorter runs are padded with zeros,
    longer ones are cut after their first 200 rows.
    """
    run = data[data.indic == x]
    n = len(run["true"].values)

    def _padded(column_name):
        # Zero-filled buffer whose head is overwritten with the run's values.
        curve = np.zeros(200)
        curve[:n] = run[column_name].values[0:200]
        return curve

    fig = figure(title="History", width=900, height=350)
    fig.grid.grid_line_alpha = 0
    fig.xaxis.axis_label = "times"
    fig.yaxis.axis_label = "Rewards"
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1

    for column_name, line_color in (("reward0", "navy"), ("reward1", "red")):
        fig.line(plot_x, _padded(column_name), color=line_color)

    show(fig)


# Slider over the run ids; each move redraws the figure for the selected run.
interact(
    plot_proba,
    x=widgets.IntSlider(min=1, max=nb_env, step=1, value=400),
)