# In [5]:

import yaml
import matplotlib.pyplot as plt
import gym
from dynamic_programming_env_DCP import dynamic_programming_env_DCP
from visualization_and_metrics import average_n_episodes, visualize_policy_RM
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Slider, LogColorMapper, LogTicker, ColorBar, Paragraph
from bokeh.models.widgets import CheckboxGroup
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.io import show, output_notebook
from bokeh.palettes import Viridis7, Greys256, Viridis256
import numpy as np
from bokeh.transform import dodge

# Configure Bokeh (bokeh.io.output_notebook) so that later show() calls render
# inline in the notebook rather than in a separate output.
output_notebook()

# Number of episodes simulated for every estimate shown in the dashboard.
_N_EPISODES = 1000
# Number of bins of the revenue histogram.
_N_BINS = 15

# Banner templates. Fixed-point formatting is used on purpose: a bare
# precision such as "{:.2}" falls back to general formatting and renders
# values like 5432.1 as "5e+03".
_LOAD_FACTOR_FMT = "Load factor: {:.2f}"
_DEMAND_RATIO_FMT = "Demand ratio: {:.2f}"
_MEAN_REVENUE_FMT = "Mean revenue: {:.1f}"


def _policy_to_price_image(env, policy):
    """Turn a policy of action indices into the price image of the heat map.

    Each entry of *policy* is used as an index into ``env.A``. The result is
    reshaped to ``(env.T, env.C)``, its last row and last column are dropped,
    and it is flipped vertically before being handed to the image glyph.
    """
    prices = [env.A[int(k)] for k in policy.flatten()]
    grid = np.reshape(prices, (env.T, env.C))
    return np.flipud(grid[:env.T - 1, :env.C - 1])


def _image_columns(image, env, y_anchor):
    """Return the column dict expected by the ``image`` glyphs for *image*."""
    return dict(image=[image], dh=[env.T - 1], dw=[env.C - 1], x=[0], y=[y_anchor])


def _histogram_columns(revenues):
    """Return the ``quad`` glyph columns of a histogram of *revenues*."""
    hist, edges = np.histogram(revenues, bins=_N_BINS)
    return dict(left=edges[:-1], right=edges[1:], y=hist)


def modify_doc(doc):
    """Build the interactive pricing dashboard and add it to *doc*.

    The function creates a ``gym_RMDCP:RMDCP-v0`` environment, computes a
    policy with ``dynamic_programming_env_DCP``, simulates ``_N_EPISODES``
    episodes with ``env.average_n_episodes`` and displays:

    * a heat map of the prices selected by the policy,
    * a heat map of ``env.trajectory_matrix``,
    * a bar chart of bookings and proposed prices per price point,
    * a histogram of revenues with a slider selecting the time index.

    Sliders and check boxes re-create the environment and refresh every data
    source whenever one of them changes.

    Parameters
    ----------
    doc :
        The Bokeh document the layout is attached to (via ``doc.add_root``).
    """
    micro_times = 1
    initial_alpha = 0.6
    initial_lambda = 1
    T_max = 100
    C_max = 50

    possible_actions = list(range(50, 231, 20))
    labels = [str(price) for price in possible_actions]

    def solve(data_collection_points, capacity, actions, alpha, lamb):
        """Create an environment, compute its policy and simulate episodes."""
        env = gym.make('gym_RMDCP:RMDCP-v0', data_collection_points=data_collection_points,
                       capacity=capacity, micro_times=micro_times, actions=actions,
                       alpha=alpha, lamb=lamb)
        _, policy = dynamic_programming_env_DCP(env)
        stats = env.average_n_episodes(policy, _N_EPISODES)
        return env, policy, stats

    env, policy, stats = solve(T_max, C_max, possible_actions, initial_alpha, initial_lambda)
    mean_revenue, revenue_at_each_time, revenues, bookings, prices_proposed = stats

    # --- Bar chart: bookings and proposed prices per price point -----------
    source = ColumnDataSource(data=dict(x=labels, y1=bookings, y2=prices_proposed))
    p1 = figure(x_range=labels, plot_height=350)
    p1.xaxis.axis_label = 'Prices'
    # The label is derived from the constant so it cannot drift from the
    # number of episodes actually simulated.
    p1.yaxis.axis_label = 'Average computed on {} episodes'.format(_N_EPISODES)
    p1.vbar(x=dodge('x', -0.15, range=p1.x_range), top='y1', width=0.2, source=source,
            color="#718dbf", legend='Bookings')
    p1.vbar(x=dodge('x', 0.15, range=p1.x_range), top='y2', width=0.2, source=source,
            color="#e84d60", legend='Prices proposed by the optimal policy')

    # --- Heat map of the prices chosen by the policy ------------------------
    source2 = ColumnDataSource(
        data=_image_columns(_policy_to_price_image(env, policy), env, T_max - 1))
    p2 = figure(x_range=(0, C_max - 1), y_range=(T_max - 1, 0),
                title="Prices coming from the optimal policy")
    p2.xaxis.axis_label = 'Capacity'
    p2.yaxis.axis_label = 'Number of microtimes'
    price_mapper = LogColorMapper(palette=Viridis256, low=possible_actions[0],
                                  high=possible_actions[-1])
    p2.image(image='image', color_mapper=price_mapper, dh='dh', dw='dw', x='x', y='y',
             source=source2)
    color_bar = ColorBar(ticker=LogTicker(), color_mapper=price_mapper,
                         label_standoff=12, border_line_color=None, location=(0, 0))
    p2.add_layout(color_bar, 'right')

    # --- Heat map of the simulated trajectories -----------------------------
    source3 = ColumnDataSource(
        data=_image_columns(np.flipud(env.trajectory_matrix), env, T_max - 1))
    p3 = figure(x_range=(0, C_max - 1), y_range=(T_max - 1, 0),
                title="Trajectories coming from the optimal policy")
    p3.xaxis.axis_label = 'Capacity'
    p3.yaxis.axis_label = 'Number of microtimes'
    # Use a reversed *copy* of the palette. Reversing Greys256 in place would
    # mutate the shared module-level palette, flipping the colours on every
    # new session, and fails outright when the palette is a tuple.
    trajectory_mapper = LogColorMapper(palette=Greys256[::-1])
    p3.image(image='image', color_mapper=trajectory_mapper, dh='dh', dw='dw', x='x', y='y',
             source=source3)

    # --- Revenue histogram --------------------------------------------------
    source4 = ColumnDataSource(data=_histogram_columns(revenues))
    p4 = figure(plot_height=350, title='Revenue distribution')
    p4.xaxis.axis_label = 'Revenue'
    p4.quad(top='y', bottom=0, left='left', right='right', source=source4,
            color="#718dbf")

    # Not attached to any glyph: only keeps the per-time revenues available
    # to the time-slider callback between two recomputations.
    source5 = ColumnDataSource(data=dict(true_revenue_each_time=revenue_at_each_time))

    text_banner1 = Paragraph(text=_LOAD_FACTOR_FMT.format(np.sum(bookings) / env.C))
    text_banner2 = Paragraph(text=_DEMAND_RATIO_FMT.format((env.lamb * env.T) / env.C))
    text_banner3 = Paragraph(text=_MEAN_REVENUE_FMT.format(mean_revenue))

    def callback(attr, old, new):
        """Recompute the policy and statistics after a widget change."""
        # Sorted so that the prices handed to the environment stay in the
        # same order as the check-box labels.
        action_idx = sorted(checkbox_group.active)
        data_collection_points = int(microtime_slider.value)
        capacity = int(capacity_slider.value)
        # Nothing sensible can be drawn without at least one price, and the
        # heat maps drop one row and one column, so a horizon or capacity
        # below 2 would give empty images (and a capacity of 0 a division by
        # zero in the banners). Keep the previous state in those cases.
        # NOTE(review): assumes env.T / env.C follow the slider values - confirm.
        if not action_idx or data_collection_points < 2 or capacity < 2:
            return

        actions = [possible_actions[k] for k in action_idx]
        time_slider.end = data_collection_points - 1
        if data_collection_points <= time_slider.value:
            time_slider.value = data_collection_points - 1
        t = int(time_slider.value)

        env, policy, stats = solve(data_collection_points, capacity, actions,
                                   alpha_slider.value, lambda_slider.value)
        _, revenue_at_each_time, _, bookings, prices_proposed = stats
        revenues_at_t = revenue_at_each_time[t]

        # Scatter the per-action statistics back onto the full price axis;
        # unselected prices keep a value of zero.
        total_bookings = np.zeros(len(possible_actions))
        total_prices_proposed = np.zeros(len(possible_actions))
        for idx, booked, proposed in zip(action_idx, bookings, prices_proposed):
            total_bookings[idx] = booked
            total_prices_proposed[idx] = proposed

        # Plain dicts are assigned directly: building a temporary
        # ColumnDataSource only to copy its ``.data`` is unnecessary.
        source.data = dict(x=labels, y1=total_bookings, y2=total_prices_proposed)
        source2.data = _image_columns(_policy_to_price_image(env, policy), env, env.T - 1)
        source3.data = _image_columns(np.flipud(env.trajectory_matrix), env, env.T - 1)
        source4.data = _histogram_columns(revenues_at_t)
        source5.data = dict(true_revenue_each_time=revenue_at_each_time)
        text_banner1.text = _LOAD_FACTOR_FMT.format(np.sum(total_bookings) / env.C)
        text_banner2.text = _DEMAND_RATIO_FMT.format((env.lamb * env.T) / env.C)
        text_banner3.text = _MEAN_REVENUE_FMT.format(np.mean(revenues_at_t))

    def callback_revenue_distribution(attr, old, new):
        """Refresh the histogram and mean revenue for the selected time index."""
        t = int(time_slider.value)
        revenues_at_t = source5.data['true_revenue_each_time'][t]
        source4.data = _histogram_columns(revenues_at_t)
        text_banner3.text = _MEAN_REVENUE_FMT.format(np.mean(revenues_at_t))

    checkbox_group = CheckboxGroup(labels=labels, active=list(range(len(possible_actions))))
    checkbox_group.on_change('active', callback)

    time_slider = Slider(start=0., end=T_max - 1, value=T_max - 1, step=1, title="Time")
    time_slider.on_change('value', callback_revenue_distribution)
    microtime_slider = Slider(start=0., end=T_max, value=T_max, step=1,
                              title="Number of micro-times")
    capacity_slider = Slider(start=0., end=C_max, value=C_max, step=1, title="Capacity")
    alpha_slider = Slider(start=0., end=1., value=initial_alpha, step=0.1,
                          title="Price sensitivity")
    lambda_slider = Slider(start=0., end=1., value=initial_lambda, step=0.1,
                           title="Arrival rate")
    for slider in (alpha_slider, lambda_slider, microtime_slider, capacity_slider):
        slider.on_change('value', callback)

    doc.add_root(column(
        row(alpha_slider, lambda_slider, checkbox_group),
        row(microtime_slider, capacity_slider),
        p2,
        p3,
        row(p1, column(text_banner1, text_banner2)),
        row(p4, column(text_banner3, time_slider)),
    ))
# Hand the application function to Bokeh's show(); modify_doc is called with
# the document it should populate.
show(modify_doc)
