In [None]:
import pandas as pd
import ast
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [None]:
def load_file(fname):
    """Read a log file and keep the part of each line before the first tab.

    Parameters
    ----------
    fname : str
        Path of the text file to read.

    Returns
    -------
    list of str or None
        One entry per line, truncated at the first tab character. A line
        without a tab is kept whole, including its trailing newline.
        If anything goes wrong (missing file, decoding error, ...) a message
        is printed and ``None`` is returned.
    """
    try:
        with open(fname, 'r') as handle:
            # Everything after the first tab is dropped; the ';'-separated
            # payload in front of it is passed through untouched.
            return [raw_line.split('\t')[0] for raw_line in handle]
    except Exception as e:
        print("Exception Occurred while Parsing !!", e)
        return None


def convert_to_pandas(data):
    """Parse ';'-separated log records into a DataFrame.

    Each record must hold at least seven ';'-separated fields, read by
    position::

        episode;steps;reward;action;state;done;qvalue

    ``episode``, ``steps``, ``reward`` and ``qvalue`` are parsed with
    ``float``; ``action`` and ``state`` are Python literals parsed with
    ``ast.literal_eval`` (``state`` must be indexable with at least two
    elements); ``done`` becomes 0 when the field contains the text
    ``False`` and 1 otherwise.

    Parameters
    ----------
    data : iterable of str
        Raw records, with or without trailing newlines. Blank or
        whitespace-only records are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per record with columns ``episode, steps, x, y, action,
        state, reward, done, qvalue`` where ``x``/``y`` are ``state[0]`` and
        ``state[1]``. Empty input yields an empty frame with these columns.

    Raises
    ------
    ValueError, SyntaxError, IndexError
        If a non-blank record is malformed.
    """
    header = ['episode', 'steps', 'x', 'y', 'action', 'state', 'reward', 'done', 'qvalue']
    rows = []

    for record in data:
        record = record.rstrip()
        if not record:
            # A stray empty line (typically the last line of the log) would
            # otherwise crash on float('').
            continue

        parts = record.split(";")
        episode = float(parts[0])
        steps = float(parts[1])
        reward = float(parts[2])
        action = ast.literal_eval(parts[3])
        state = ast.literal_eval(parts[4])
        done = 0 if 'False' in parts[5] else 1
        qvalue = float(parts[6])
        rows.append([episode, steps, state[0], state[1], action, state, reward, done, qvalue])

    return pd.DataFrame(rows, columns=header)

def simulation_agg(df):
    """Summarise a per-step log as one row per episode.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``episode``, ``steps`` and ``reward`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``episode``, ``steps`` (maximum step value seen in the
        episode), ``reward`` (sum of rewards in the episode) and
        ``quintile`` (the episode number cut into five equal-width bins
        labelled ``'1st'`` .. ``'5th'``), sorted by episode.
    """
    # String aggregator names are used instead of np.max / np.sum: passing
    # the NumPy callables to GroupBy.agg raises a FutureWarning on recent
    # pandas versions. A single named aggregation also removes the merge.
    result = (
        df.groupby('episode')
        .agg(steps=('steps', 'max'), reward=('reward', 'sum'))
        .reset_index()
    )

    result['quintile'] = pd.cut(result['episode'], 5, labels=['1st', '2nd', '3rd', '4th', '5th'])

    return result

def get_state_index(state, stride=5):
    """Flatten a two-element state into a single integer index.

    Parameters
    ----------
    state : sequence
        Indexable with at least two elements; only ``state[0]`` and
        ``state[1]`` are read.
    stride : int, optional
        Multiplier applied to ``state[0]``. Defaults to 5, the value that
        was previously hard-coded.

    Returns
    -------
    ``state[0] * stride + state[1]``

    Notes
    -----
    The mapping is only collision-free while ``0 <= state[1] < stride``.
    NOTE(review): confirm the default of 5 matches the environment's grid
    size wherever the index is used to address a table with more than 25 rows.
    """
    return state[0] * stride + state[1]

In [None]:
# Location of the training log, given relative to the kernel's working directory.
LOG_FILE = 'logs/training_logs.log'
# Raw ';'-separated records, one string per log line. load_file prints a
# message and returns None if the file cannot be read, in which case the
# conversion on the next line fails.
data = load_file(LOG_FILE)
# One DataFrame row per parsed log record.
df = convert_to_pandas(data)

In [None]:
# `simulation_agg` names both the helper function and (after this cell) its
# result, so a plain `simulation_agg = simulation_agg(df)` fails with
# "'DataFrame' object is not callable" when the cell is run a second time.
# Keep a private handle on the function the first time through so the cell
# can be re-run, while later cells still find the per-episode summary under
# the same name.
if callable(simulation_agg):
    _simulation_agg_fn = simulation_agg
simulation_agg = _simulation_agg_fn(df)
simulation_agg

In [None]:
# df[df['episode']==df[df['done']==1]['episode']]

In [None]:
def plot_progress_reward_distribution(df):
    """Plot the mean reward of each episode as a smoothed line chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``episode`` and ``reward`` columns. Rows that share an
        episode are averaged, so a frame with one row per episode is plotted
        as-is.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # groupby does not modify its input, so no defensive copy is needed.
    # `.mean()` replaces `agg(np.mean)`, which raises a FutureWarning on
    # recent pandas versions.
    per_episode = df.groupby('episode')['reward'].mean().reset_index()

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=per_episode['episode'],
        y=per_episode['reward'],
        mode='lines',
        name='reward',
        line=dict(shape='spline', smoothing=1.3),
    ))
    fig.update_layout(
        height=650,
        width=950,
        title="Reward Distribution",
        # Axis labels so the figure can be read on its own.
        xaxis_title="episode",
        yaxis_title="reward",
    )
    fig.show()

In [None]:
# At this point `simulation_agg` is the per-episode summary DataFrame bound by
# the earlier cell, not the function of the same name. It holds one row per
# episode, so the per-episode mean taken inside the plot equals the summed
# reward stored in that row.
plot_progress_reward_distribution(simulation_agg)

In [None]:
q_table_agg = np.zeros([df.size, 49, 4])
q_table = np.zeros([49, 4])
for index, row in df.iterrows():
    state_index = get_state_index(row['state'])
    action_index = row['action'][1]
    q_table[state_index, action_index] = row['qvalue']
    q_table_agg[index] = q_table

In [None]:
import numpy as np
import plotly.graph_objs as go

M = q_table_agg

fig = go.Figure(
    data=[go.Heatmap(z=M[0])],
    layout=go.Layout(
        title="Frame 0",
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None]),
                     dict(label="Pause",
                          method="animate",
                          args=[None,
                               {"frame": {"duration": 0, "redraw": False},
                                "mode": "immediate",
                                "transition": {"duration": 0}}])
                    ]),
                    ]
    ),
    frames=[go.Frame(data=[go.Heatmap(z=M[i])],
                     layout=go.Layout(title_text=f"Frame {i}")) 
            for i in range(1, 100)]
)

fig.show()