In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
import shutil

# LINUX: command-line arguments (currently commented out)
# import sys
# valuesindex = int(sys.argv[1])
# testi = int(sys.argv[2])
# testf = int(sys.argv[3])

def save_obj(obj, name):
    """Pickle ``obj`` to ``obj/<name>.pkl`` using the highest pickle protocol.

    Args:
        obj: Any picklable object.
        name: File stem (string) placed under the ``obj/`` directory, relative
            to the current working directory.

    Raises:
        OSError: If the directory or file cannot be created or written.
        pickle.PicklingError: If ``obj`` cannot be pickled.
    """
    path = 'obj/' + name + '.pkl'
    # Create the target directory on first use; without this the very first
    # save on a fresh checkout fails with FileNotFoundError.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object unpickled from ``obj/<name>.pkl``.

    Args:
        name: File stem (string) of the pickle under the ``obj/`` directory,
            relative to the current working directory.

    Returns:
        Whatever object the pickle file contains.
    """
    pickle_path = ''.join(('obj/', name, '.pkl'))
    # pickle.load can execute arbitrary code: only read files you trust.
    with open(pickle_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

In [2]:
def _plot_mean_band(ax, x, rows, label, color, alpha=0.1, logy=False):
    """Draw the column-wise mean of ``rows`` on ``ax`` with a +/- one std band.

    ``rows`` is a 2-D array with one row per run; mean and std are taken over
    axis 0. With ``logy=True`` the line is drawn with ``semilogy``.
    """
    mean = rows.mean(axis=0)
    std = rows.std(axis=0)
    draw = ax.semilogy if logy else ax.plot
    draw(x, mean, label=label, color=color)
    ax.fill_between(x, mean - std, mean + std, color=color, alpha=alpha)


def _new_time_figure(x, contexts_seen):
    """Open an 8x8 figure with 'Contexts seen' on the left axis; return (left, right) axes."""
    fig = plt.figure(figsize=(8, 8))
    left = fig.add_subplot(111)
    right = left.twinx()
    _plot_mean_band(left, x, contexts_seen, "Contexts seen", 'g', alpha=0.05)
    # The x label must go on the original axes: twinx() hides the x-axis of
    # the twin, so a label set there is never drawn.
    left.set_xlabel("Time")
    left.set_ylabel("Contexts seen")
    left.tick_params('y', colors='g')
    return left, right


def _show_time_figure(left, right, title, ylabel, tick_color, left_loc, right_loc):
    """Label the right-hand axis, add the title and both legends, then show the figure."""
    right.set_ylabel(ylabel)
    right.tick_params('y', colors=tick_color)
    right.set_title(title)
    left.legend(loc=left_loc)
    right.legend(loc=right_loc)
    plt.show()


def _show_test_figure(x, series, title, ylabel, legend_loc):
    """Plot each ``(rows, label, color)`` in ``series`` against contexts seen and show it."""
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    for rows, label, color in series:
        _plot_mean_band(ax, x, rows, label, color)
    ax.set_xlabel("Contexts seen")
    ax.set_ylabel(ylabel)
    ax.tick_params('y', colors='r')
    ax.set_title(title)
    ax.legend(loc=legend_loc)
    plt.show()


def make_graphs(repeats=1000):
    """Load pickled run results and plot mean +/- std curves across runs.

    Result files ``values0`` .. ``values<repeats-1>`` are read with
    ``load_obj``; indices with no file are skipped. Each loaded dict is
    expected to hold per-run series under ``(trainset, metric_name)`` keys and,
    optionally, test results under ``(trainset, metric_name, contexts)`` keys
    with ``contexts`` in steps of 5.

    Figures shown (in order): test value and test distance versus contexts seen
    (only when test results exist), then value, distance, normalized error and
    cumulative regret versus time, each with contexts seen on a second y-axis.

    Args:
        repeats: Number of result-file indices and trainset ids to scan.

    Returns:
        None. Prints a short summary and displays the figures.

    Raises:
        KeyError: If a run that has test results lacks an expected test key.
        ValueError: If runs have series of different lengths.
    """
    # --- Load every result file that exists -------------------------------
    dlist = []
    for i in range(repeats):
        try:
            dlist.append(load_obj("values" + str(i)))
        except FileNotFoundError:
            # Gaps in the file numbering are expected.
            continue
        except Exception as exc:
            # Stay best-effort for unreadable files, but say so instead of
            # hiding the problem (and never swallow KeyboardInterrupt).
            print("Skipping values" + str(i) + ":", repr(exc))

    # --- Find which dict supplies each run --------------------------------
    # Later files win when a trainset appears in more than one dict.
    run_source = {}
    test_source = {}
    for d in dlist:
        for trainset in range(repeats):
            if (trainset, "expert_values") in d:
                run_source[trainset] = d
            if (trainset, "test_value", 5) in d:
                test_source[trainset] = d

    if not run_source:
        print("No runs found; nothing to plot")
        return

    run_ids = sorted(run_source)
    # Take the series length from the first run actually present rather than
    # assuming the first file contains trainset 0.
    iters = len(run_source[run_ids[0]][run_ids[0], "expert_values"])
    if iters == 0:
        print("Runs contain no timesteps; nothing to plot")
        return

    # --- Stack per-run series: one row per loaded run ----------------------
    # Rows are kept by "was this run loaded", not by "is the row non-zero", so
    # every metric averages over the same set of runs even when a metric is
    # legitimately all zeros for some run.
    metric_names = ("expert_values", "agent_values", "cumm_regret",
                    "W_L2_dist", "W_Li_dist", "W_angular_dist", "contexts_seen")
    metrics = {name: np.zeros((len(run_ids), iters)) for name in metric_names}
    for row, trainset in enumerate(run_ids):
        d = run_source[trainset]
        for name in metric_names:
            metrics[name][row] = d[trainset, name]

    expert_values = metrics["expert_values"]
    agent_values = metrics["agent_values"]
    cumm_regret = metrics["cumm_regret"]
    W_L2_dist = metrics["W_L2_dist"]
    W_Li_dist = metrics["W_Li_dist"]
    W_angular_dist = metrics["W_angular_dist"]
    contexts_seen = metrics["contexts_seen"]

    contexts_seen_min = contexts_seen[:, -1].min()
    print("all runs had at least ",int(contexts_seen_min)," contexts")
    # Round down to a multiple of 5, then add 5 so the range below includes
    # that multiple itself.
    contexts_seen_min -= contexts_seen_min%5
    contexts_seen_min = int(contexts_seen_min)+5
    test_x = list(range(0, contexts_seen_min, 5))

    # --- Stack test results: one row per run that has them -----------------
    test_ids = sorted(test_source)
    test_shape = (len(test_ids), len(test_x))
    test_expert_value = np.zeros(test_shape)
    test_agent_value = np.zeros(test_shape)
    test_W_L2_dist = np.zeros(test_shape)
    test_W_Li_dist = np.zeros(test_shape)
    test_W_angular_dist = np.zeros(test_shape)
    for row, trainset in enumerate(test_ids):
        d = test_source[trainset]
        for col, contexts in enumerate(test_x):
            # NOTE(review): the expert test value is read from the plain
            # string key "test_value" (no trainset/contexts) - confirm this
            # matches what the producer writes.
            test_expert_value[row][col] = d["test_value"]
            test_agent_value[row][col] = d[trainset,"test_value",contexts]
            test_W_L2_dist[row][col] = d[trainset,"test_L2_dist",contexts]
            test_W_Li_dist[row][col] = d[trainset,"test_Li_dist",contexts]
            test_W_angular_dist[row][col] = d[trainset,"test_ang_dist",contexts]

    print("Number of runs used in graph:",len(expert_values))
    print("Timesteps in each run:",len(expert_values[0]))

    if test_ids:
        # Value graph (contexts)
        _show_test_figure(
            test_x,
            [(test_expert_value, "expert value", 'b'),
             (test_agent_value, "agent value", 'r')],
            "Test value(contexts seen)", "Value", 8)

        # Distance graph (contexts)
        _show_test_figure(
            test_x,
            [(test_W_L2_dist, "normalized L2 distance", 'c'),
             (test_W_Li_dist, "normalized L infinity distance", 'b'),
             (test_W_angular_dist, "angular distance", 'r')],
            "Test Distance(contexts seen)", "Distance", 9)

    x = list(range(iters))

    # Value graph (time)
    left, right = _new_time_figure(x, contexts_seen)
    _plot_mean_band(right, x, expert_values, "expert value", 'b')
    _plot_mean_band(right, x, agent_values, "agent value", 'r')
    _show_time_figure(left, right, "Value(t,contexts seen)", "Value", 'r', 4, 8)

    # Distance graph (time)
    left, right = _new_time_figure(x, contexts_seen)
    _plot_mean_band(right, x, W_L2_dist, "normalized L2 distance", 'c')
    _plot_mean_band(right, x, W_Li_dist, "normalized Linfinity distance", 'b')
    _plot_mean_band(right, x, W_angular_dist, "angular distance", 'r')
    _show_time_figure(left, right, "Distance(t,contexts seen)", "Distance", 'r', 2, 9)

    # Normalized value graph (time)
    # NOTE(review): entries where expert_values is 0 yield inf/nan here; this
    # is left as in the original - confirm whether such entries can occur.
    normalized_err = np.divide((expert_values-agent_values),expert_values)
    left, right = _new_time_figure(x, contexts_seen)
    _plot_mean_band(right, x, normalized_err, "normalized error", 'm', logy=True)
    _show_time_figure(left, right, "normalized error(t,contexts seen)",
                      "normalized error", 'm', 4, 8)

    # Regret (time)
    left, right = _new_time_figure(x, contexts_seen)
    _plot_mean_band(right, x, cumm_regret, "Cummulative regret", 'm')
    # Title previously read "Value(t,contexts seen)", copied from the value graph.
    _show_time_figure(left, right, "Regret(t,contexts seen)",
                      "Cummulative Regret", 'm', 4, 8)


In [None]:
# Build and display all figures from the pickled run results found under obj/.
make_graphs()