In [1]:
import matplotlib.pyplot as plt

from distributions.sequence import Sequence

from algorithms.full_bandit_exp3 import FullBanditExp3

import numpy as np
import pandas as pd
import seaborn as sns

import pickle
import json

import os

In [2]:
def get_name(run_name):
    """Return the portion of ``run_name`` that precedes its first underscore.

    When ``run_name`` contains no underscore the whole string is returned.
    """
    # Only the first field is needed, so a single split is enough.
    head, *_ = run_name.split("_", 1)
    return head

def get_length(run_name):
    """Return the second underscore-separated field of ``run_name``.

    The field is returned as a string. Raises ``IndexError`` when
    ``run_name`` contains no underscore.
    """
    # Fields beyond the second are irrelevant, so stop splitting after two cuts.
    fields = run_name.split("_", 2)
    return fields[1]

def theoretical_bound(T, sequence: Sequence):
    """Evaluate the closed-form bound for horizon ``T`` on ``sequence``.

    Computes ``8 * m * sqrt(T * K * d * ln(e * K / m) * ln(sqrt(T) * m * sigma * R))``
    where ``m``, ``K``, ``d``, ``sigma`` and ``R`` are read as attributes of
    ``sequence``.

    NOTE(review): another cell of this notebook reads ``K`` and ``m`` from
    ``sequence.actionset`` rather than from ``sequence`` directly - confirm
    that both spellings exist on ``Sequence``.
    """
    m, K, d = sequence.m, sequence.K, sequence.d
    # The two logarithmic factors under the square root.
    arm_log = np.log(np.e * K / m)
    horizon_log = np.log(np.sqrt(T) * m * sequence.sigma * sequence.R)
    return 8 * m * np.sqrt(T * K * d * arm_log * horizon_log)



In [3]:
def _list_subdirs(path):
    """Return the names of the directories directly inside ``path``, sorted.

    ``os.listdir`` reports entries in arbitrary order and also returns plain
    files (for example OS or editor artefacts). Filtering to directories and
    sorting keeps stray files from breaking the load and makes the load order
    reproducible between runs.
    """
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )


# Metrics copied out of every "<algo>_general_info.json" file.
metric_keys = ("regret", "gamma", "time_elapsed")

# Layout produced by this cell (directory tree: output/<dist>/<length>/<iteration>/):
#   sample_sequences[dist][length]       -> object unpickled from iteration "0"
#   results[dist][length][algo][metric]  -> np.ndarray, one entry per iteration
sample_sequences = {}
results = {}
distributions = _list_subdirs("output/")
for dist in distributions:
    results[dist] = {}
    sample_sequences[dist] = {}

    lengths = _list_subdirs(f"output/{dist}")

    for length in lengths:
        length_dir = f"output/{dist}/{length}"
        results[dist][length] = {}

        # NOTE(review): despite its ".json" name this file is read with pickle.
        # Unpickling executes arbitrary code, so only load outputs you produced
        # yourself.
        with open(f"{length_dir}/0/sequence.json", "rb") as input_file:
            sample_sequences[dist][length] = pickle.load(input_file)

        # Algorithm names are the part of each file name before the first "_";
        # "sequence.json" has no underscore and is dropped explicitly.
        algo_names = os.listdir(f"{length_dir}/0")
        algo_names = np.unique([x.split("_")[0] for x in algo_names])
        algo_names = algo_names[algo_names != "sequence.json"]
        print(algo_names)

        # The set of iterations is the same for every algorithm: list it once.
        iterations = _list_subdirs(length_dir)

        for algo_name in algo_names:
            collected = {key: [] for key in metric_keys}

            for iteration in iterations:
                with open(f"{length_dir}/{iteration}/{algo_name}_general_info.json", "r") as input_file:
                    input_dict = json.load(input_file)

                for key in metric_keys:
                    collected[key].append(input_dict[key])

            # Store arrays directly so no second conversion pass is needed.
            results[dist][length][algo_name] = {
                key: np.array(values) for key, values in collected.items()
            }

['FullBanditExp3InvM=10' 'FullBanditExp3M=10' 'NonContextualExp3M=10'
 'OnePerContextM=10' 'UniformRandomM=10']
['FullBanditExp3InvM=10' 'FullBanditExp3M=10' 'NonContextualExp3M=10'
 'OnePerContextM=10' 'UniformRandomM=10']
['FullBanditExp3InvM=10' 'FullBanditExp3M=10' 'NonContextualExp3M=10'
 'OnePerContextM=10' 'UniformRandomM=10']
['FullBanditExp3InvM=10' 'FullBanditExp3M=10' 'NonContextualExp3M=10'
 'OnePerContextM=10' 'UniformRandomM=10']
['FullBanditExp3InvM=10' 'FullBanditExp3M=10' 'NonContextualExp3M=10'
 'OnePerContextM=10' 'UniformRandomM=10']


In [6]:
# Print one summary per (distribution, length): the sequence dimensions, the
# gamma / M constants of FullBanditExp3, and for every algorithm the mean
# regret, the standard deviation of the regret and the mean elapsed time.
reference_algo = "FullBanditExp3M=10"  # run whose recorded gamma is reported

for dist, per_length in results.items():
    for length, per_algo in per_length.items():
        sequence = sample_sequences[dist][length]

        # A fresh algorithm instance is configured only to read back `M`.
        algo = FullBanditExp3()
        algo.set_constants(np.random.default_rng(), sequence)

        print("\n")
        print(length, "d:", sequence.d, "K:", sequence.actionset.K, "m:", sequence.actionset.m)
        # Look up gamma for the length being summarised. This was previously
        # hard-coded to "20000", which reports the wrong run (or raises
        # KeyError) for any other sequence length.
        print("gamma", per_algo[reference_algo]["gamma"][0], "M", algo.M)

        for algo_name, single_result in per_algo.items():
            print(algo_name, "regret:", np.average(single_result["regret"]), np.std(single_result["regret"]), np.average(single_result["time_elapsed"]))




20000 d: 12 K: 3 m: 2
gamma 0.24812369487532912 M 4518
FullBanditExp3InvM=10 regret: 2679.0 111.67027357358806 6473.214283794165
FullBanditExp3M=10 regret: 2623.0 49.59334632790976 3166.111566454172
NonContextualExp3M=10 regret: 2533.0625 99.65908685990455 95.71278712153435
OnePerContextM=10 regret: 1993.625 59.410515693772595 86.57697816193104
UniformRandomM=10 regret: 2662.75 68.03078347336594 3.808442384004593


20000 d: 3 K: 3 m: 2
gamma 0.11247196073750401 M 2048
FullBanditExp3InvM=10 regret: 3005.1875 101.44531701241807 638.7364738434553
FullBanditExp3M=10 regret: 2543.25 62.81470767264622 881.9507604390383
NonContextualExp3M=10 regret: 2642.0625 67.37624650980493 106.82167999446392
OnePerContextM=10 regret: 875.1875 51.11777913554148 104.845582857728
UniformRandomM=10 regret: 2661.1875 65.05787687705464 4.760405719280243


20000 d: 3 K: 5 m: 2
gamma 0.210210429320471 M 1826
FullBanditExp3InvM=10 regret: 4045.25 101.991727605723 2386.9883454591036
FullBanditExp3M=10 regret: 473

In [None]:
# Plot column 0 of every algorithm's probability array for one fixed run.
#
# NOTE(review): `algo_names` is whatever the data-loading cell left behind
# after its last loop iteration, and the run directory below is hard-coded;
# confirm that both refer to the same experiment before trusting the figure.
fig, axs = plt.subplots(figsize=(15, 15))

for algo in algo_names:
    data = np.loadtxt(f"output/BinaryContext1;3_IndependentBernoulli5;3/10000/0/{algo}_probability_array.csv")[:, 0]
    axs.plot(data, label=algo)

fig.legend()
# plt.show() instead of fig.show(): Figure.show() warns on non-GUI backends
# such as the notebook inline backend.
plt.show()

In [None]:
# For each sequence length of the "FullBanditExp3" entry: print the mean regret
# next to the theoretical bound, then draw two line plots of the probability
# array, one for the columns where the first context feature is truthy and one
# for the remaining columns.
#
# NOTE(review): this cell reads results[name][length]["sequences"] and
# ["probability_array"], whereas the loading cell of this notebook builds
# results[dist][length][algo] with only "regret", "gamma" and "time_elapsed".
# It looks written for an older layout - confirm before running.
for name in ["FullBanditExp3"]:
    for length, single_result in results[name].items():
        sequence = single_result["sequences"][0]

        theo = theoretical_bound(int(length), sequence)
        mean_regret = np.average(single_result["regret"])
        print(name, length, "regret:", mean_regret, f"({theo:.2f})")

        # Keep index 0 of the last axis of the probability array.
        data = single_result["probability_array"][:, :, 0]

        # Boolean column selector built from the first context feature; the
        # final context row is excluded by the [:-1] slice.
        first_feature_on = sequence.contexts[:-1, 0].astype(bool)

        for column_mask, label in (
            (first_feature_on, "context 1"),
            (~first_feature_on, "context 2"),
        ):
            dataframe = pd.DataFrame(data[:, column_mask]).melt()
            sns.lineplot(data=dataframe, x="variable", y="value", label=label)

