In [1]:
import matplotlib.pyplot as plt

from distributions.sequence import Sequence

from algorithms.full_bandit_exp3 import FullBanditExp3

import numpy as np
import pandas as pd
import seaborn as sns

import pickle
import json

import os

In [2]:
def get_name(run_name):
    """Return the part of ``run_name`` that precedes its first underscore.

    When ``run_name`` contains no underscore the whole string is returned.
    """
    prefix, _, _ = run_name.partition("_")
    return prefix

def get_length(run_name):
    """Return the second underscore-separated field of ``run_name``.

    The field is returned as a string, exactly as it appears in ``run_name``.

    Raises:
        IndexError: if ``run_name`` contains no underscore.
    """
    fields = run_name.split("_")
    return fields[1]

def theoretical_bound(T, sequence: Sequence):
    """Evaluate the closed-form bound expression for horizon ``T``.

    The value returned is

        8 * m * sqrt(T * K * d * log(e * K / m) * log(sqrt(T) * m * sigma * R))

    where ``m``, ``K``, ``d``, ``sigma`` and ``R`` are read from the
    attributes of the same name on ``sequence``.
    """
    m, K, d = sequence.m, sequence.K, sequence.d

    # The two logarithmic factors that appear under the square root.
    action_log = np.log(np.e * K / m)
    scale_log = np.log(np.sqrt(T) * m * sequence.sigma * sequence.R)

    radicand = T * K * d * action_log * scale_log
    return 8 * m * np.sqrt(radicand)



In [3]:
def _list_subdirs(path):
    """Return the names of the sub-directories of ``path`` in sorted order.

    ``os.listdir`` returns entries in arbitrary order and also lists plain
    files. Sorting keeps the loaded dictionaries (and everything printed from
    them) in a reproducible order, and the directory filter skips stray files
    that are not part of the layout read below.
    """
    return sorted(
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )


# Layout read by this cell:
#   output/<dist>/<length>/<iteration>/sequence.json
#   output/<dist>/<length>/<iteration>/<algo>_general_info.json
# Results end up as results[dist][length][algo][metric] -> np.ndarray with one
# entry per iteration; sample_sequences[dist][length] holds the object stored
# for iteration "0".
output_root = "output"
reference_run = "0"  # iteration used to read the sample sequence and the algorithm names

sample_sequences = {}
results = {}
distributions = _list_subdirs(output_root)
for dist in distributions:
    results[dist] = {}
    sample_sequences[dist] = {}

    # Names are sorted as strings, so e.g. "10000" sorts before "5000".
    lengths = _list_subdirs(os.path.join(output_root, dist))

    for length in lengths:
        length_dir = os.path.join(output_root, dist, length)
        reference_dir = os.path.join(length_dir, reference_run)

        # Listed once per length: the set of iterations does not depend on the algorithm.
        iterations = _list_subdirs(length_dir)

        # NOTE(review): despite the ".json" name this file is read with pickle.
        # pickle.load can execute arbitrary code, so only load files you produced yourself.
        with open(os.path.join(reference_dir, "sequence.json"), "rb") as input_file:
            sample_sequences[dist][length] = pickle.load(input_file)

        # Algorithm names are the file-name prefixes before the first "_".
        # "sequence.json" contains no underscore, survives the split unchanged
        # and is therefore removed explicitly.
        algo_names = np.unique([x.split("_")[0] for x in os.listdir(reference_dir)])
        algo_names = algo_names[algo_names != "sequence.json"]
        print(algo_names)

        results[dist][length] = {}
        for algo_name in algo_names:
            metrics = {"regret": [], "gamma": [], "time_elapsed": []}

            for iteration in iterations:
                info_path = os.path.join(length_dir, iteration, f"{algo_name}_general_info.json")
                with open(info_path, "r") as input_file:
                    input_dict = json.load(input_file)

                for key, values in metrics.items():
                    values.append(input_dict[key])

            # Store arrays rather than lists so later cells can aggregate directly.
            results[dist][length][algo_name] = {
                key: np.array(values) for key, values in metrics.items()
            }

['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']
['FullBanditExp3Inv' 'NonContextualExp3' 'OnePerContext' 'UniformRandom']


In [5]:
# Print, for every (distribution, length) pair, the problem dimensions, the
# gamma recorded for FullBanditExp3Inv, the M constant of a freshly configured
# FullBanditExp3, and per-algorithm mean regret / regret std / mean elapsed time.
for dist in results.keys():
    for length in results[dist].keys():
        sequence = sample_sequences[dist][length]

        # The algorithm instance is only used to read its M constant below.
        algo = FullBanditExp3()
        algo.set_constants(np.random.default_rng(), sequence)

        print("\n")
        print(length, "d:", sequence.d, "K:", sequence.actionset.K, "m:", sequence.actionset.m)
        # Use the length currently being summarised; this lookup used to be
        # hard-coded to "100000", which reported the wrong gamma (or raised
        # KeyError) for any other length.
        print("gamma", results[dist][length]["FullBanditExp3Inv"]["gamma"][0], "M", algo.M)
        for algo_name, single_result in results[dist][length].items():
            print(algo_name, "regret:", np.average(single_result["regret"]), np.std(single_result["regret"]), np.average(single_result["time_elapsed"]))




100000 d: 12 K: 3 m: 2
gamma 0.11655959996928597 M 10610
FullBanditExp3Inv regret: 2687.0 93.41605857667085 12933.105047798157
NonContextualExp3 regret: 9144.08 185.0956336600083 1361.9169422340392
OnePerContext regret: 5033.68 2641.1156691065235 1540.1890019702912
UniformRandom regret: 13384.96 184.27598432785538 40.743307476043704


100000 d: 12 K: 5 m: 2
gamma 0.2178502392089596 M 9462
FullBanditExp3Inv regret: 7155.92 179.2745202196899 15961.52943985939
NonContextualExp3 regret: 18183.84 317.68130949113134 2538.1618492412567
OnePerContext regret: 9046.64 4977.750728029679 2745.829208469391
UniformRandom regret: 23926.36 250.44278867637615 43.729151468276974


100000 d: 12 K: 8 m: 2
gamma 0.3314942594617173 M 9949
FullBanditExp3Inv regret: 12853.0 204.47435046968604 23711.073092308045
NonContextualExp3 regret: 25138.92 303.53950912525374 2820.574728050232
OnePerContext regret: 13757.56 6923.16589476231 2255.8537255859374
UniformRandom regret: 30055.72 159.75068575752658 36.1630886

In [None]:
# Plot, for one fixed run, the first column of every algorithm's saved
# probability array on a shared axis.
plot_dist = "BinaryContext1;3_IndependentBernoulli5;3"
plot_length = "10000"
plot_run_dir = f"output/{plot_dist}/{plot_length}/0"

fig, ax = plt.subplots(figsize=(15, 15))

# Iterate over the algorithms recorded for the run being plotted instead of
# relying on whatever `algo_names` was left over from the loading loop.
for algo in results[plot_dist][plot_length].keys():
    # NOTE(review): np.loadtxt splits on whitespace by default — confirm the
    # ".csv" files are not comma-separated.
    data = np.loadtxt(f"{plot_run_dir}/{algo}_probability_array.csv")[:, 0]
    ax.plot(data, label=algo)

ax.set(
    title=plot_run_dir,
    xlabel="row index",
    ylabel="probability_array[:, 0]",
)
fig.legend()
# plt.show() rather than fig.show(): the latter only emits a warning on the
# non-GUI inline backend used by notebooks.
plt.show()

In [None]:
# For each length, print the average regret next to the theoretical bound and
# plot the probability array (last-axis index 0) split by the first context
# feature.
#
# NOTE(review): this cell reads results[name][length]["sequences"] and
# ["probability_array"], while the loading cell in this notebook builds
# results[dist][length][algo_name] with only "regret", "gamma" and
# "time_elapsed". It looks like it targets an older results layout — confirm
# before running.
for name in ["FullBanditExp3"]:
    for length in results[name].keys():
        single_result = results[name][length]
        sequence = single_result["sequences"][0]

        theo = theoretical_bound(int(length), sequence)
        print(name, length, "regret:", np.average(single_result["regret"]), f"({theo:.2f})")

        data = single_result["probability_array"][:, :, 0]

        # Boolean mask over the columns of `data`, taken from the first context
        # feature (the last context row is dropped). Computed once and reused.
        first_feature_on = sequence.contexts[:-1, 0].astype(bool)
        data1 = data[:, first_feature_on]
        data2 = data[:, ~first_feature_on]

        # One figure per length: drawing on the implicit current axes would
        # stack every length on the same plot with duplicate legend entries.
        fig, ax = plt.subplots()
        sns.lineplot(data=pd.DataFrame(data1).melt(), x="variable", y="value", label="context 1", ax=ax)
        sns.lineplot(data=pd.DataFrame(data2).melt(), x="variable", y="value", label="context 2", ax=ax)
        ax.set(
            title=f"{name}, T={length}",
            xlabel="column index within context subset",
            ylabel="probability_array[:, :, 0]",
        )
        plt.show()

