In [3]:
import numpy as np
from multiprocessing import Pool

from functools import partial
import pickle as pkl
import gzip
import os

import games

import TSPM


import subprocess

def evaluate_parallel(nbCores, n_folds, horizon, alg, game, type):
    """Evaluate ``alg`` on ``game`` over ``n_folds`` independent runs in parallel.

    For every fold a two-outcome distribution ``[p, 1 - p]`` is drawn and the
    pair ``(distribution, fold_index)`` is handed to
    ``Evaluation.eval_policy_once`` in a worker process.

    Parameters
    ----------
    nbCores : int
        Number of worker processes in the pool.
    n_folds : int
        Number of independent runs (one sampled distribution per run).
    horizon : int
        Number of rounds played in each run.
    alg, game :
        Policy and game objects forwarded to ``eval_policy_once``. They are
        shipped to the workers by ``Pool.map`` and therefore must be picklable.
    type : str
        ``'imbalanced'`` draws ``p`` uniformly from [0, 0.2); any other value
        draws ``p`` uniformly from [0.4, 0.5). (The name shadows the builtin
        but is kept so keyword callers keep working.)

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the per-fold results, in fold order.

    Notes
    -----
    Re-seeds NumPy's global RNG with 1 as a side effect, so the sampled
    distributions are identical across calls with the same arguments.
    """
    print("nbCores:", nbCores, "nbFolds:", n_folds, "Horizon:", horizon)

    # The context manager guarantees the workers are torn down even when a
    # job raises; previously every call left `nbCores` processes behind.
    with Pool(processes=nbCores) as pool:
        task = Evaluation(horizon, )

        np.random.seed(1)

        # The regime is fixed for the whole call, so pick the range once.
        if type == 'imbalanced':
            low, high = 0, 0.2
        else:
            low, high = 0.4, 0.5

        distributions = []
        for _ in range(n_folds):
            # One scalar draw per fold keeps the random stream unchanged.
            p = np.random.uniform(low, high)
            distributions.append([p, 1 - p])

        jobs = zip(distributions, range(n_folds))
        # `map` blocks until every fold is done, so all results are collected
        # before the pool is terminated on leaving the `with` block.
        results = pool.map(partial(task.eval_policy_once, alg, game), jobs)

    return np.asarray(results)



class Evaluation:
    """Plays a policy against a game for a fixed number of rounds and
    reports the cumulative regret after each round."""

    def __init__(self, horizon, ):
        # Number of rounds simulated per run.
        self.horizon = horizon

    def get_outcomes(self, game):
        """Sample ``self.horizon`` outcome indices using the probabilities
        stored in ``game.outcome_dist`` (in the dict's iteration order)."""
        probabilities = list(game.outcome_dist.values())
        return np.random.choice(game.n_outcomes, p=probabilities, size=self.horizon)

    def get_feedback(self, game, action, outcome):
        """Return the entry of ``game.FeedbackMatrix`` for ``(action, outcome)``."""
        feedback_row = game.FeedbackMatrix[action]
        return feedback_row[outcome]

    def eval_policy_once(self, alg, game, job):
        """Run one simulation and return the regret trajectory.

        ``job`` is a ``(distribution, jobid)`` pair: ``distribution`` holds the
        two outcome probabilities and ``jobid`` seeds NumPy's global RNG.
        The result is the vector of per-action gaps (``game.delta``) multiplied
        with the matrix of cumulative action counts, one column per round.
        """
        alg.reset()

        distribution, jobid = job
        np.random.seed(jobid)

        game.set_outcome_distribution(
            {'spam': distribution[0], 'ham': distribution[1]}, jobid
        )
        outcomes = self.get_outcomes(game)

        n_actions = game.n_actions
        action_ids = np.arange(n_actions)
        action_counter = np.zeros((n_actions, self.horizon))
        # Number of times each action has been played so far.
        plays_so_far = np.zeros(n_actions)

        for t in range(self.horizon):
            # The policy commits to an action before the outcome is revealed.
            action = alg.get_action(t, None)
            outcome = outcomes[t]

            feedback = self.get_feedback(game, action, outcome)
            alg.update(action, feedback, outcome, None, t)

            # Bump the chosen action's tally and snapshot all tallies as
            # column t of the counter matrix.
            plays_so_far += (action_ids == action)
            action_counter[:, t] = plays_so_far

        gaps = np.array([game.delta(i) for i in range(game.n_actions)])
        regret = gaps.T @ action_counter

        return regret


In [None]:
import PM_DMED
import bpm

# Experiment configuration for this cell.
n_cores = 8
n_folds = 96
horizon = 20000

# Apple-tasting runs for PM_DMED / BPM are currently disabled; the code is
# kept for reference.
# game = games.apple_tasting(False)
# algos = [ PM_DMED.PM_DMED(  game, horizon, 0) ,bpm.BPM(game,horizon) ]
# labels = [  'PM_DMED', 'BPMLeast'  ]  

# for alg, label in zip( algos, labels):
#     print('AT easy')
#     result1 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'imbalanced')
#     np.save('./results/AT/imbalanced_{}_{}_{}'.format(horizon,n_folds, label), result1)
#     print('AT hard')
#     result2 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'balanced')
#     np.save('./results/AT/balanced_{}_{}_{}'.format(horizon,n_folds, label), result2)

# Label-efficient game.
game = games.label_efficient()
algos = [ PM_DMED.PM_DMED(  game, horizon, 0) ,bpm.BPM(game,horizon) ]
# NOTE: `zip` stops at the shorter sequence, so with a single label only the
# first algorithm (PM_DMED) is actually evaluated below.
labels = [  'PM_DMED',  ]  #  'BPMLeast' 

# np.save does not create missing directories. Create the output folder up
# front so a long parallel run cannot be lost to a FileNotFoundError at the end.
os.makedirs('./results/LE', exist_ok=True)

for alg, label in zip( algos,  labels):
    print('LE easy')
    result3 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'imbalanced')
    np.save('./results/LE/imbalanced_{}_{}_{}'.format(horizon,n_folds, label), result3)
    print('LE hard')
    result4 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'balanced')
    np.save('./results/LE/balanced_{}_{}_{}'.format(horizon,n_folds, label), result4)


In [5]:
import TSPM
import cbp

# Experiment configuration for this cell.
n_cores = 8
n_folds = 96
horizon = 20000

# np.save does not create missing directories. Create both output folders up
# front so a long parallel run cannot be lost to a FileNotFoundError at the end.
os.makedirs('./results/AT', exist_ok=True)
os.makedirs('./results/LE', exist_ok=True)

# Apple-tasting game: all three algorithms are evaluated.
game = games.apple_tasting(False)
algos = [ cbp.CBP(  game, horizon, 1.01) ,TSPM.TSPM_alg(game, horizon, 1), TSPM.TSPM_alg(game, horizon, 0), ]
labels = [  'CBP', 'TSPM_1', 'TSPM_0'  ]  

for alg, label in zip( algos, labels):
    print('AT imbalanced')
    result1 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'imbalanced')
    np.save('./results/AT/imbalanced_{}_{}_{}'.format(horizon,n_folds, label), result1)
    print('AT balanced')
    result2 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'balanced')
    np.save('./results/AT/balanced_{}_{}_{}'.format(horizon,n_folds, label), result2)

# Label-efficient game.
game = games.label_efficient()
algos = [ cbp.CBP(  game, horizon, 1.01) ,TSPM.TSPM_alg(game, horizon, 1), TSPM.TSPM_alg(game, horizon, 0), ]
# NOTE: `zip` stops at the shorter sequence, so with a single label only the
# first algorithm (CBP) is actually evaluated below.
labels = [  'CBP', ] # 'TSPM_1', 'TSPM_0'   

for alg, label in zip( algos,  labels):
    print('LE imbalanced')
    result3 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'imbalanced')
    np.save('./results/LE/imbalanced_{}_{}_{}'.format(horizon,n_folds, label), result3)
    print('LE balanced')
    result4 = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'balanced')
    np.save('./results/LE/balanced_{}_{}_{}'.format(horizon,n_folds, label), result4)


Restricted license - for non-production use only - expires 2024-10-28
n-actions 2 n-outcomes 2
n-actions 2 n-outcomes 2
AT imbalanced
nbCores: 8 nbFolds: 96 Horizon: 20000
AT balanced
nbCores: 8 nbFolds: 96 Horizon: 20000
AT imbalanced
nbCores: 8 nbFolds: 96 Horizon: 20000
