In [1]:
import numpy as np
from multiprocessing import Pool
from functools import partial
import plotly.graph_objects as go
import gzip
import pickle as pkl

import games
import synthetic_data
import random_algo


import cpb_side
import cpb_side_gaussian
import linucb


import PGIDSratio
import synthetic_data
import numpy as np
from torchvision import datasets, transforms

import matplotlib.pyplot as plt


class FakeIds():
    """Synthetic context generator driven by a confusion matrix.

    A true label is drawn from ``imbalance``; a predicted label is then drawn
    from the row ``M[label]``. The context is the one-hot encoding of the
    predicted label and the outcome is 1 when the prediction differs from the
    true label, 0 otherwise.

    The number of classes is taken from ``len(imbalance)`` rather than being
    fixed, so any square ``M`` with a matching ``imbalance`` can be used.
    """

    def __init__(self, M, imbalance):
        """Store the sampling distributions.

        Args:
            M: indexable by true label; ``M[label]`` is used as the probability
                vector over predicted labels.
            imbalance: probability vector over true labels.
        """
        self.imbalance = imbalance
        self.M = M

    @property
    def n_classes(self):
        """Number of classes, i.e. the length of the true-label distribution."""
        return len(self.imbalance)

    def get_contexts(self, horizon, game):
        """Draw ``horizon`` independent (outcome, context) pairs.

        Args:
            horizon: number of samples to draw.
            game: accepted for interface compatibility; not used here.

        Returns:
            ``(outcomes, contexts)`` where ``outcomes`` is an int array of shape
            ``(horizon,)`` and ``contexts`` a float array of shape
            ``(horizon, n_classes)`` holding one one-hot row per sample.
        """
        contexts = np.empty((horizon, self.n_classes))
        outcomes = np.zeros(horizon, dtype=int)

        # Delegating to get_context keeps one sampling routine; the draws are
        # made in the same order (label, then predicted label) per sample.
        for i in range(horizon):
            outcomes[i], contexts[i] = self.get_context()

        return outcomes, contexts

    def get_context(self, ):
        """Draw a single (outcome, context) pair.

        Returns:
            ``(outcome, context)``: ``outcome`` is 1 if the sampled prediction
            is wrong and 0 if it is right; ``context`` is a float vector of
            length ``n_classes`` with a single 1 at the predicted label.
        """
        n_classes = self.n_classes
        label = np.random.choice(n_classes, p=self.imbalance)
        pred_label = np.random.choice(n_classes, p=self.M[label])

        context = np.zeros(n_classes)
        context[pred_label] = 1

        outcome = int(pred_label != label)
        return outcome, context

class Evaluation:
    """Runs a policy until its per-class estimates match a known ground truth.

    Attributes:
        ground_truth: per-class target values the estimates are compared to.
        epsilon: absolute tolerance; the run stops once every estimate is
            within ``epsilon`` of its ground-truth value.
    """

    def __init__(self, ground_truth, epsilon):
        self.ground_truth = ground_truth
        self.epsilon = epsilon

    def get_feedback(self, game, action, outcome):
        """Return ``game.FeedbackMatrix[action][outcome]``."""
        return game.FeedbackMatrix[action][outcome]

    def get_bandit_feedback(self, game, action, outcome):
        """Return ``game.banditFeedbackMatrix[action][outcome]``."""
        return game.banditFeedbackMatrix[action][outcome]

    def _current_estimates(self, alg):
        """Read the per-class estimates out of the policy's weights.

        Each class is probed with its one-hot context ``sim`` and the first
        entry of ``alg.contexts[1]['weights'] @ sim`` is taken as its estimate.
        NOTE(review): index 1 is presumably the action whose model is being
        monitored - confirm against the policy implementations.
        """
        n_classes = len(self.ground_truth)
        weights = alg.contexts[1]['weights']

        estimates = []
        for i in range(n_classes):
            sim = np.zeros(n_classes)
            sim[i] = 1
            estimates.append((weights @ sim)[0])
        return np.asarray(estimates)

    def eval_policy_once(self, alg, game, context_generator, jobid, verbose=True, max_rounds=None):
        """Play rounds until the policy's estimates are within tolerance.

        Args:
            alg: policy exposing ``reset()``, ``get_action(t, context)``,
                ``update(action, feedback, outcome, t, context)`` and
                ``contexts[1]['weights']``.
            game: object exposing ``FeedbackMatrix``.
            context_generator: object whose ``get_context()`` returns
                ``(outcome, context)``.
            jobid: seed passed to ``np.random.seed``.
            verbose: when True (default, the historical behaviour) print the
                ground truth once and the latest estimates every round.
            max_rounds: optional cap on the number of rounds; ``None``
                (default) keeps the historical unbounded behaviour.

        Returns:
            The number of rounds played. The round in which convergence is
            detected is still played in full, so it is included in the count.
        """
        np.random.seed(jobid)
        alg.reset()

        n_classes = len(self.ground_truth)
        target = np.asarray(self.ground_truth)

        t = 0
        # Large sentinel so the initial "estimate" can never pass the check.
        latest_estimate = np.ones(n_classes) * 1000
        if verbose:
            print('ground truth', self.ground_truth)

        converged = False
        while not converged and (max_rounds is None or t < max_rounds):

            # Environment chooses one outcome and the context associated to it.
            outcome, context = context_generator.get_context()
            context = context.reshape((-1, 1))

            if verbose:
                print(t, latest_estimate)

            # The estimates are only read after the first three rounds.
            if t > 2:
                latest_estimate = self._current_estimates(alg)
                converged = bool((np.abs(target - latest_estimate) <= self.epsilon).all())

            # Policy chooses one action, observes the feedback and updates.
            action = alg.get_action(t, context)
            feedback = self.get_feedback(game, action, outcome)
            alg.update(action, feedback, outcome, t, context)

            t = t + 1

        return t


In [6]:
# Sum each row of M, i.e. the total predicted-label mass per true label.
np.sum(M, axis=1)

array([0.3 , 0.1 , 0.1 , 0.2 , 0.01, 0.04, 0.01, 0.3 , 0.1 , 0.2 ])

In [9]:
class Random():
    """Uniformly random policy that tracks feedback frequencies for action 1.

    Attributes:
        game: object exposing ``n_actions``.
        weights: length-2 array; after at least one play of action 1 it holds
            the empirical frequency of each feedback symbol under that action.
        feedbacks: length-2 array counting how often each feedback symbol was
            observed when action 1 was played.
        N: length-2 array; ``N[1]`` counts the plays of action 1.
    """

    def __init__(self, game):
        self.game = game
        # Single source of truth for the initial state.
        self.reset()

    def get_action(self, t, context = None ):
        """Draw an action uniformly at random; ``t`` and ``context`` are ignored."""
        n_actions = self.game.n_actions
        pbt = np.ones(n_actions) / n_actions
        action = np.random.choice(n_actions, 1, p=pbt)[0]
        return action

    def update(self, action, feedback, outcome, context, t):
        """Record the feedback observed after playing ``action``.

        Only plays of action 1 change the state, as in the original code.
        ``feedback`` is used as an index into length-2 arrays, so it is
        assumed to be 0 or 1 - TODO confirm against the game's feedback matrix.

        NOTE(review): ``Evaluation.eval_policy_once`` calls ``update`` with the
        last two positional arguments in the order ``(t, context)``; neither
        is used here, so the mismatch has no effect on this class.
        """
        if action == 1:
            # The original read the undefined ``self.symbols`` and divided by a
            # counter that was never incremented; keep real counts instead.
            self.N[1] += 1
            self.feedbacks[feedback] += 1
            self.weights = self.feedbacks / self.N[1]

    def reset(self,):
        """Restore the initial weights and zero all counters."""
        self.weights = np.ones(2)
        self.feedbacks = np.zeros(2)
        self.N = np.zeros(2)


def get_ground_truth(M, imbalance):
    """Probability that a prediction is wrong, for each predicted label.

    With ``joint[i, j] = M[i, j] * imbalance[i]`` (true label ``i``, predicted
    label ``j``), the value for predicted label ``j`` is

        sum_{i != j} joint[i, j] / sum_i joint[i, j]

    Args:
        M: square array-like; row ``i`` is the distribution of the predicted
            label given true label ``i``.
        imbalance: array-like of length ``n``; distribution of the true label.

    Returns:
        Float array of length ``n``. Entries for labels that are never
        predicted (zero total mass) are ``nan``.

    Raises:
        ValueError: if ``M`` is not square or ``imbalance`` does not match it.
    """
    M = np.asarray(M, dtype=float)
    imbalance = np.asarray(imbalance, dtype=float)
    if M.ndim != 2 or M.shape[0] != M.shape[1] or imbalance.shape != (M.shape[0],):
        raise ValueError(
            "M must be square with one row per entry of imbalance; got shapes "
            "{} and {}".format(M.shape, imbalance.shape)
        )

    # joint[i, j] = P(true label = i, predicted label = j)
    joint = M * imbalance[:, np.newaxis]

    # Mass of correct predictions per predicted label.
    probas_correct = np.diag(joint)

    # Mass of incorrect predictions per predicted label (off-diagonal column sums).
    off_diagonal = joint.copy()
    np.fill_diagonal(off_diagonal, 0)
    probas_incorrect = off_diagonal.sum(axis=0)

    # Share of incorrect predictions; left as nan where the label is never
    # predicted, without triggering a 0/0 RuntimeWarning.
    total = probas_correct + probas_incorrect
    final_probas = np.full_like(total, np.nan)
    np.divide(probas_incorrect, total, out=final_probas, where=total != 0)
    return final_probas

# --- Run configuration ------------------------------------------------------
n_cores = 1
n_folds = 1
horizon = 10000

# Absolute tolerance used by Evaluation as its stopping criterion.
epsilon = 0.025

game = games.apple_tasting(False)

# Seed handed to np.random.seed inside Evaluation.eval_policy_once.
jobid = 3

# algos = [ random_algo.Random(  game, horizon, ),
#           cpb_side_gaussian.RandCPB_side(game, 10, horizon, 1.01, 0.001, 1/8, 10, False, 10e-7)  ]

colors = [[0, 0, 0], [250, 0, 0]]
labels = ['random', 'RandCBP']

# --- Synthetic classifier ---------------------------------------------------
# Distribution of the true label.
imbalance = np.array([0.05, 0.05, 0.30, 0.1, 0.1, 0.1, 0.1, 0.1, 0.05, 0.05])

# Confusion matrix: row i is the distribution of the predicted label given
# true label i.
M = np.array([
    [0.7, 0,   0,   0,   0,    0,    0.2,  0,    0,    0.1 ],
    [0.1, 0.9, 0,   0,   0,    0,    0,    0,    0,    0   ],
    [0.1, 0,   0.9, 0,   0,    0,    0,    0,    0,    0   ],
    [0.1, 0,   0,   0.8, 0,    0,    0,    0,    0.05, 0.05],
    [0,   0,   0,   0,   0.99, 0,    0,    0,    0.01, 0   ],
    [0,   0,   0,   0,   0,    0.96, 0.02, 0,    0.02, 0   ],
    [0,   0,   0,   0,   0,    0,    0.99, 0,    0,    0.01],
    [0,   0.1, 0.1, 0,   0,    0,    0,    0.70, 0,    0.1 ],
    [0.1, 0,   0,   0,   0,    0,    0,    0,    0.9,  0   ],
    [0,   0.2, 0,   0,   0,    0,    0,    0,    0,    0.8 ],
])

# --- Evaluation -------------------------------------------------------------
ground_truth = get_ground_truth(M, imbalance)
task = Evaluation(ground_truth, epsilon)
contexts_generator = FakeIds(M, imbalance)

fig = go.Figure()

# for alg, color, label in list(zip(algos, colors, labels))[::-1]:

# alg = cpb_side_gaussian.RandCPB_side(game, 10, horizon, 1.01, 0.05, 1/8, 10, False, 10e-7)

# NOTE(review): eval_policy_once reads alg.contexts[1]['weights']; the recorded
# run of this cell ended with "AttributeError: 'Random' object has no attribute
# 'contexts'", so this policy does not appear to satisfy that requirement.
alg = random_algo.Random(game, horizon)

# NOTE(review): eval_policy_once returns the number of rounds played, not a
# history of estimates, despite the name this result is bound to.
estimates_history = task.eval_policy_once(alg, game, contexts_generator, jobid)

    

ground truth [0.58823529 0.30769231 0.03571429 0.         0.         0.
 0.10810811 0.         0.1509434  0.3442623 ]
0 [1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000.]
1 [1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000.]
2 [1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000.]
3 [1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000.]


AttributeError: 'Random' object has no attribute 'contexts'

In [39]:
# Display the value that the run cell bound from task.eval_policy_once(...).
estimates_history

0

In [20]:
import pandas as pd





# NOTE(review): this cell expects `estimates_history` to map a round count to
# the vector of per-class estimates at that round. Evaluation.eval_policy_once
# returns only the number of rounds played, so the mapping must come from
# elsewhere - confirm how it is produced.

# Per-class table: the analytical ground truth plus one column per snapshot.
# `ground_truth` (the value returned by get_ground_truth) replaces
# `final_probas`, which exists only as a local inside that function.
n_classes = len(ground_truth)
df = pd.DataFrame({
    'index': np.arange(n_classes),
    'ground_truth': np.round(ground_truth, 2),
})

for k, estimates in estimates_history.items():
    df[str(k)] = estimates

# One grey level per snapshot, running from light (224) to dark (96). linspace
# always yields exactly as many levels as snapshots, including zero or one.
greys = np.linspace(224, 96, num=len(estimates_history)).astype(int)

fig = go.Figure()

for col, k in zip(greys, estimates_history.keys()):
    fig.add_trace(go.Scatter(
        x=df['index'],
        y=df[str(k)],
        line=dict(color='rgb({},{},{})'.format(col, col, col)),
        mode='lines+markers',
        name='{} rounds'.format(k),
        showlegend=True,
    ))

fig.add_trace(go.Scatter(
    x=df['index'],
    y=df['ground_truth'],
    text=df['ground_truth'],
    line=dict(color='rgb(255,0,0)'),
    textfont=dict(family="sans serif", size=18),
    mode='lines+markers+text',
    name='Deployment Error (ground truth)',
    showlegend=True,
))

fig.update_traces(textposition="top center")

fig.update_layout(yaxis_range=[-0.1, 1])

fig.show(legend=True)
# fig.write_image("./estimates_imbalanced.png")



In [26]:
# Are all per-class estimates recorded at round 8000 within `epsilon` of the
# analytical ground truth? Uses the module-level `ground_truth` and `epsilon`
# instead of a function-local name and a repeated literal.
( np.abs( ground_truth - estimates_history[8000] ) <= epsilon ).all()

False

In [None]:
import gzip
import pickle as pkl

# --- Run configuration ------------------------------------------------------
n_cores = 8
n_folds = 25
horizon = 2500

game = games.apple_tasting(False)

# Policies under comparison; colors and labels below are matched by position.
algos = [
    random_algo.Random(game, horizon),
    cpb_side.CPB_side(game, horizon, 1.01, 0.05),
    cpb_side_gaussian.RandCPB_side(game, horizon, 1.01, 0.05, 1/8, 10, False, 10e-7),
    linucb.LinUCB(game, horizon, 1.01),
]  # cpb_side_gaussian.CPB_side_gauss(game, horizon, 1/16, 10)

colors = [[0, 0, 0], [250, 0, 0], [0, 250, 0], [0, 0, 250]]  # , [250,0,0]
# NOTE(review): the third label mentions 0.001, but none of the arguments
# passed to RandCPB_side above is 0.001 - confirm which value is intended.
labels = ['random', 'CBP-side 0.05', 'RandCPB-side 0.001', 'linUCB']  # 'Rand CPB-side'

fig = go.Figure()

# NOTE(review): evaluate_parallel is not defined in the cells shown in this
# notebook; it has to be provided elsewhere before this cell can run.
for alg, color, label in zip(algos, colors, labels):

    r, g, b = color

    # Run the policy and persist the raw result before aggregating it.
    result = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'easy')
    np.save('./results/apple_tasting/easy_context_{}_{}_{}'.format(horizon, n_folds, label), result)

    # Aggregate along the first axis (presumably the folds - TODO confirm).
    regret = np.mean(result, 0)
    std = np.std(result, 0)
    upper_regret = regret + std

    xcoords = list(range(horizon))

    # Mean curve.
    fig.add_trace(go.Scatter(
        x=xcoords,
        y=regret,
        line=dict(color='rgb({},{},{})'.format(r, g, b)),
        mode='lines',
        name=label,
    ))

    # Shaded band between the mean and mean + one standard deviation, drawn as
    # a closed polygon: forward along the upper edge, back along the mean.
    fig.add_trace(go.Scatter(
        x=xcoords + xcoords[::-1],
        y=upper_regret.tolist() + regret.tolist()[::-1],
        fill='toself',
        fillcolor='rgba({},{},{},0.2)'.format(r, g, b),
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        showlegend=False,
    ))

fig.update_layout(
    legend=dict(yanchor="top", y=0.98, xanchor="left", x=0.77),
    autosize=False,
    xaxis_title="Sequence",
    yaxis_title="Regret",
    margin=go.layout.Margin(l=0, r=0, b=0, t=0),
    font=dict(size=13),
)

fig.show(legend=True)

# fig.write_image("./easy_AT_context.pdf")

#fig.update_xaxes(type="log")
# fig.write_image("./easy_AT_context_log.pdf")

fig.show()