In [401]:
import numpy as np
from multiprocessing import Pool
from functools import partial
import plotly.graph_objects as go
import gzip
import pickle as pkl

import games
import synthetic_data
import random_algo


import cpb_side
import cpb_side_gaussian
import linucb


import PGIDSratio
import synthetic_data
import numpy as np
from torchvision import datasets, transforms

import matplotlib.pyplot as plt


class FakeIds():
    """Synthetic context generator driven by a confusion matrix.

    Each sample draws a true label from ``imbalance``, then draws a predicted
    label from row ``M[label]``. The context is the one-hot encoding of the
    predicted label and the outcome is 1 when the prediction differs from the
    true label, 0 otherwise.

    Parameters
    ----------
    M : array-like, shape (dim, dim)
        ``M[i]`` is the probability vector over predicted labels when the
        true label is ``i``.
    imbalance : array-like, shape (dim,)
        Probability of each true label.
    """

    def __init__(self, M, imbalance):

        self.imbalance = imbalance
        self.M = M
        self.dim = len(imbalance)
        self.labels = [i for i in range(self.dim)]

    def get_contexts(self, horizon, game):
        """Draw ``horizon`` samples at once.

        ``game`` is accepted for interface compatibility and is not used.

        Returns
        -------
        outcomes : ndarray of int, shape (horizon,)
        contexts : ndarray, shape (horizon, dim)
            One one-hot row per sample.
        """
        # The width follows the number of labels (it used to be fixed at 10,
        # which disagreed with get_context and broke for more than 10 labels).
        contexts = np.empty((horizon, self.dim))
        outcomes = np.zeros(horizon, dtype=int)

        for i in range(horizon):
            # Same sampling procedure (and RNG draw order) as a single sample.
            outcomes[i], contexts[i] = self.get_context()

        return outcomes, contexts

    def get_context(self, ):
        """Draw one sample and return ``(outcome, context)``.

        ``context`` is a one-hot vector of length ``dim`` marking the predicted
        label; ``outcome`` is 1 for a misprediction and 0 otherwise.
        """
        label = np.random.choice(self.labels, p=self.imbalance)
        pred_label = np.random.choice(self.labels, p=self.M[label])

        context = np.zeros(self.dim)
        context[pred_label] = 1

        outcome = 1 if pred_label != label else 0

        return outcome, context

class Evaluation:
    """Measure how many rounds a policy needs to estimate ``ground_truth``.

    Parameters
    ----------
    ground_truth : ndarray
        Target value per label that the policy's estimate is compared to.
    epsilon : float
        Maximum absolute per-label deviation accepted as converged.
    n_labels : int
        Number of labels (length of the one-hot probes used for estimates).
    """

    def __init__(self, ground_truth, epsilon, n_labels):
        self.ground_truth = ground_truth
        self.epsilon = epsilon
        self.n_labels = n_labels

    def get_feedback(self, game, action, outcome):
        """Look up the feedback symbol for ``(action, outcome)`` in the game."""
        return game.FeedbackMatrix[ action ][ outcome ]

    def eval_policy_once(self, alg, game, context_generator, jobid, max_rounds=None):
        """Run ``alg`` until its estimate is within ``epsilon`` of the ground truth.

        Parameters
        ----------
        alg : policy object
            Must expose ``name``, ``reset()``, ``get_action(t, context)`` and
            ``update(action, feedback, outcome, t, context)``. Estimates are
            only read for ``name == 'randcbpside'`` (from
            ``alg.contexts[1]['weights']``) and ``name == 'random'`` (from
            ``alg.weights[:, 0]``).
        game : object with a ``FeedbackMatrix``.
        context_generator : object whose ``get_context()`` returns
            ``(outcome, context)``.
        jobid : int
            Seed passed to ``np.random.seed`` before the run.
        max_rounds : int or None, optional
            Upper bound on the number of rounds. ``None`` (default) keeps the
            original unbounded behaviour; without a bound the loop never ends
            for a policy whose name is not recognised or that never converges.

        Returns
        -------
        int
            Number of rounds played. The round in which convergence is
            detected is still played in full and counted.
        """
        np.random.seed(jobid)

        alg.reset()

        t = 0
        # Sentinel far from any probability so the check fails until a real
        # estimate has been read from the policy.
        latest_estimate = np.ones(len(self.ground_truth)) * 1000
        converged = False
        while not converged and (max_rounds is None or t < max_rounds):

            # Environment draws one outcome and the context associated with it.
            outcome, context = context_generator.get_context()
            context = context.reshape((-1,1))

            if t % 10000 == 0 and t>0 :
                print(t, 'latest estimate', latest_estimate)

            if t>2 and alg.name == 'randcbpside':
                # Probe the weights with each one-hot context in turn.
                # NOTE(review): index 1 is presumably the informative action;
                # confirm against the algorithm's `contexts` layout.
                estimates = []
                for i in range( self.n_labels ):
                    sim = np.zeros( self.n_labels )
                    sim[i] = 1
                    estimate = alg.contexts[1]['weights'] @ sim
                    estimates.append( estimate[0] )
                latest_estimate = estimates

            elif t>2 and alg.name == 'random':
                latest_estimate = alg.weights[:,0]

            if ( abs( self.ground_truth - latest_estimate  ) <= self.epsilon ).all() :
                converged = True

            # Policy chooses one action, observes feedback and is updated.
            action = alg.get_action(t, context)
            feedback =  self.get_feedback( game, action, outcome )
            alg.update(action, feedback, outcome, t, context )

            t = t+1

        return  t


In [403]:
class Random():
    """Uniformly random policy that tallies feedback per predicted label.

    ``weights[i]`` holds, for label ``i``, the empirical frequency of each of
    the two feedback symbols observed when action 1 was played on a context
    whose arg-max is ``i``.
    """

    def __init__(self, game, n_labels):

        self.name = 'random'

        self.n_labels = n_labels
        self.game = game

        # Allocates weights / feedbacks / N in their initial state.
        self.reset()

    def get_action(self, t, context = None ):
        """Sample an action uniformly over ``game.n_actions``."""
        n_actions = self.game.n_actions
        uniform = np.ones( n_actions ) / n_actions
        return np.random.choice( n_actions, 1, p = uniform )[0]

    def update(self, action, feedback, outcome, t, context):
        """Record ``feedback`` for the context's label; only action 1 is counted."""
        if action != 1:
            return

        row = np.argmax(context)
        self.feedbacks[row][feedback] += 1
        self.N[row] += 1

        # Row-wise frequencies; labels never observed get an all-zero row.
        counts = self.N[:, None]
        self.weights = np.divide( self.feedbacks, counts,
                                  out = np.zeros_like(self.feedbacks),
                                  where = counts != 0 )

    def reset(self,):
        """Restore the initial state: unit weights, zero tallies."""
        table_shape = (self.n_labels, 2)
        self.weights = np.ones( table_shape )
        self.feedbacks = np.zeros( table_shape )
        self.N = np.zeros( self.n_labels )


def confusion_matrix(M, error_rates=None):
    """Spread per-class error mass from the diagonal of ``M`` onto random off-diagonal cells.

    For every row ``i`` the diagonal entry is reduced by ``error_rates[i]`` and
    that same amount is redistributed, in random proportions, over a random
    number of off-diagonal columns (columns may be picked more than once).
    Row sums are therefore preserved.

    Parameters
    ----------
    M : ndarray, shape (n, n)
        Matrix to perturb. It is modified in place and also returned.
    error_rates : sequence of float, optional
        Error mass per row. When omitted, the module-level ``errors`` variable
        is used, which keeps existing ``confusion_matrix(M)`` calls working.

    Returns
    -------
    ndarray
        The same object as ``M``.

    Raises
    ------
    ValueError
        If ``M`` has fewer than two rows (there is no off-diagonal cell).
    """
    if error_rates is None:
        # Backward-compatible fallback to the notebook-level variable.
        error_rates = errors

    # Size comes from the matrix itself rather than a notebook global.
    n_classes = len(M)
    if n_classes < 2:
        raise ValueError("confusion_matrix needs at least a 2x2 matrix")

    for i in range(n_classes):

        M[i][i] -= error_rates[i]

        # randint's upper bound is exclusive; max(2, .) keeps the 2-class case
        # valid (exactly one split) without changing draws for n_classes >= 3.
        n_splits = np.random.randint(1, max(2, n_classes - 1))

        coefs = np.random.uniform(0, 1, n_splits)
        coefs = coefs / sum(coefs)

        for idx in range(n_splits):

            # Rejection-sample a column different from the diagonal one.
            location = i
            while location == i:
                location = np.random.randint(0, n_classes)

            M[i][location] += coefs[idx] * error_rates[i]

    return M

def truncate(value):
    """Return ``value`` unchanged unless it lies outside [-1, 1].

    Out-of-range values are replaced by ``abs(N(0, 0.1))`` draws, redrawn
    until one is strictly below 1.
    """
    out_of_range = value < -1 or value > 1
    if not out_of_range:
        return value

    while True:
        value = abs( np.random.normal(0, 0.1 ) )
        if value < 1:
            return value



def get_ground_truth(M, imbalance):
    """Per predicted label, the share of predictions that are wrong.

    ``M[i][j]`` is read as the probability of predicting ``j`` when the true
    label is ``i`` and ``imbalance[i]`` as the probability of true label ``i``.
    ``M`` is not modified.
    """
    # Mass of correct predictions for each label: M[j][j] * imbalance[j].
    correct_mass = np.diag(M) * imbalance

    # Mass of incorrect predictions landing on each label j:
    # sum over true labels i != j of M[i][j] * imbalance[i].
    off_diagonal = M.copy()
    np.fill_diagonal( off_diagonal, 0 )
    wrong_mass = ( off_diagonal.T * imbalance ).sum( axis = 1 )

    # Fraction of label-j predictions that are wrong.
    return wrong_mass / ( correct_mass + wrong_mass )

# Experiment: on randomly generated problems, count how many rounds each
# policy needs before its estimates are within `epsilon` of the ground truth.
game = games.apple_tasting(False)

n_trials = 3

# results[trial][k] is the round count returned for the k-th policy.
results = np.zeros( (n_trials, 2) )

for trial in range(n_trials):

    print(trial)

    jobid = trial

    n_labels = np.random.randint(3, 10)

    # Label frequencies: roughly one label in ten is drawn from a much
    # heavier range, then everything is normalised to sum to 1.
    label_mass = []
    for label_idx in range(n_labels):
        if np.random.uniform(0,1) < 0.1:
            label_mass.append( np.random.uniform(50,100) )
        else:
            label_mass.append( np.random.uniform(0,25) )
    imbalance = np.array( label_mass )
    imbalance = imbalance / sum(imbalance)

    M = np.identity(n_labels)

    # Per-label error rate: mostly small, occasionally large.
    error_draws = []
    for label_idx in range(n_labels):
        if np.random.uniform(0,1) < 0.1:
            raw_error = abs( np.random.normal(0.5, 0.5 ) )
        else:
            raw_error = abs( np.random.normal(0, 0.1 ) )
        error_draws.append( truncate( raw_error ) )
    errors = np.array( error_draws )

    # confusion_matrix reads `errors` and `n_labels` from the notebook scope.
    M = confusion_matrix(M)

    ground_truth = get_ground_truth(M, imbalance)
    print('ground truth', ground_truth)

    epsilon = 0.01

    task = Evaluation(ground_truth , epsilon, n_labels )
    contexts_generator = FakeIds(M, imbalance )

    # Both policies are built before either is run.
    competitors = [
        Random(  game, n_labels ),
        cpb_side_gaussian.RandCPB_side(game, n_labels, None, 1.01, 0.001, 1/8, 10, False, 10e-7),
    ]

    for idx, alg in enumerate(competitors):

        t = task.eval_policy_once(alg, game, contexts_generator, jobid)
        results[trial][idx] = t

    print(results[trial])
    print()

    

0
ground truth [0.226662   0.         0.08376924]
0
1
[ 350. 4111.]

1
ground truth [0.49551873 0.73787411 0.21653922 0.05315866 0.16082235 0.14327177
 0.6895101 ]
10000 latest estimate [0.48931624 0.73129252 0.22978723 0.05904908 0.14930183 0.13513514
 0.69090909]
0
1
10000 latest estimate [0.4772978959025476, 0.6889952153110046, 0.21969696969696997, 0.055133450224040474, 0.15239096163951704, 0.16286644951140059, 0.7388059701492541]
20000 latest estimate [0.4844006568144515, 0.7008928571428572, 0.2370689655172421, 0.05711800521890402, 0.1567423230974636, 0.17142857142857143, 0.7383720930232559]


In [388]:

# algos = [ random_algo.Random(  game, horizon, ),
#           cpb_side_gaussian.RandCPB_side(game, 10, horizon, 1.01, 0.001, 1/8, 10, False, 10e-7)  ]

# colors = [  [0,0,0],  [250,0,0]  ] 
# labels = [   'random', 'RandCBP'  ]  

# imbalance = np.array( [0.05, 0.05, 0.30, 0.1, 0.1, 0.1, 0.1, 0.1, 0.05, 0.05] )

# M = np.array(  [ [ 0.7,  0,   0,   0,   0,    0,    0.2,  0,    0,     0.1 ], 
#                  [ 0.1,  0.9, 0,   0,   0,    0,    0,    0,    0,     0 ],
#                  [ 0.1,  0,   0.9, 0,   0,    0,    0,    0,    0,     0 ],
#                  [ 0.1,  0,   0,   0.8, 0,    0,    0,    0,    0.05,  0.05 ],
#                  [ 0,    0,   0,   0,   0.99, 0,    0,    0,    0.01,  0 ],
#                  [ 0,    0,   0,   0,   0,    0.96, 0.02, 0,    0.02,  0 ],
#                  [ 0,    0,   0,   0,   0,    0,    0.99, 0,    0,     0.01 ],
#                  [ 0,    0.1, 0.1, 0,   0,    0,    0,    0.70, 0,     0.1 ],
#                  [ 0.1,    0,   0,   0,   0,    0,    0,    0,    0.9,     0 ],
#                  [ 0,    0.2,   0,   0,   0,    0,    0,    0,    0,     0.8 ] ] )  



# fig = go.Figure(    )

# for alg, color, label in list(zip(algos, colors, labels))[::-1]:

# alg = cpb_side_gaussian.RandCPB_side(game, 10, horizon, 1.01, 0.05, 1/8, 10, False, 10e-7)

# alg = 




# `M` is not defined in this cell: it is whatever confusion matrix an earlier
# executed cell left in the kernel (presumably the trial loop - verify, since
# the execution counts are out of order).
print(M)

[[8.78536976e-01 0.00000000e+00 0.00000000e+00 6.29953284e-02
  0.00000000e+00 5.14247550e-02 7.04294033e-03 0.00000000e+00
  0.00000000e+00]
 [1.44408088e-03 9.35727069e-01 0.00000000e+00 0.00000000e+00
  2.23233266e-02 0.00000000e+00 6.82135529e-03 3.36841686e-02
  0.00000000e+00]
 [0.00000000e+00 4.02106683e-03 9.91952149e-01 4.02678433e-03
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 9.41885059e-03 9.80702832e-01
  0.00000000e+00 2.38852218e-03 2.50439793e-04 2.22066239e-04
  7.01728933e-03]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  9.12914984e-01 8.70850161e-02 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.41487890e-02 1.09375899e-02
  6.17261697e-03 8.89402051e-01 4.12934821e-02 3.80454709e-02
  0.00000000e+00]
 [1.76903342e-03 0.00000000e+00 0.00000000e+00 1.92761815e-02
  0.00000000e+00 9.36267281e-04 9.69009050e-01 0.00000000e+00
  9.00946796e-03]
 [0.00

In [287]:
# Scratch check: one unseeded draw from N(mean=0.5, std=0.5), the distribution
# used for the occasional large error rate; the value differs on every run.
np.random.normal( 0.5, 0.5 )

0.8421090330435743

In [42]:
# Scratch check: dividing one row of a (10, 2) array by a scalar gives a (2,) row.
a = np.ones( shape = (10, 2) )
first_row = a[0]
first_row / 2

array([0.5, 0.5])

In [36]:
# Scratch check: divide every row of `a` by its own entry of `b` by turning
# `b` into a column so it broadcasts across the two columns of `a`.
a = np.ones( shape = (10, 2) )
b = 2 * np.ones( 10 )
a / b.reshape(-1, 1)

array([[0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5]])

In [20]:
import pandas as pd

# NOTE(review): `final_probas` and `estimates_history` are not defined in this
# cell; they must already exist in the kernel. `estimates_history` is used as
# a mapping from a round count to a per-label estimate vector.

# One row per label: its index and the rounded ground-truth value. The index
# length follows the data instead of assuming exactly 10 labels.
n_points = len(final_probas)
df = np.vstack( [ np.arange(n_points),  np.round( final_probas , 2) ] ).T
df = pd.DataFrame(df)
df.columns = ['index', 'ground_truth']

checkpoints = list( estimates_history.keys() )
for k in checkpoints:
    df[str(k)] = estimates_history[k]

# One grey level per checkpoint, from light (224) towards dark (96, excluded).
# linspace always yields exactly len(checkpoints) shades; the previous
# rounded-step arange could return fewer shades than checkpoints.
greys = np.linspace(224, 96, num=len(checkpoints), endpoint=False).round().astype(int)

fig = go.Figure( )

for col, k in zip( greys, checkpoints ):
    fig.add_trace(go.Scatter(x=df['index'], y=df[str(k)], line=dict(color='rgb({},{},{})'.format(col,col,col) ) ,mode='lines+markers',  name='{} rounds'.format(k),   showlegend=True ))

# Ground truth is drawn last, in red, with its values printed above the markers.
fig.add_trace(go.Scatter(x=df['index'], y=df['ground_truth'], text = df['ground_truth'],  line=dict(color='rgb(255,0,0)' ), textfont=dict( family="sans serif", size=18) ,mode='lines+markers+text',  name='Deployment Error (ground truth)',   showlegend=True ))

fig.update_traces(textposition="top center")

fig.update_layout(yaxis_range=[-0.1,1])

fig.show(legend=True)
# fig.write_image("./estimates_imbalanced.png")



In [26]:
# Are the estimates stored under key 8000 within 0.025 of `final_probas` on
# every label? (Both names come from previously executed cells.)
np.all( np.abs( final_probas - estimates_history[8000] ) <= 0.025 )

False

In [None]:
# Regret comparison: run every algorithm over `n_folds` runs of `horizon`
# rounds, save the raw results, and plot the mean with a +1 std band.
import gzip
import pickle as pkl

n_cores = 8
n_folds = 25
horizon = 2500

game = games.apple_tasting(False)

# NOTE(review): the other RandCPB_side call in this notebook passes nine
# positional arguments (with a label count and None after `game`); here there
# are eight. Confirm which constructor signature is current.
# 10e-7 is the same value as 1e-6.
algos = [ random_algo.Random(  game, horizon, ),    
          cpb_side.CPB_side(  game, horizon, 1.01, 0.05), 
          cpb_side_gaussian.RandCPB_side(game, horizon, 1.01, 0.05, 1/8, 10, False, 10e-7),
          linucb.LinUCB(game, horizon, 1.01)   ] #cpb_side_gaussian.CPB_side_gauss(game, horizon, 1/16, 10)

# colors[i] (RGB) and labels[i] belong to algos[i].
# NOTE(review): the third label says 0.001 while the matching constructor call
# above passes 0.05 - confirm which one is intended.
colors = [  [0,0,0], [250,0,0], [0,250,0] , [0,0,250]  ] #, [250,0,0]
labels = [  'random', 'CBP-side 0.05',  'RandCPB-side 0.001', 'linUCB' ] #  'Rand CPB-side'

fig = go.Figure( )

for alg, color, label in zip( algos, colors, labels):

    r,g,b = color
    # NOTE(review): `evaluate_parallel` is not defined in the visible part of
    # the notebook; `result` is reduced over axis 0 below, so presumably it is
    # shaped (n_folds, horizon) - confirm.
    result = evaluate_parallel(n_cores, n_folds, horizon, alg, game, 'easy')
    # The target directory is assumed to exist already.
    np.save('./results/apple_tasting/easy_context_{}_{}_{}'.format(horizon,n_folds, label), result)
    regret =  np.mean(result,0) 
    
    xcoords = np.arange(0,horizon,1).tolist()
    std =  np.std(result,0) 
    upper_regret = regret + std

    # Mean curve for this algorithm.
    fig.add_trace(go.Scatter(x=xcoords, y=regret, line=dict(color='rgb({},{},{})'.format(r,g,b)), mode='lines',  name=label )) # 

    # Shaded band: trace mean + std left to right, then come back along the
    # mean right to left, and fill the closed shape.
    fig.add_trace(   go.Scatter( x=xcoords+xcoords[::-1], y=upper_regret.tolist()+regret.tolist()[::-1],  fill='toself', fillcolor='rgba({},{},{},0.2)'.format(r,g,b), 
                         line=dict(color='rgba(255,255,255,0)'),   hoverinfo="skip",  showlegend=False )  )
    
fig.update_layout(legend= dict(yanchor="top",y=0.98,xanchor="left",x=0.77), autosize=False,
                 xaxis_title="Sequence",
                 yaxis_title="Regret",
                 margin=go.layout.Margin( l=0,   r=0,   b=0,    t=0, ), 
                  font=dict(size=13,) )

fig.show(legend=True)

# fig.write_image("./easy_AT_context.pdf")

#fig.update_xaxes(type="log")
# fig.write_image("./easy_AT_context_log.pdf")

# The figure is displayed a second time here.
fig.show()