# In [18]:
from sklearn.linear_model import SGDClassifier
import numpy as np
import collections
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
import geometry
import itertools
from collections import defaultdict
import collections
import target
import controler
import utils
from functools import reduce
import linear_regression
from multiprocessing import Pool
from functools import partial


class SyntheticCase:
    """Simulated finite partial-monitoring game played with FeedExp3.

    Rows of ``LossMatrix`` and ``FeedbackMatrix`` are indexed by the
    learner's action and columns by the environment's outcome.  Outcomes are
    drawn i.i.d. from ``outcome_distribution`` at every round.
    """

    def __init__(self, LossMatrix, FeedbackMatrix, horizon,
                 outcome_distribution=None):
        """Store the game description.

        Args:
            LossMatrix: (n_actions, n_outcomes) array of losses.
            FeedbackMatrix: (n_actions, n_outcomes) array of numeric
                feedback signals.
            horizon: number of rounds played by ``feedexp3``.
            outcome_distribution: probabilities of each outcome; defaults to
                ``[0.9, 0.1]`` (the two-outcome setting used historically).
        """
        self.LossMatrix = np.asarray(LossMatrix)
        self.FeedbackMatrix = np.asarray(FeedbackMatrix)
        self.horizon = horizon
        self.n_actions = len(self.LossMatrix)
        if outcome_distribution is None:
            outcome_distribution = [0.9, 0.1]
        self.outcome_distribution = np.asarray(outcome_distribution, dtype=float)

    def parameters_Bianchi(self, C):
        """Return ``(eta, gamma)`` tuned as in Cesa-Bianchi et al. (2006).

        [Bianchi et al. 2006 "Regret minimization under partial monitoring"]

        Args:
            C: positive scaling constant (computed from the link matrix in
                ``feedexp3``).
        """
        n, horizon = self.n_actions, self.horizon
        eta = 1 / C * pow(np.log(n) / (n * horizon), 2. / 3.)
        # gamma is a mixing weight: clip it to 1 (as parameters_Piccolboni
        # does) so short horizons cannot yield negative probabilities.
        gamma = np.fmin(1., C * pow((np.log(n) * n ** 2) / horizon, 1. / 3.))
        return eta, gamma

    def parameters_Piccolboni(self):
        """Return ``(eta, gamma)`` for the fixed, known-horizon setting.

        [Piccolboni Schindelhauer "Discrete Prediction Games with Arbitrary
        Feedback and Loss" 2000]
        """
        n, horizon = self.n_actions, self.horizon
        eta = pow(np.log(n), 1. / 2.) / pow(horizon, 1. / 2.)
        gamma = np.fmin(
            1.,
            pow(n, 1. / 2.) * pow(np.log(n), 1. / 4.) / pow(horizon, 1. / 4.),
        )
        return eta, gamma

    def feedexp3(self, method, job_id):
        """Play one run of FeedExp3 and return its cumulative regret curve.

        Args:
            method: ``'Bianchi'`` selects ``parameters_Bianchi``; any other
                value selects ``parameters_Piccolboni``.
            job_id: repetition index.  It is accepted so the method can be
                mapped over a range of repetitions; it does not influence
                the computation.

        Returns:
            1-D array of length ``horizon`` whose entry ``t`` is the loss
            suffered up to round ``t`` minus the cumulative loss of the best
            fixed action up to round ``t``.

        Side effects:
            Sets ``self.LinkMatrix``.
        """
        # The loss estimate below is K[i, a] * F[a, y] / p[a]; summing it
        # over a with weights p[a] recovers L[i, y] only if K @ F == L.
        # lstsq solves A X = B, so solve the transposed system F.T K.T = L.T.
        self.LinkMatrix = np.linalg.lstsq(
            self.FeedbackMatrix.T, self.LossMatrix.T, rcond=None
        )[0].T
        k_star = max([1, np.fabs(self.LinkMatrix).max()])
        C = pow(k_star * np.sqrt(np.exp(1.) - 2.), 2. / 3.)

        if method == 'Bianchi':
            eta, gamma = self.parameters_Bianchi(C)
        else:
            eta, gamma = self.parameters_Piccolboni()

        # A fresh generator per run: the global numpy RNG state is copied
        # into forked worker processes, which would make parallel
        # repetitions replay the same random draws.
        rng = np.random.default_rng()

        n_actions = self.n_actions
        n_outcomes = self.LossMatrix.shape[1]
        weights = np.ones(n_actions)
        cum_regret = np.empty(self.horizon)
        cum_all_losses = np.zeros(n_actions)
        cum_suffered_loss = 0.

        for t in range(self.horizon):
            # Exponential weights mixed with uniform exploration.
            probs = (1 - gamma) * weights / weights.sum() + gamma / n_actions
            action = rng.choice(n_actions, p=probs)

            outcome = rng.choice(n_outcomes, p=self.outcome_distribution)
            feedback = self.FeedbackMatrix[action, outcome]

            # Importance-weighted loss estimates for every action at once.
            loss_estimates = self.LinkMatrix[:, action] * feedback / probs[action]
            weights = weights * np.exp(-eta * loss_estimates)
            # Only weight ratios matter; renormalising keeps them from
            # underflowing to zero over long horizons.
            weights = weights / weights.sum()

            # policy suffers loss and regret
            cum_all_losses += self.LossMatrix[:, outcome]
            cum_suffered_loss += self.LossMatrix[action, outcome]
            cum_regret[t] = cum_suffered_loss - cum_all_losses.min()

        return cum_regret


def eval_policy_parallel(nbCores, nbReps, horizon, method):
    """Run ``nbReps`` repetitions of ``SyntheticCase.feedexp3`` in parallel.

    The game is fixed to a 2x2 identity loss matrix with feedback matrix
    ``[[1, 1], [1, 0]]``.

    Args:
        nbCores: number of worker processes in the pool.
        nbReps: number of repetitions; each receives its index as ``job_id``.
        horizon: number of rounds per repetition.
        method: parameter-tuning name forwarded to ``feedexp3``.

    Returns:
        Array of shape ``(nbReps, horizon)`` holding one cumulative-regret
        curve per repetition.
    """
    LossMatrix = np.array([[1., 0.], [0., 1.]])
    FeedbackMatrix = np.array([[1., 1.], [1., 0.]])
    print("nbCores:", nbCores, "nbReps:", nbReps, "Horizon:", horizon)
    task = SyntheticCase(LossMatrix, FeedbackMatrix, horizon)
    # The context manager shuts the workers down once the map has returned,
    # instead of leaking one pool of processes per call.
    with Pool(processes=nbCores) as pool:
        regrets = pool.map(partial(task.feedexp3, method), range(nbReps))
    return np.asarray(regrets)


# Experiment settings: worker processes, rounds per run, and repetitions.
n_cores = 16
horizon = 1000000
n_folds = 25

# Mean cumulative regret with a +/- one standard-error band, Bianchi tuning.
result = eval_policy_parallel(n_cores, n_folds, horizon, 'Bianchi')
mean = np.mean(result, 0)
# Standard error over the n_folds repetitions (the original referenced an
# undefined name `n_reps` for the upper band, which raised NameError).
sem = np.std(result, axis=0) / np.sqrt(n_folds)
plt.plot(mean, label='Bianchi')
plt.fill_between(range(horizon), mean - sem, mean + sem, alpha=0.3, color='blue')

# Any method name other than 'Bianchi' selects the Piccolboni parameters.
result = eval_policy_parallel(n_cores, n_folds, horizon, 'Picolboni')
mean = np.mean(result, 0)
sem = np.std(result, axis=0) / np.sqrt(n_folds)
plt.plot(mean, label='Picolboni')
plt.fill_between(range(horizon), mean - sem, mean + sem, alpha=0.3, color='orange')

plt.xlabel('Iteration')
plt.ylabel('Cumulative Regret')
plt.legend()
    

# Output: nbCores: 16 nbReps: 25 Horizon: 1000000
