# Adversarial thresholding semi-bandit algorithms

## Import libraries

In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import time
%matplotlib inline

## Import ALICE Transition Radiation Detector Anode current dataset (normalised)

In [None]:
# Load the normalised anode-current matrix stored under the default key
# 'arr_0' of Yc_.npz in the current working directory. The archive is opened
# in a context manager so the underlying file handle is closed once the array
# has been read into memory.
with np.load(os.path.join(os.getcwd(), 'Yc_.npz')) as _archive:
    Yc_ = _archive['arr_0']

# T = size of the first axis (iterated as rounds `t` in the cells below),
# K = size of the second axis (iterated as arms).
T, K = Yc_.shape[:2]

## Exponentially-weighted adversarial thresholding semi-bandits
Fixed threshold setting
Experiments repeated ten times

In [None]:
%%time
# Fixed-threshold experiment for the exponentially-weighted thresholding
# semi-bandit (T-Exp3.M when TExp3M == 1, dLET-Exp3.M otherwise).
#
# Reads the notebook globals Yc_ (T x K data matrix), T and K.
# For every repetition it simulates rounds 1 .. T-2, then records
#   * false positive / false negative / false discovery rates (FPRs, FNRs, FDRs),
#   * the cumulative regret                                  (Exp_regret[:, exp_num]),
#   * the per-arm cumulative pull counts                     (Experiments[exp_num]).
n_experiments = 10        # number of independent repetitions
exp_num = -1
theta = 12 / 15           # fixed threshold applied to the values in Yc_
Threshold_interval = 0    # 0 marks the fixed-threshold setting

# Algorithm switch: TExp3M == 1 selects T-Exp3.M, any other value dLET-Exp3.M.
TExp3M = 0
dLETExp3M = 0 if TExp3M == 1 else 1

# Result containers shared by all repetitions.
FPRs = []
FNRs = []
FDRs = []
Exp_regret = np.zeros([T, n_experiments])
Experiments = [[] for _ in range(n_experiments)]

for exp_num in range(n_experiments):
    Cum_Arms_pulled = np.zeros([T, K])

    Weights = np.ones([T, K])
    Arms = range(K)
    Pulling_probs = np.zeros([T, K])
    Rewards = np.zeros([T, K])
    Pulled_arms = [[] for _ in range(T)]
    Rewarded_arms = [[] for _ in range(T)]
    eta = 0.01               # other values tried: 0.001, 0.005, 0.05, 0.1
    beta = 10**(-8)          # weight multiplier for a tracked arm that fell below theta
    max_weight = 10**20      # weights are clamped to [min_weight, max_weight]
    min_weight = 10**(-20)
    X_ = np.zeros([T, K])

    epsilon_ = 1  # label efficient parameter
    epsilon = np.ones(K) * epsilon_   # per-arm query probability
    umicron = 0.001                   # small constant added to every observed value
    e_rate = 0.9                      # decay applied to epsilon after a below-threshold pull
    Threshold_list_old = []           # arms found above theta in the previous round
    Arms_pulled = np.zeros([T, K])

    for t in range(1, T - 1):
        print('Round', t)
        if TExp3M == 1:
            # T-Exp3.M draws exactly as many arms as are above the threshold.
            kc = int(np.count_nonzero(Yc_[t, :] > theta))
        else:
            kc = 20

        # Clamp the weights, then mix the normalised weights with a uniform
        # distribution to obtain the sampling probabilities of this round.
        Weights[t, Weights[t, :] > max_weight] = max_weight
        Weights[t, Weights[t, :] < min_weight] = min_weight
        weight_sum = np.sum(Weights[t, :])
        Pulling_probs[t, :] = (1 - eta) * (Weights[t, :] / weight_sum) + eta / K

        # Sample kc arms; arms already tracked from the previous round are
        # handled separately, so remove them from the sampled set.
        Order = np.random.choice(Arms, kc, p=Pulling_probs[t, :], replace=False)
        Order = np.setdiff1d(Order, Threshold_list_old)
        All_arms = np.append(Order, Threshold_list_old)
        Pulled_arms[t] = All_arms.astype(int)

        X_[t, :] = Yc_[t, :]

        Weights[t + 1, :] = Weights[t, :]
        Threshold_list_new = []
        B = np.random.rand()      # one draw per round, compared with each arm's epsilon
        gain = eta * kc / K       # step size of the exponential weight update

        if dLETExp3M == 1:
            # Arms that were above the threshold in the previous round.
            for arm in Threshold_list_old:
                if B < epsilon[arm]:
                    Arms_pulled[t, arm] = 1
                    if X_[t, arm] > theta:
                        x_hat = (X_[t, arm] + umicron) / (Pulling_probs[t, arm] * epsilon[arm])
                        Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                        Rewards[t, arm] = 1
                        Threshold_list_new.append(arm)
                        epsilon[arm] = epsilon_
                    else:
                        x_hat = 0
                        Weights[t + 1, arm] = Weights[t, arm] * beta * np.exp(x_hat - theta)
                        epsilon[arm] = e_rate * epsilon[arm]
                        Rewards[t, arm] = 0
                else:
                    Pulled_arms[t] = np.setdiff1d(Pulled_arms[t], arm)

        # Freshly sampled arms. epsilon is indexed by the arm itself; the
        # earlier code indexed it by the position inside Order, which read the
        # query probability of an unrelated arm.
        for arm in Order:
            if B < epsilon[arm]:
                Arms_pulled[t, arm] = 1
                if X_[t, arm] > theta:
                    x_hat = (X_[t, arm] + umicron) / (Pulling_probs[t, arm] * epsilon[arm])
                    Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                    Rewards[t, arm] = 1
                    Threshold_list_new.append(arm)
                    if dLETExp3M == 1:
                        epsilon[arm] = epsilon_
                else:
                    x_hat = umicron / (Pulling_probs[t, arm] * epsilon[arm])
                    Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                    if dLETExp3M == 1:
                        epsilon[arm] = e_rate * epsilon[arm]
                    Rewards[t, arm] = 0
            else:
                Pulled_arms[t] = np.setdiff1d(Pulled_arms[t], arm)
                Rewards[t, arm] = 0

        if dLETExp3M == 1:
            Threshold_list_old = Threshold_list_new
        # Stored as the tuple returned by np.where (one index array), as before.
        Rewarded_arms[t] = np.where(Rewards[t, :] == 1)

    # NOTE(review): Cum_Arms_pulled is reset at the top of every repetition, so
    # it only ever equals Arms_pulled of the current run - confirm intent.
    Cum_Arms_pulled = Cum_Arms_pulled + Arms_pulled

    # Confusion counts over all (round, arm) pairs: a pull is a "positive",
    # a value above theta is the ground truth.
    pulled = Arms_pulled == 1
    not_pulled = Arms_pulled == 0
    above = Yc_ > theta
    tp = int(np.count_nonzero(pulled & above))
    fp = int(np.count_nonzero(pulled & ~above))
    fn = int(np.count_nonzero(not_pulled & above))
    tn = int(np.count_nonzero(not_pulled & ~above))
    # An empty denominator yields NaN instead of raising ZeroDivisionError.
    FPR = fp / (fp + tn) if (fp + tn) else np.nan
    FNR = fn / (fn + tp) if (fn + tp) else np.nan
    FDR = fp / (fp + tp) if (fp + tp) else np.nan
    FPRs.append(FPR)
    FNRs.append(FNR)
    FDRs.append(FDR)

    if dLETExp3M == 0:
        print('Average false positive rate for T-Exp3.M is {}'.format(np.mean(FPRs)))
        print('Average false negative rate for T-exp3.M is {}'.format(np.mean(FNRs)))
        print('Average false discovery rate for T-Exp3.M is {}'.format(np.mean(FDRs)))
    else:
        print('Average false positive rate for dLET-Exp3.M is {} with epsilon {}, k = {}'.format(np.mean(FPRs),epsilon_, kc))
        print('Average false negative rate for dLET-Exp3.M is {} with epsilon {}'.format(np.mean(FNRs),epsilon_))
        print('Average false discovery rate for dLET-Exp3.M is {} with epsilon {}'.format(np.mean(FDRs),epsilon_))

    # Per-round regret: the sum of all above-threshold values minus the
    # above-threshold values actually pulled, the latter scaled down by
    # 1 / (|#above - #pulled| + 1).
    opt_sum = np.where(above, Yc_, 0.0).sum(axis=1)
    act_sum = np.where(pulled, Yc_ * above, 0.0).sum(axis=1)
    Delta = np.abs(above.sum(axis=1) - pulled.sum(axis=1)) + 1
    Regret = np.cumsum(opt_sum - (1 / Delta) * act_sum)

    Exp_regret[:, exp_num] = Regret
    # Mean over all n_experiments columns, including columns not filled yet.
    Mean_regret = np.mean(Exp_regret, axis=1)

    # First arm with the strictly largest number of pulls (NaN if none pulled).
    pull_totals = Arms_pulled.sum(axis=0)
    max_count = pull_totals.max() if K > 0 else 0
    max_arm = int(np.argmax(pull_totals)) if max_count > 0 else np.nan

    Cum_arms = np.cumsum(Arms_pulled, axis=0)

    Experiments[exp_num] = Cum_arms
    print('Max arm:', max_arm)
    print('Experiment number:', exp_num)

In [None]:
# Write the results held in `Experiments` and `Exp_regret` to compressed
# .npz archives in the current working directory.
_save_dir = os.getcwd()
for _file_name, _result in (('/Experiments_fixed_d1k20.npz', Experiments),
                            ('/Regret_fixed_d1k20.npz', Exp_regret)):
    np.savez_compressed(_save_dir + _file_name, _result)

## AliceBandit
Fixed threshold setting

In [None]:
%%time
# Fixed-threshold experiment for the AliceBandit heuristic.
#
# Reads the notebook globals Yc_ (T x K data matrix), T and K.
# For every repetition it simulates rounds 1 .. T-2, then records the same
# error rates, cumulative regret and cumulative pull counts as the
# exponentially-weighted cell (FPRs / FNRs / FDRs, Exp_regret, Experiments).
n_experiments = 10        # number of independent repetitions
exp_num = -1
theta = 12 / 15           # fixed threshold applied to the values in Yc_
Threshold_interval = 0    # 0 marks the fixed-threshold setting

FPRs = []
FNRs = []
FDRs = []
Exp_regret = np.zeros([T, n_experiments])
Experiments = [[] for _ in range(n_experiments)]


def _alice_observe(t, arm, n_active):
    """Record a pull of `arm` in round `t` and update its pulling probability.

    Operates on the notebook-level state of the current repetition
    (Arms_pulled, time_last_pulled, X_, Prev_x, Pulling_probs, Rewards,
    N_mistakes, eta, theta, K), which is looked up at call time.

    `n_active` is the number of arms that were above the threshold in the
    previous round; it scales the decay applied after a miss.

    Returns True when the observed value exceeds theta, False otherwise.
    """
    Arms_pulled[t, arm] = 1
    time_last_pulled[arm] = t
    if X_[t, arm] > theta:
        Prev_x[arm] = X_[t, arm]
        Pulling_probs[t, arm] = 1
        Rewards[t, arm] = 1
        return True
    # Miss: shrink the probability, faster the more misses the arm has had.
    N_mistakes[arm] += 1
    exponent = eta * n_active * N_mistakes[arm] / K
    Pulling_probs[t, arm] = Pulling_probs[t, arm] * np.exp(-exponent)
    if X_[t, arm] == 0:
        Pulling_probs[t, arm] = 0
    return False


for exp_num in range(n_experiments):
    Cum_Arms_pulled = np.zeros([T, K])
    detection = 0             # number of above-threshold observations
    print('Experiment {}'.format(exp_num + 1))
    # NOTE(review): Pulling_probs starts at zero and each row is copied from
    # the previous one, so the random test `B < Pulling_probs[t, arm]` cannot
    # succeed for an arm until a t_max-forced pull has set its probability
    # to 1 - confirm this is the intended initialisation.
    Pulling_probs = np.zeros([T, K])
    N_mistakes = np.zeros(K)
    Prev_x = np.zeros(K)
    Rewards = np.zeros([T, K])
    Pulled_arms = [[] for _ in range(T)]
    Rewarded_arms = [[] for _ in range(T)]
    eta = 0.01               # other values tried: 0.001, 0.005, 0.05, 0.1
    Threshold_list_old = []  # arms found above theta in the previous round
    min_n = 1 # minimum number of arms selected each round

    tmax = 60 # maximum number of minutes between pulls for each arm

    t_max = 30 * tmax        # same limit expressed in rounds (30 rounds per minute)
    Arms_pulled = np.zeros([T, K])
    time_last_pulled = np.zeros(K)
    Normalised_current_thresholds = np.ones(T) * 0.2

    X_ = Yc_
    s = 0                    # number of arms already drawn in the current sweep
    for t in range(1, T - 1):
        print('Round', t)
        # Candidate selection: sweep through the arms in random chunks without
        # replacement; a new sweep starts whenever s is reset to 0.
        if np.mod(s, K) == 0:
            s = 0
            A = np.arange(K)
            # Integer upper bound (exclusive): at most half of the arms.
            n_ = np.random.randint(min_n, K // 2)
            s += n_
            C = np.random.choice(A, n_, replace=False)
        else:
            A = np.setdiff1d(A, C)
            if K - s < 2 * min_n:
                # Too few arms left for another chunk: reuse C, restart next round.
                s = 0
            else:
                n_ = np.random.randint(min_n, K - s)
                s += n_
                C = np.random.choice(A, n_, replace=False)

        Order = np.setdiff1d(C, Threshold_list_old)
        All_arms = np.append(Order, Threshold_list_old)
        Pulled_arms[t] = All_arms.astype(int)
        Pulling_probs[t, :] = Pulling_probs[t - 1, :]

        Threshold_list_new = []
        n_active = len(Threshold_list_old)

        # Previously detected arms first, then the new candidates; each one is
        # pulled with its current pulling probability.
        for arm in list(Threshold_list_old) + list(Order):
            B = np.random.rand()
            if B < Pulling_probs[t, arm]:
                if _alice_observe(t, arm, n_active):
                    detection += 1
                    Threshold_list_new.append(arm)
            else:
                Pulled_arms[t] = np.setdiff1d(Pulled_arms[t], arm)

        # Forced pulls: arms not pulled this round whose last pull is more
        # than t_max rounds ago.
        overdue = np.intersect1d(np.where(Arms_pulled[t, :] == 0)[0],
                                 np.where(t - time_last_pulled > t_max)[0])
        for arm in overdue:
            if _alice_observe(t, arm, n_active):
                detection += 1
                Threshold_list_new.append(arm)

        Threshold_list_old = Threshold_list_new

    # Confusion counts over all (round, arm) pairs: a pull is a "positive",
    # a value above theta is the ground truth.
    pulled = Arms_pulled == 1
    not_pulled = Arms_pulled == 0
    above = Yc_ > theta
    tp = int(np.count_nonzero(pulled & above))
    fp = int(np.count_nonzero(pulled & ~above))
    fn = int(np.count_nonzero(not_pulled & above))
    tn = int(np.count_nonzero(not_pulled & ~above))
    # An empty denominator yields NaN instead of raising ZeroDivisionError.
    FPR = fp / (fp + tn) if (fp + tn) else np.nan
    FNR = fn / (fn + tp) if (fn + tp) else np.nan
    FDR = fp / (fp + tp) if (fp + tp) else np.nan
    FPRs.append(FPR)
    FNRs.append(FNR)
    FDRs.append(FDR)

    print('Average false positive rate for AliceBandit is {} with tmax {}'.format(np.mean(FPRs),tmax))
    print('Average false negative rate for AliceBandit is {} with tmax {}'.format(np.mean(FNRs),tmax))
    print('Average false discovery rate for AliceBandit is {} with tmax {}'.format(np.mean(FDRs),tmax))

    # Per-round regret: the sum of all above-threshold values minus the
    # above-threshold values actually pulled, the latter scaled down by
    # 1 / (|#above - #pulled| + 1).
    opt_sum = np.where(above, Yc_, 0.0).sum(axis=1)
    act_sum = np.where(pulled, Yc_ * above, 0.0).sum(axis=1)
    Delta = np.abs(above.sum(axis=1) - pulled.sum(axis=1)) + 1
    Regret = np.cumsum(opt_sum - (1 / Delta) * act_sum)

    Exp_regret[:, exp_num] = Regret
    # Mean over all n_experiments columns, including columns not filled yet.
    Mean_regret = np.mean(Exp_regret, axis=1)

    # First arm with the strictly largest number of pulls (NaN if none pulled).
    pull_totals = Arms_pulled.sum(axis=0)
    max_count = pull_totals.max() if K > 0 else 0
    max_arm = int(np.argmax(pull_totals)) if max_count > 0 else np.nan

    Cum_arms = np.cumsum(Arms_pulled, axis=0)

    Experiments[exp_num] = Cum_arms
    print('Max arm:', max_arm)
    print('Experiment number:', exp_num)

In [None]:
# Write the results held in `Experiments` and `Exp_regret` to compressed
# .npz archives in the current working directory.
_save_dir = os.getcwd()
for _file_name, _result in (('/Experiments_fixed_A60.npz', Experiments),
                            ('/Regret_fixed_A60.npz', Exp_regret)):
    np.savez_compressed(_save_dir + _file_name, _result)

## Reward under threshold interval setting

In [None]:
# Reference reward for the threshold-interval setting.
#
# For every arm, scan the rounds and start a new "anchor" whenever the value
# differs from the value at the current anchor by more than theta_interval.
#   Prev_opt[arm]    list of anchor rounds (always starts with 0)
#   P_image[t, arm]  most recent anchor round at or before round t
#   r[t, arm]        max(0, anchor value - theta_interval), decayed
#                    exponentially with the time elapsed since the anchor
#   opt_[t]          sum of r over the arms listed in optimal_arms
omega = 0.01             # decay rate of the reward since the last anchor
theta_interval = 0.2     # minimum change that starts a new anchor
optimal_arms = [3, 4, 27]

Prev_opt = [[0] for _ in range(K)]
P_image = np.zeros([T, K], dtype=int)
for arm in range(K):
    anchors = Prev_opt[arm]
    for t in range(T):
        if np.abs(Yc_[anchors[-1], arm] - Yc_[t, arm]) > theta_interval:
            anchors.append(t)
        # Filled during the scan, so each entry is written exactly once.
        P_image[t, arm] = anchors[-1]

# Value of each arm at its current anchor, and rounds elapsed since then.
anchor_values = Yc_[P_image, np.arange(K)]
elapsed = np.arange(T)[:, None] - P_image
r = np.maximum(0, anchor_values - theta_interval) * np.exp(-(omega / K) * elapsed)

opt_ = np.sum(r[:, optimal_arms], axis=1)

## Exponentially-weighted adversarial thresholding semi-bandits
Threshold interval setting

In [None]:
%%time
# Threshold-interval experiment for the exponentially-weighted thresholding
# semi-bandit (T-Exp3.M when TExp3M == 1, dLET-Exp3.M otherwise).
#
# An arm is "detected" when its value differs from the value stored at its
# last detection (Prev_x) by more than theta_interval. Arms listed in
# optimal_arms are treated as the ground-truth positives.
#
# Reads the notebook globals Yc_ (T x K data matrix), T and K.
# For every repetition it records the error rates (FPRs / FNRs / FDRs), the
# per-arm cumulative pull counts (Experiments[exp_num]) and the cumulative
# reward (Cum_rewards[exp_num]).
n_experiments = 10        # number of independent repetitions
optimal_arms = [3, 4, 27] # arms counted as true positives / used for the reward
exp_num = -1
theta_interval = 0.2      # minimum change that counts as a detection
Threshold_interval = 1    # 1 marks the threshold-interval setting
Normalised_current_thresholds = np.ones(T) * 0.2

# Algorithm switch: TExp3M == 1 selects T-Exp3.M, any other value dLET-Exp3.M.
TExp3M = 0
dLETExp3M = 0 if TExp3M == 1 else 1

# Result containers shared by all repetitions.
FPRs = []
FNRs = []
FDRs = []
Experiments = [[] for _ in range(n_experiments)]
Cum_rewards = [[] for _ in range(n_experiments)]

for exp_num in range(n_experiments):
    Cum_Arms_pulled = np.zeros([T, K])

    theta = 12 / 15

    Weights = np.ones([T, K])
    Arms = range(K)
    Pulling_probs = np.zeros([T, K])
    Rewards = np.zeros([T, K])
    Pulled_arms = [[] for _ in range(T)]
    Rewarded_arms = [[] for _ in range(T)]

    eta = 0.001              # other values tried: 0.005, 0.05, 0.1
    omega = 0.01             # decay rate of the reward since the last detection

    beta = 10**(-8)          # weight multiplier for a tracked arm without a new detection
    max_weight = 10**20      # weights are clamped to [min_weight, max_weight]
    min_weight = 10**(-20)
    X_ = np.zeros([T, K])

    epsilon_ = 1 # label efficient parameter

    epsilon = np.ones(K) * epsilon_   # per-arm query probability
    umicron = 0.001                   # small constant added to every observed value
    e_rate = 0.9                      # decay applied to epsilon after a pull without detection
    Threshold_list_old = []           # arms detected in the previous round
    Arms_pulled = np.zeros([T, K])
    Prev_x = np.zeros(K)              # value at each arm's last detection
    Prev_t = np.zeros(K)              # round of each arm's last detection

    for t in range(1, T - 1):
        print('Round', t)
        kc = 3 if TExp3M == 1 else 5

        # Clamp the weights, then mix the normalised weights with a uniform
        # distribution to obtain the sampling probabilities of this round.
        Weights[t, Weights[t, :] > max_weight] = max_weight
        Weights[t, Weights[t, :] < min_weight] = min_weight
        weight_sum = np.sum(Weights[t, :])
        Pulling_probs[t, :] = (1 - eta) * (Weights[t, :] / weight_sum) + eta / K

        # Sample kc arms; arms already tracked from the previous round are
        # handled separately, so remove them from the sampled set.
        Order = np.random.choice(Arms, kc, p=Pulling_probs[t, :], replace=False)
        Order = np.setdiff1d(Order, Threshold_list_old)
        All_arms = np.append(Order, Threshold_list_old)
        Pulled_arms[t] = All_arms.astype(int)

        X_[t, :] = Yc_[t, :]

        Weights[t + 1, :] = Weights[t, :]
        Threshold_list_new = []
        B = np.random.rand()      # one draw per round, compared with each arm's epsilon
        gain = eta * kc / K       # step size of the exponential weight update
        decay = omega / K         # per-round decay of a stale reward

        if dLETExp3M == 1:
            # Arms that were detected in the previous round.
            for arm in Threshold_list_old:
                if B < epsilon[arm]:
                    Arms_pulled[t, arm] = 1
                    if np.abs(Prev_x[arm] - X_[t, arm]) > theta_interval:
                        x_hat = (X_[t, arm] + umicron) / (Pulling_probs[t, arm] * epsilon[arm])
                        Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                        Threshold_list_new.append(arm)
                        epsilon[arm] = epsilon_
                        Prev_x[arm] = X_[t, arm]
                        Prev_t[arm] = t
                        Rewards[t, arm] = X_[t, arm]
                    else:
                        x_hat = 0
                        Weights[t + 1, arm] = Weights[t, arm] * beta * np.exp(x_hat - Normalised_current_thresholds[0])
                        epsilon[arm] = e_rate * epsilon[arm]
                        # Decayed reward of this arm's own last detection (the
                        # earlier code read Prev_x through a stale loop variable).
                        Rewards[t, arm] = Prev_x[arm] * np.exp(-decay * (t - Prev_t[arm]))
                else:
                    Pulled_arms[t] = np.setdiff1d(Pulled_arms[t], arm)

        # Freshly sampled arms. epsilon is indexed by the arm itself; the
        # earlier code indexed it by the position inside Order, which read the
        # query probability of an unrelated arm.
        for arm in Order:
            if B < epsilon[arm]:
                Arms_pulled[t, arm] = 1
                if np.abs(Prev_x[arm] - X_[t, arm]) > theta_interval:
                    x_hat = (X_[t, arm] + umicron) / (Pulling_probs[t, arm] * epsilon[arm])
                    Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                    Threshold_list_new.append(arm)
                    Prev_x[arm] = X_[t, arm]
                    Prev_t[arm] = t
                    Rewards[t, arm] = X_[t, arm]
                    if dLETExp3M == 1:
                        epsilon[arm] = epsilon_
                else:
                    x_hat = umicron / (Pulling_probs[t, arm] * epsilon[arm])
                    Weights[t + 1, arm] = Weights[t, arm] * np.exp(gain * x_hat)
                    if dLETExp3M == 1:
                        epsilon[arm] = e_rate * epsilon[arm]
                    Rewards[t, arm] = Prev_x[arm] * np.exp(-decay * (t - Prev_t[arm]))
            else:
                Pulled_arms[t] = np.setdiff1d(Pulled_arms[t], arm)
                Rewards[t, arm] = 0

        if dLETExp3M == 1:
            Threshold_list_old = Threshold_list_new
        # Stored as the tuple returned by np.where (one index array), as before.
        Rewarded_arms[t] = np.where(Rewards[t, :] == 1)

    # NOTE(review): Cum_Arms_pulled is reset at the top of every repetition, so
    # it only ever equals Arms_pulled of the current run - confirm intent.
    Cum_Arms_pulled = Cum_Arms_pulled + Arms_pulled

    # Confusion counts over all (round, arm) pairs: a pull is a "positive",
    # membership in optimal_arms is the ground truth.
    is_target = np.isin(np.arange(K), optimal_arms)
    pulled = Arms_pulled == 1
    not_pulled = Arms_pulled == 0
    tp = int(np.count_nonzero(pulled[:, is_target]))
    fp = int(np.count_nonzero(pulled[:, ~is_target]))
    fn = int(np.count_nonzero(not_pulled[:, is_target]))
    tn = int(np.count_nonzero(not_pulled[:, ~is_target]))
    # An empty denominator yields NaN instead of raising ZeroDivisionError.
    FPR = fp / (fp + tn) if (fp + tn) else np.nan
    FNR = fn / (fn + tp) if (fn + tp) else np.nan
    FDR = fp / (fp + tp) if (fp + tp) else np.nan
    FPRs.append(FPR)
    FNRs.append(FNR)
    FDRs.append(FDR)

    if dLETExp3M == 0:
        print('Average false positive rate for T-Exp3.M is {}'.format(np.mean(FPRs)))
        print('Average false negative rate for T-exp3.M is {}'.format(np.mean(FNRs)))
        print('Average false discovery rate for T-Exp3.M is {}'.format(np.mean(FDRs)))
    else:
        print('Average false positive rate for dLET-Exp3.M is {} with epsilon {}, k = {}'.format(np.mean(FPRs),epsilon_, kc))
        print('Average false negative rate for dLET-Exp3.M is {} with epsilon {}'.format(np.mean(FNRs),epsilon_))
        print('Average false discovery rate for dLET-Exp3.M is {} with epsilon {}'.format(np.mean(FDRs),epsilon_))

    # Clip negative rewards element-wise. The earlier indexing zeroed column 0
    # of every row that contained a negative entry and left the negative
    # entries themselves untouched.
    Rewards[Rewards < 0] = 0
    # Reward of the optimal arms, scaled down by 1 / (|#optimal - #pulled| + 1).
    Delta = np.abs(len(optimal_arms) - np.count_nonzero(Arms_pulled == 1, axis=1)) + 1
    act_ = np.multiply(1 / Delta, np.sum(Rewards[:, optimal_arms], axis=1))
    reward = np.cumsum(act_)

    # First arm with the strictly largest number of pulls (NaN if none pulled).
    pull_totals = Arms_pulled.sum(axis=0)
    max_count = pull_totals.max() if K > 0 else 0
    max_arm = int(np.argmax(pull_totals)) if max_count > 0 else np.nan

    Cum_arms = np.cumsum(Arms_pulled, axis=0)

    Experiments[exp_num] = Cum_arms
    Cum_rewards[exp_num] = reward
    print('Experiment number:', exp_num)

In [None]:
# Write the results held in `Experiments` and `Cum_rewards` to compressed
# .npz archives in the current working directory.
_save_dir = os.getcwd()
for _file_name, _result in (('/Experiments_interval_d1k05.npz', Experiments),
                            ('/Reward_interval_d1k05.npz', Cum_rewards)):
    np.savez_compressed(_save_dir + _file_name, _result)

## Import ALICE Cosmic Ray Detector simulated dataset

In [None]:
# Build the 10-arm data matrix `Data` from the cosmic-ray detector CSV.
#
# Columns 2..9 of Data are the eight sector currents ("ICC" records),
# normalised with min_c / max_c; columns 0 and 1 are synthetic series
# derived from sector column 4 by adding constant offsets.
with open('full_track.csv', encoding='UTF-16') as f:
    df = pd.read_csv(f)
df.columns = ['Time','Name','Value']
df['Time'] = pd.to_datetime(df['Time'], format='%Y%m%d_%H%M%S_%f')

# One sub-frame per sector, selected by the record name.
n_sectors = 8
df_ICCs = [[] for _ in range(n_sectors)]
for i in range(n_sectors):
    df_ICCs[i] = df.loc[df['Name'] == 'ACO/HV/V0A/SECTOR'+str(i+1)+'/ICC']
    print('ICC {} complete...'.format(i+1))

ICC = [sector_frame['Value'].values for sector_frame in df_ICCs]

# Rows = samples, columns = sectors; keep at most the first 50000 samples.
# NOTE(review): np.array(ICC) only yields a 2-D array if every sector has the
# same number of records - confirm for the input file.
data = np.transpose(np.array(ICC))
data = data[:50000,:]

# Min-max normalisation of the whole matrix. The bounds come from the shape
# of `data` itself; the earlier loop ran over the notebook-level T and K of
# the previous dataset, which do not match this matrix. Converting to float
# first avoids truncation if the CSV values were parsed as integers.
max_c = 50
min_c = 9
diff_c = max_c - min_c
data = (data.astype(float) - min_c) / diff_c
data1 = data              # kept as an alias, as in the earlier version

n_rows = data.shape[0]

# Synthetic arm 0: sector column 4 with a +0.21 step from sample 10000 onwards.
x1 = np.zeros(n_rows)
x1[:10000] = data[:10000,4]
x1[10000:] = data[10000:,4] + 0.21

# Synthetic arm 1: the first 5000 samples of sector column 4 repeated,
# alternating between no offset and a +0.5 offset every 5000 samples.
# NOTE(review): the intervals stop at 30000, so x2[40000:] stays zero -
# confirm whether np.arange(0, 50000, 10000) was intended.
x2 = np.zeros(n_rows)
intervals = np.arange(0,40000,10000)
for start in intervals:
    x2[start:start+5000] = data[:5000,4]
    x2[start+5000:start+10000] = data[:5000,4]+0.5

Data = np.zeros([n_rows,10])
Data[:,2:] = data
Data[:,0] = x1
Data[:,1] = x2

In [None]:
# Re-derive the horizon T (rows) and the number of arms K (columns) from the
# two-dimensional matrix `Data`.
T, K = Data.shape