<h1> PAC- Median Elimination </h1>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [4]:
def pac(k,eps,delta,mu='random'):
    
    #k=number of arms
    #iterations= number of pulls of the arms
    #mu=an array of length k, which holds the true expected value of each arm
    #mu="random"-> the expected value is sampled from a normal distribution with standard deviation 1 (default)
    #mu=user defined-> the user sends the true expected values of each arm
    #eps=the solution is an eps optimal arm (defines the aprroximity part)
    #delta=the solution is an optimal solution with a probability of 1-delta (defines the probability part)
    
    if mu=="random":
        mu=np.random.normal(0,1,k) #now mu is an array containing the true expected values of each arm
    if len(mu)!=k: #case wwhen mu is user defined
        print("The length of the entered array of true expected values does not match with the number of arms entered \n")
        return
    
    q=np.zeros(k) #the array of the estimated expected values of each arm
    arm_pull_number=np.zeros(k) #stores the number of times each arm has been pulled till the current pull
    reward=0 #the reward earned at current pull
    a=0 #the arm pulled 

    #the number of times each arm has to be sampled
    l=round((4/(eps**2))*np.log((2*k)/delta))
    
    for a in tqdm(range(k)):
        for sample in range(l):
            reward=np.random.normal(mu[a],1) #the reward is sampled from the normal distribution with mean equalling the true expected value of arm and a std-dev of 1  
            
            #updating counts
            arm_pull_number[a]+=1
            
            #updating the estimated value of arm a pulled at the current iteration
            q[a]=((q[a]*(arm_pull_number[a]-1))+reward)/arm_pull_number[a]
        
            
    optimal_arm=np.argmax(q)        
    
    #returns the epsilon optimal arm with a prob of (1-delta) 
    #returns the estimated expected values of the arms after all the pulls 
    #the number of samples drawn for each arm
    
    return optimal_arm,q,l

In [315]:
def pac_median_elimination(k,eps,delta,mu='random'):
    
    #k=number of arms
    #iterations= number of pulls of the arms
    #mu=an array of length k, which holds the true expected value of each arm
    #mu="random"-> the expected value is sampled from a normal distribution with standard deviation 1 (default)
    #mu=user defined-> the user sends the true expected values of each arm
    #eps=the solution is an eps optimal arm (defines the aprroximity part)
    #delta=the solution is an optimal solution with a probability of 1-delta (defines the probability part)
    
    if mu=="random":
        mu=np.random.normal(0,1,k) #now mu is an array containing the true expected values of each arm
    if len(mu)!=k: #case wwhen mu is user defined
        print("The length of the entered array of true expected values does not match with the number of arms entered \n")
        return
    
    #initially all arms are present
    q=np.zeros(k) #the array of the estimated expected values of each arm which has not been eliminated
    arm_pull_number=np.zeros(k) #stores the number of times each arm has been pulled till the current pull
    reward=0 #the reward earned at current pull
    a=0 #the arm pulled 
    
    eps_l=eps/4
    delta_l=delta/2
    #initially no arm has been eliminated
    arm_present=[i for i in range(k)] #list will contain the arms present after each elimination
    
    total_sample_arm=0 #will indicate at the end how many times the estimated optimal arm was sampled
    
    
    #VVVIMP
    #arm_present will hold the arm numbers of all present arms only
    #q will hold the estimated expected rewards of the corresponding arms in arm_present

    
    while(len(arm_present)!=1): #loop until only one arm is present
        
        l=round((1/(eps_l/2)**2)*np.log(3/delta_l)) #number of times each arm has to be sampled at the current iteration
        total_sample_arm+=l
        
        for a in tqdm(range(len(arm_present))): #for each arm in the list of non eliminated arms
            for sample in range(l):
                reward=np.random.normal(mu[a],1) #the reward is sampled from the normal distribution with mean equalling the true expected value of arm and a std-dev of 1  
            
                #updating counts
                arm_pull_number[a]+=1
            
                #updating the estimated value of arm a pulled at the current iteration
                q[a]=((q[a]*(arm_pull_number[a]-1))+reward)/arm_pull_number[a]
        
        #median elimination
        med=np.median(q) #median of the estimated expected rewards of alll present arms 
        
        #indices of all arms to be eliminated
        index=[i for i in range(len(arm_present)) if q[i]>med] 

        #eliminating all the arms from the list of present arms whose estimated expected reward is less than the median
        arm_present=[arm_present[i] for i in index]
        arm_pull_number=[arm_pull_number[i] for i in index]
        
        #eliminating the rewards of the eliminated arm
        q=[q[i] for i in index]
        
        #eliminating the true expexted reward of the eliminated arm
        mu=[mu[i] for i in index]
        
        #updating eps_l and delta_l
        eps_l=(3/4)*eps_l
        delta_l=delta_l/2
        
            
    optimal_arm=arm_present[0] #index of the optimal arm
    q=q[0] #estimated expected reward of the optimal arm
    
    #returns the epsilon optimal arm with a prob of (1-delta) 
    #returns the estimated expected values of the arms after all the pulls 
    #the number of samples drawn for each arm
    
    return optimal_arm,q,l

In [316]:
k=10 #number of arms
mu=np.random.normal(0,1,k) #true expected rewards of each arm
optimal_arm_naive,q_naive,samples_drawn_naive=pac(k,0.01,0.1,mu)
optimal_arm_med,q_med,samples_drawn_med=pac_median_elimination(k,0.01,0.1,mu)

  # This is added back by InteractiveShellApp.init_path()
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.07it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:59<00:00, 12.00s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:43<00:00, 20.69s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:17<00:00, 38.57s/it]


<h1> Quantitative Analysis - Median Sampling vs Pac Bounds </h1>

In [323]:
print("the true expected rewards for each arm: ")
print(mu)
print("\n")
a=np.argmax(mu)
print("the index of the true optimal arm: ",a)

the true expected rewards for each arm: 
[-1.0594003   0.30002799 -1.56002198 -0.25207715 -0.23152665 -1.27870865
 -0.09070675  0.84848132 -0.13511335  1.49404991]


the index of the true optimal arm:  9


In [326]:
print("the estimated expected rewards for the calculated optimal arm: ")
print(q_naive[a])
print("\n")
print("the index of the estimated optimal arm: ",optimal_arm_naive)
print("\n")
print("the difference between the true and the estimated expected reward of the optimal arm: ")
print(abs(q_naive[a]-mu[a]))
print("\n")
print("the number of pulls/samples drawn for each of the ",k," arms: ",samples_drawn_naive)

the estimated expected rewards for the calculated optimal arm: 
1.4922850871326094


the index of the estimated optimal arm:  9


the difference between the true and the estimated expected reward of the optimal arm: 
0.0017648231423570415


the number of pulls/samples drawn for each of the  10  arms:  211933


In [328]:
print("the estimated expected rewards for the calculated optimal arm: ")
print(q_med)
print("\n")
print("the index of the estimated optimal arm: ",optimal_arm_med)
print("\n")
print("the difference between the true and the estimated expected reward of the optimal arm: ")
print(abs(q_med-mu[a]))
print("\n")
print("the number of pulls/samples drawn for each of the ",k," arms: ",samples_drawn_med)

the estimated expected rewards for the calculated optimal arm: 
1.49403518365574


the index of the estimated optimal arm:  9


the difference between the true and the estimated expected reward of the optimal arm: 
1.4726619226523852e-05


the number of pulls/samples drawn for each of the  10  arms:  11085776
