# Simulation plans

## Fixed parameters:

9 arms

## Different true conversion rates:

- case 1: 1 clear winner, [0.20, 0.1, 0.1,0.1, 0.1, 0.1,0.1, 0.1, 0.1]
- case 2: no clear winner, [0.11, 0.1, 0.1,0.1, 0.1, 0.1,0.1, 0.1, 0.1]
- case 3: 2 clear winners but 1 best, [0.20, 0.18, 0.1,0.1, 0.1, 0.1,0.1, 0.1, 0.1]

1. Varying number of batches


2. Varying value of best arm


3. Varying value of the first batch size


## This file is study 1: clear-winner case, no spillover effects

- Both the static design and the Thompson sampling (TS) algorithm are implemented in the class `MAB`.
- Since it takes a long time to run 5000 replications for each experiment, I separate the experiments into different files.

In [1]:
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
from numpy.core.fromnumeric import repeat
from scipy import stats
import seaborn as sns
import pandas as pd
import seaborn as sns
import scipy.stats as st

In [None]:
def best_coverage(data,n_trials,n_resamples,true_val):
    """Estimate how often a bootstrap 95% t-interval contains ``true_val``.

    ``n_trials`` bootstrap samples of size ``n_resamples`` are drawn with
    replacement from ``data`` (one sample per column). A 95% Student-t
    interval for the mean is built from each sample, and the fraction of
    intervals that contain ``true_val`` is returned.

    Parameters:
        data: 1-D array-like of observations to resample from.
        n_trials: number of bootstrap samples (intervals) to build.
        n_resamples: number of observations drawn for each bootstrap sample.
        true_val: the value the intervals are supposed to cover.

    Returns:
        The fraction of the ``n_trials`` intervals that contain ``true_val``.
    """
    xb = np.random.choice(data, (n_resamples, n_trials), replace=True)
    # Each column is one bootstrap sample, so the interval must be centred on
    # that column's own mean (not the grand mean of all resamples) and scaled
    # by that column's standard error.
    low, high = st.t.interval(
        0.95,
        n_resamples - 1,
        loc=np.mean(xb, axis=0),
        scale=st.sem(xb, axis=0),
    )
    covered = np.logical_and(true_val >= low, true_val <= high)
    return np.sum(covered) / n_trials


In [None]:
class MAB:
    """Batched multi-armed bandit simulation with Bernoulli arms.

    Subjects are assigned to arms in ``nEpisodes`` batches. The first batch
    is always assigned uniformly at random; later batches are assigned either
    uniformly (``static``) or proportionally to each arm's posterior
    probability of being the best arm (``Thompson_sampling``).

    Both designs also produce a point estimate of the success rate of the
    truly best arm (``self.true_win_arm``), obtained by weighting each
    batch's successes by the inverse of that arm's assignment probability.
    """

    def __init__(self, probs, karms, nfirst, nEpisodes, n_obs=4000):
        '''
        meanings for the parameters:
        probs: True success rate for each arm k in {1, ..., K}
        karms: Number of arms to choose among
        nfirst: Number of subjects in the first (uniformly randomized) batch
        nEpisodes: Total number of batches, including the first one
        n_obs: Total number of subjects over all batches (default 4000)

        The remaining ``n_obs - nfirst`` subjects are split evenly over the
        ``nEpisodes - 1`` later batches; any remainder is dropped.
        '''
        self.n_obs = n_obs
        self.probs = probs
        self.K = karms
        self.n = int((self.n_obs-nfirst)/(nEpisodes-1)) if nEpisodes > 1 else 0
        self.T = nEpisodes
        self.first = nfirst
        self.true_win_arm = np.argmax(np.asarray(probs))

    def best_arm(self, s:np.ndarray, asgn):
        """Estimate each arm's posterior probability of being the best arm.

        Parameters:
            s: cumulative successes per arm (length K).
            asgn: cumulative assigned subjects per arm (length K).

        Returns:
            Array of length K with the share of 10000 joint posterior draws
            in which each arm had the largest sampled success rate.
        """
        draws = 10000
        s = np.asarray(s)
        # + 1 for a Beta(1, 1) prior
        new_alpha = s + 1
        new_beta = np.asarray(asgn) - s + 1
        # One row of posterior draws per arm; sampled arm by arm so the
        # random-number stream is consumed in a fixed order.
        selection_table = [
            np.random.beta(new_alpha[i], new_beta[i], draws)
            for i in range(self.K)
        ]
        winning_arms = np.argmax(selection_table, axis=0)
        return np.bincount(winning_arms, minlength=self.K) / draws

    def reset(self):
        """Return freshly initialised ``(success, assigned, posterior_probs, winning_probs)``.

        ``success``, ``assigned`` and ``winning_probs`` are zero arrays of
        shape (T, K); ``posterior_probs`` is the scalar 0.
        """
        success = np.zeros((self.T, self.K))   # cumulative successes of each arm after each batch
        assigned = np.zeros((self.T, self.K))  # cumulative assignments of each arm after each batch
        posterior_probs = 0  # estimated success rate
        winning_probs = np.zeros((self.T, self.K))  # probability of each arm being the best arm
        return (success, assigned, posterior_probs, winning_probs)

    def do_experiment(self, assignments):
        """Simulate one batch.

        Parameters:
            assignments: sequence of arm indices, one per subject.

        Returns:
            ``(assigned, success)``: two lists of length K holding the number
            of subjects assigned to each arm and the number of successes
            drawn from Binomial(assigned[i], probs[i]).
        """
        counts = np.bincount(np.asarray(assignments, dtype=np.intp), minlength=self.K)
        assigned = counts[:self.K].tolist()
        success = [np.random.binomial(assigned[i], self.probs[i]) for i in range(self.K)]
        return assigned, success

    def _first_batch(self):
        """Run the uniformly randomized first batch and fill row 0 of the records."""
        (success, assigned, _, winning_probs) = self.reset()
        new_assign = np.random.choice(self.K, size=self.first)
        assigned[0], success[0] = self.do_experiment(new_assign)
        winning_probs[0] = self.best_arm(success[0], assigned[0])
        return success, assigned, winning_probs

    def Thompson_sampling(self):
        """Run one adaptive experiment.

        After the first batch, each batch assigns subjects with probabilities
        equal to the previous batch's winning probabilities.

        Returns:
            ``(success, assigned, posterior_probs, winning_probs)`` where the
            arrays have shape (T, K) and hold cumulative values, and
            ``posterior_probs`` is the estimated success rate of the truly
            best arm. With a single batch this is the plain sample mean of
            that arm; otherwise it is the average over batches of
            successes / (assignment probability * batch size), skipping
            batches in which the arm had assignment probability 0.
        """
        success, assigned, winning_probs = self._first_batch()
        best = self.true_win_arm

        if self.T <= 1:
            posterior_probs = success[0, best] / assigned[0, best]
            return (success, assigned, posterior_probs, winning_probs)

        # First batch is uniform, so the assignment probability is 1/K.
        adj_probs = success[0, best] * self.K / self.first
        skipped = 0
        for i in range(1, self.T):
            assign_probs = winning_probs[i-1]
            new_assign = np.random.choice(self.K, size=self.n, p=assign_probs)  # adaptive design
            new_assigned, new_success = self.do_experiment(new_assign)
            assigned[i] = assigned[i-1] + new_assigned
            success[i] = success[i-1] + new_success
            winning_probs[i] = self.best_arm(success[i], assigned[i])
            if assign_probs[best] == 0:
                # The arm could not be assigned in this batch, so the batch
                # carries no information about it and is left out of the average.
                skipped += 1
            else:
                adj_probs += new_success[best] / assign_probs[best] / self.n
        posterior_probs = adj_probs / (self.T - skipped)
        return (success, assigned, posterior_probs, winning_probs)

    def static(self):
        """Run one non-adaptive experiment in which every batch is uniformly randomized.

        Returns:
            ``(success, assigned, posterior_probs, winning_probs)`` with the
            same layout as ``Thompson_sampling``. ``posterior_probs`` is the
            average over batches of successes * K / batch size for the truly
            best arm.
        """
        success, assigned, winning_probs = self._first_batch()
        best = self.true_win_arm

        adj_probs = success[0, best] * self.K / self.first
        for i in range(1, self.T):
            new_assign = np.random.choice(self.K, size=self.n)  # static design
            new_assigned, new_success = self.do_experiment(new_assign)
            assigned[i] = assigned[i-1] + new_assigned
            success[i] = success[i-1] + new_success
            winning_probs[i] = self.best_arm(success[i], assigned[i])
            adj_probs += new_success[best] * self.K / self.n
        posterior_probs = adj_probs / self.T
        return (success, assigned, posterior_probs, winning_probs)

    def do_replication(self, times, method):
        '''
        Repeat one design ``times`` times and print summary statistics.

        parameters:
        times -- number of independent replications
        method -- 'TS' runs Thompson_sampling; any other value runs static

        returns (final_win_arm, final_win_probs, final_assignment, estimation):
        final_win_arm -- arm with the highest final winning probability in each replication
        final_win_probs -- final probability of each arm being the best arm, shape (times, K)
        final_assignment -- final number of subjects assigned to the true best arm
        estimation -- estimated success rate of the true best arm in each replication

        The printed dict holds the share of replications that selected the
        true best arm, the mean estimate ("ATE"), its RMSE against the true
        rate, and under the key "Coverage" the 95% t-interval for the mean of
        the estimates (an interval, not a coverage probability).
        '''
        run_once = self.Thompson_sampling if method == 'TS' else self.static
        best = self.true_win_arm

        # records of the replication
        final_win_arm = np.zeros(times)
        final_win_probs = np.zeros(shape=(times, self.K))
        final_assignment = np.zeros(times)  # only record the true best arm
        estimation = np.zeros(times)

        for rep in range(times):
            (_, assigned, posterior_probs, winning_probs) = run_once()
            final_win_arm[rep] = np.argmax(winning_probs[-1])
            final_win_probs[rep] = winning_probs[-1]
            final_assignment[rep] = assigned[-1][best]
            estimation[rep] = posterior_probs

        ate = np.mean(estimation)
        mse = np.mean(np.square(np.subtract(estimation, self.probs[best])))
        rmse = np.sqrt(mse)
        low, high = st.t.interval(0.95, len(estimation)-1, loc=ate, scale=st.sem(estimation))
        best_selected = np.count_nonzero(final_win_arm == best) / times
        estimate = {"best_selected":best_selected,"ATE":ate, "RMSE":rmse, "Coverage":(low,high)}
        print(estimate)
        return (final_win_arm, final_win_probs, final_assignment, estimation)


In [None]:
def draw_rep_plots(static_agent_results, ts_agent_results, stages, character):
    """Draw side-by-side box plots of per-arm results for the two designs.

    Parameters:
        static_agent_results: 2-D data with one row per entry of ``stages``
            and nine columns (one per arm) from the static design.
        ts_agent_results: same layout, from the Thompson sampling design.
        stages: iterable of labels; each becomes a ``"r_<label>"`` row id.
        character: name of the plotted quantity, used as the value column
            and therefore as the y-axis label.

    The plot has one facet per design ('ts_agent', 'static_agent') with the
    arms on the x-axis.
    """
    columns = ['Best_Arm','Arm2','Arm3','Arm4','Arm5','Arm6','Arm7','Arm8','Arm9']
    indexs = ["r_"+str(i) for i in stages]

    def _to_long(results, label):
        # Reshape one design's wide table into long format and tag it.
        wide = pd.DataFrame(results, columns=columns)
        wide['periods'] = indexs
        long_df = pd.melt(wide, id_vars='periods', value_vars=columns,
                          var_name='arms', value_name=character)
        return long_df.assign(dataset=label)

    concatenated = pd.concat([
        _to_long(ts_agent_results, 'ts_agent'),
        _to_long(static_agent_results, 'static_agent'),
    ])

    # plot
    g = sns.FacetGrid(concatenated, col="dataset", margin_titles = True, height=6)
    # Pass an explicit category order so both facets place the arms at the
    # same x positions; seaborn warns when a categorical plot is mapped onto
    # a FacetGrid without one.
    g.map(sns.boxplot, 'arms', character, order=columns)
    g.add_legend()

# Clear winner

In [None]:
# Experiment 1: a single batch holding all 4000 subjects (no later rounds).
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332) # fixed seed so the replications are reproducible
first = 4000 # size of the first, uniformly randomized batch
periods = 1 # total number of batches

In [None]:
# Run 5000 replications of each design with the experiment-1 settings.
# Each call prints its summary dict and returns
# (final_win_arm, final_win_probs, final_assignment, estimation).
sim1 = MAB(probs,k,first, periods)
(s1_ts_final_win_arm, s1_ts_final_win_probs, s1_ts_final_assignment, s1_ts_estimation) = sim1.do_replication(5000, "TS")
(s1_st_final_win_arm, s1_st_final_win_probs, s1_st_final_assignment, s1_st_estimation) = sim1.do_replication(5000, "static")

{'best_selected': 1.0, 'ATE': 0.19989398231642014, 'RMSE': 0.018972875446102395, 'Coverage': (0.1993679189470593, 0.20042004568578098)}
{'best_selected': 0.9996, 'ATE': 0.20014875000000001, 'RMSE': 0.021044867842303024, 'Coverage': (0.19956524169619227, 0.20073225830380775)}


In [None]:
# Experiment 3: 2 batches (first batch 2000, then one batch of 2000).
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332) # same seed as the other experiments
first = 2000 # size of the first, uniformly randomized batch
periods = 2 # total number of batches
sim3 = MAB(probs,k,first, periods)
# Only the Thompson sampling design is run here; the static run is disabled.
(s3_ts_final_win_arm, s3_ts_final_win_probs, s3_ts_final_assignment, s3_ts_estimation) = sim3.do_replication(5000, "TS")
# (s3_st_final_win_arm, s3_st_final_win_probs, s3_st_final_assignment, s3_st_estimation) = sim3.do_replication(5000, "static")


{'best_selected': 1.0, 'ATE': 0.19987570954545, 'RMSE': 0.015716320992066563, 'Coverage': (0.19943994790628117, 0.20031147118461884)}


In [None]:
# Experiment 4: 5 batches (first batch 800, then 4 batches of 800).
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332) # same seed as the other experiments
first = 800 # size of the first, uniformly randomized batch
periods = 5 # total number of batches
sim4 = MAB(probs,k,first, periods)
# Only the Thompson sampling design is run here; the static run is disabled.
(s4_ts_final_win_arm, s4_ts_final_win_probs, s4_ts_final_assignment, s4_ts_estimation) = sim4.do_replication(5000, "TS")
# (s4_st_final_win_arm, s4_st_final_win_probs, s4_st_final_assignment, s4_st_estimation) = sim4.do_replication(5000, "static")


{'best_selected': 0.9998, 'ATE': 0.19969761574543823, 'RMSE': 0.012129166978215295, 'Coverage': (0.1993614080435539, 0.20003382344732257)}


In [None]:
# Experiment 5: 10 batches (first batch 400, then 9 batches of 400).
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332) # same seed as the other experiments
first = 400 # size of the first, uniformly randomized batch
periods = 10 # total number of batches
sim5 = MAB(probs,k,first, periods)
# Only the Thompson sampling design is run here; the static run is disabled.
(s5_ts_final_win_arm, s5_ts_final_win_probs, s5_ts_final_assignment, s5_ts_estimation) = sim5.do_replication(5000, "TS")
# (s5_st_final_win_arm, s5_st_final_win_probs, s5_st_final_assignment, s5_st_estimation) = sim5.do_replication(5000, "static")


KeyboardInterrupt: ignored

In [None]:
# Experiment 2: 20 batches (first batch 200, then 19 batches of 200).
# The arm settings and the seed are restated here, as in the other experiment
# cells, so the result does not depend on which cells were run before.
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332)
first = 200 # size of the first, uniformly randomized batch
periods = 20 # total number of batches
sim2 = MAB(probs,k,first, periods)
(s2_ts_final_win_arm, s2_ts_final_win_probs, s2_ts_final_assignment, s2_ts_estimation) = sim2.do_replication(5000, "TS")
(s2_st_final_win_arm, s2_st_final_win_probs, s2_st_final_assignment, s2_st_estimation) = sim2.do_replication(5000, "static")


In [None]:
# Experiment 7: 50 batches (first batch 80, then 49 batches of 80).
# The arm settings and the seed are restated here, as in the other experiment
# cells, so the result does not depend on which cells were run before.
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332)
first = 80 # size of the first, uniformly randomized batch
periods = 50 # total number of batches
sim7 = MAB(probs,k,first, periods)
# Only the Thompson sampling design is run here; the static run is disabled.
(s7_ts_final_win_arm, s7_ts_final_win_probs, s7_ts_final_assignment, s7_ts_estimation) = sim7.do_replication(5000, "TS")
# (s7_st_final_win_arm, s7_st_final_win_probs, s7_st_final_assignment, s7_st_estimation) = sim7.do_replication(5000, "static")

In [None]:
# Experiment 6: 100 batches (first batch 40, then 99 batches of 40).
k = 9 # number of treatments (arms)
probs = [0.2] + [0.1]*8 # true success rates; arm 0 is the clear winner
np.random.seed(99332) # same seed as the other experiments
first = 40 # size of the first, uniformly randomized batch
periods = 100 # total number of batches
sim6 = MAB(probs,k,first, periods)
# Only the Thompson sampling design is run here; the static run is disabled.
(s6_ts_final_win_arm, s6_ts_final_win_probs, s6_ts_final_assignment, s6_ts_estimation) = sim6.do_replication(5000, "TS")
# (s6_st_final_win_arm, s6_st_final_win_probs, s6_st_final_assignment, s6_st_estimation) = sim6.do_replication(5000, "static")


  posterior_probs[0] = success[0]/ assigned[0]


{'ATE': array([0.19968857, 0.08504902, 0.08509751, 0.08514692, 0.08507756,
       0.08491323, 0.08443989, 0.08503525, 0.08474891]), 'RMSE': array([0.00705397, 0.03732316, 0.03771846, 0.03682221, 0.03767545,
       0.03750051, 0.03750083, 0.03712019, 0.03775847]), 'Coverage': 0.921}


  posterior_probs[0] = success[0]/ assigned[0]


# Social network

In [None]:
# Notebook shell escape: install networkx, which the social-network cells import as nx.
!pip install networkx

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Colab-only setup: mount Google Drive and switch to it as the working
# directory, so relative paths in later cells resolve inside MyDrive.
import networkx as nx
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive

Mounted at /content/drive
/content/drive/MyDrive


In [None]:
# Load the edge list as an undirected graph with integer node ids, then print
# the neighbours of a handful of sample nodes.
G1 = nx.read_edgelist("facebook_combined.txt", create_using=nx.Graph(), nodetype=int)
neigh = [1, 20, 40, 65, 75, 90, 1000]
for node in neigh:
    all_neighbors = list(nx.all_neighbors(G1, node))
    print("All neighbors for Node ", str(node), " ---> ", str(all_neighbors))

All neighbors for Node  1  --->  [0, 48, 53, 54, 73, 88, 92, 119, 126, 133, 194, 236, 280, 299, 315, 322, 346]
All neighbors for Node  20  --->  [0, 2, 14, 41, 44, 111, 115, 149, 162, 214, 226, 312, 326, 333, 343]
All neighbors for Node  40  --->  [0, 21, 25, 26, 29, 56, 67, 72, 77, 113, 132, 133, 141, 142, 158, 169, 172, 199, 200, 203, 212, 213, 224, 231, 232, 239, 257, 258, 265, 271, 272, 274, 277, 280, 298, 304, 307, 315, 317, 322, 325, 329, 332, 334]
All neighbors for Node  65  --->  [0, 7, 13, 25, 82, 118, 203, 252, 261, 297, 314, 339]
All neighbors for Node  75  --->  [0, 9, 56, 67, 85, 170, 188, 200, 258, 272, 274, 304, 322, 323]
All neighbors for Node  90  --->  [0, 179]
All neighbors for Node  1000  --->  [107, 924, 974, 985, 1010, 1127, 1134, 1228, 1304, 1474, 1640, 1667, 1703, 1725, 1759, 1840]


In [None]:
# Average node connectivity of the loaded graph (shown as the cell's output).
# NOTE(review): this is presumably an all-pairs computation and may be very
# slow on a graph of this size — confirm before running.
nx.average_node_connectivity(G1)