In [1]:
# import necessary packages
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names were removed in 3.8. Prefer the new name when this Matplotlib
# provides it and fall back to the legacy name on older releases.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)

import json

In [None]:
# Option 1: read JSON from a file. The context manager closes the file even
# when json.load() raises on malformed input.
with open("input.json") as f:
    data = json.load(f)

# Option 2: read JSON from a JSON string. This rebinds `data`, replacing the
# value loaded from the file above. The placeholder below is not valid JSON and
# must be replaced with a real JSON document before this cell can run.
js = "<JSON STRING OBJECT PLACEHOLDER>"
data = json.loads(js)

In [None]:
# Global constants shared by the LinUCB training and recommendation functions.
NUM_ARMS = 8       # The number of arms; arms are labelled 1..NUM_ARMS
# Num_Clusters = 4   # The number of clusters (currently disabled)
CONTEXT_DIM = 25   # The dimension of context vectors
T = 20000          # The number of training rounds

In [None]:
# Each context is a CONTEXT_DIM-dimensional NumPy array selected at random from the input data.

In [None]:
def train_disjoint_LinUCB(alpha, input_A=None, input_b=None):
    '''
    Train a disjoint LinUCB contextual bandit for T rounds.

    Every arm keeps its own matrix A[arm] and vector b[arm]. In each round the
    arm with the largest upper confidence bound (UCB) is pulled, with ties
    broken uniformly at random, and only that arm's A and b are updated.

    Relies on the module-level names NUM_ARMS, CONTEXT_DIM and T, and on an
    `env` object exposing generate_context() and generate_reward(arm, context).
    `env` is not defined in this part of the notebook (see the TODOs below).

    Input:
        alpha: exploration parameter; scales the confidence bonus in the UCB
        input_A: optional, A from a previous training session, indexed by arm
                 (1..NUM_ARMS). It is copied, never modified.
        input_b: optional, b from a previous training session, indexed by arm
                 (1..NUM_ARMS). It is copied, never modified.

    Output:
        A: dict arm -> matrix; return this parameter so it can be saved and used in later runs if needed
        b: dict arm -> vector; return this parameter so it can be saved and used in later runs if needed
        average_reward: numpy array of shape=(T,), the average reward of the algorithm after each round
        num_pulls: dict, the number of pulls of each arm after T rounds

    '''
    arms = range(1, NUM_ARMS + 1)

    # Initialization
    average_reward = np.zeros(T)
    num_pulls = {arm: 0.0 for arm in arms}

    # Warm-start containers are copied so the caller's saved parameters are not
    # changed by this run. A shallow copy is enough: the updates below rebind
    # entries to new arrays and never write into an existing array.
    if input_A is not None:
        A = {arm: input_A[arm] for arm in arms}
    else:
        A = {arm: np.identity(CONTEXT_DIM) for arm in arms}

    if input_b is not None:
        b = {arm: input_b[arm] for arm in arms}
    else:
        b = {arm: np.zeros(CONTEXT_DIM) for arm in arms}

    # Interact with the contextual bandit simulator
    for t in range(1, T + 1):
        # A context is revealed to the algorithm
        # TODO: REPLACE THIS LINE WITH HOW WE ARE ACCESSING INPUT ENTRIES
        context = env.generate_context()

        # Choose arm = argmax UCB[i], with ties broken uniformly random
        UCB = []
        for arm in arms:
            # theta_hat = A[arm]^{-1} b[arm]
            theta_hat = np.linalg.solve(A[arm], b[arm])
            # A[arm]^{-1} context, obtained with a linear solve rather than by
            # forming the explicit inverse (cheaper and numerically safer).
            A_inv_context = np.linalg.solve(A[arm], context)
            # ucb = theta_hat^T context + alpha * sqrt(context^T A[arm]^{-1} context)
            ucb = np.dot(theta_hat, context) + alpha * np.sqrt(np.dot(context, A_inv_context))
            UCB.append(ucb)

        max_ucb = np.max(UCB)
        candidates = [arm for arm in arms if UCB[arm - 1] == max_ucb]
        arm = np.random.choice(candidates)

        # A reward is revealed to the algorithm
        # TODO: REPLACE THIS LINE WITH HOW WE ARE GENERATING REWARDS
        r = env.generate_reward(arm, context)

        # Update A and b of the pulled arm only:
        #   A <- A + x x^T   (np.outer gives the d-by-d outer product of a 1-D vector)
        #   b <- b + r x
        A[arm] = A[arm] + np.outer(context, context)
        b[arm] = b[arm] + (r * context)

        # Running mean of the rewards seen so far, plus the pull counter
        if t == 1:
            average_reward[0] = r
        else:
            average_reward[t - 1] = (average_reward[t - 2] * (t - 1) + r) / t
        num_pulls[arm] = num_pulls[arm] + 1

    print("------------------------------------------------------------------------------------------------------------------------")

    return A, b, average_reward, num_pulls

In [None]:
def get_rec(context, input_A, input_b, alpha=None):
    '''
    Recommend an arm for `context` using previously trained LinUCB parameters.

    Computes, for every arm in 1..NUM_ARMS,
        ucb = theta_hat^T context + alpha * sqrt(context^T A[arm]^{-1} context)
    with theta_hat = A[arm]^{-1} b[arm], and returns an arm with the largest
    value. Ties are broken uniformly at random. No parameters are updated.

    Input:
        context: 1-D numpy array, the context vector to score
        input_A: A from a training session, indexed by arm (1..NUM_ARMS)
        input_b: b from a training session, indexed by arm (1..NUM_ARMS)
        alpha: exploration parameter. When omitted, a module-level `alpha` is
               used if one exists (the behaviour older callers relied on).

    Output:
        arm: the selected arm label (as returned by np.random.choice)

    Raises:
        NameError: if `alpha` is not passed and no module-level `alpha` exists
    '''
    if alpha is None:
        # Backward compatibility: this function used to read a global `alpha`.
        try:
            alpha = globals()["alpha"]
        except KeyError:
            raise NameError(
                "get_rec needs an exploration parameter: pass alpha=... "
                "or define a module-level `alpha`"
            ) from None

    # Choose arm = argmax UCB[i], with ties broken uniformly random
    UCB = []
    for arm in range(1, NUM_ARMS + 1):
        # theta_hat = A[arm]^{-1} b[arm]
        theta_hat = np.linalg.solve(input_A[arm], input_b[arm])
        # A[arm]^{-1} context via a linear solve instead of an explicit inverse
        A_inv_context = np.linalg.solve(input_A[arm], context)
        # ucb = theta_hat^T context + alpha * sqrt(context^T A[arm]^{-1} context)
        ucb = np.dot(theta_hat, context) + alpha * np.sqrt(np.dot(context, A_inv_context))
        UCB.append(ucb)

    max_ucb = np.max(UCB)
    candidates = [arm for arm in range(1, NUM_ARMS + 1) if UCB[arm - 1] == max_ucb]
    arm = np.random.choice(candidates)

    return arm