In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [None]:
# Load the logged data; the first CSV column becomes the DataFrame index.
df = pd.read_csv(
    ".../zozo_Context_80items.csv",
    index_col=0,
)
# Preview the first rows as the cell output.
df.head()
 

In [None]:
def CombineNames(N1,N2):
    '''
    Pair two values.

    Returns the 2-tuple ``(N1, N2)``; neither input is modified or copied.
    '''
    pair = N1, N2
    return pair
    
# Shift item ids down by one.
# NOTE: this rewrites the column in place, so re-running the cell shifts the ids again.
df.item_id = df.item_id - 1

## Every (item, position) pair is treated as one arm
df['NArm'] = [CombineNames(item, pos) for item, pos in zip(df.item_id, df.position)]


## To keep the relation between the actual (item, pos) pair and the newly created arm,
## each distinct pair gets an integer id, numbered in order of first appearance
NArmIDDict = {arm: arm_id for arm_id, arm in enumerate(df.NArm.unique())}
# The flipped dictionary (arm id -> (item, pos)) is used for the final analysis
NArmIDDictFlipped = {arm_id: arm for arm, arm_id in NArmIDDict.items()}

# New column holding the integer arm id of each row
df['NArmID'] = df['NArm'].map(NArmIDDict)

In [None]:
def GetPotentialSort(PotentialArmsID, n_max =3, arm_lookup=None):
    '''
    Greedily pick up to ``n_max`` arms from a ranked list so that no two
    picked arms share an item or a position.

    Parameters
    ----------
    PotentialArmsID : sequence of arm ids, best candidate first.
    n_max : maximum number of arms to return; values <= 0 return an empty array.
    arm_lookup : optional mapping ``arm id -> (item, pos)``. When omitted, the
        module-level ``NArmIDDictFlipped`` is used.

    Returns
    -------
    np.ndarray with the selected arm ids in ranking order. It may hold fewer
    than ``n_max`` entries when the candidates run out.

    Raises
    ------
    KeyError if a candidate id is missing from the lookup.
    '''
    if arm_lookup is None:
        arm_lookup = NArmIDDictFlipped

    SelectedArmsID = []
    selected_products = set()
    selected_positions = set()

    # Walk the ranking once instead of re-slicing the candidate list on every
    # iteration (slicing copies the remainder when the input is a list).
    for ArmID in PotentialArmsID:
        if len(SelectedArmsID) >= n_max:
            break

        Arm = arm_lookup[ArmID]
        product, position = Arm[0], Arm[1]

        # Skip candidates whose item or position is already taken.
        if product in selected_products or position in selected_positions:
            continue

        SelectedArmsID.append(ArmID)
        selected_products.add(product)
        selected_positions.add(position)

    return np.array(SelectedArmsID)



## Random Sampling

In [None]:


def RandomChooseArm( n_arms, n_max = 3):
    '''
    Pick arms uniformly at random.

    Arm ids ``0 .. n_arms - 1`` are shuffled with NumPy's global RNG and the
    shuffled ranking is passed to ``GetPotentialSort``.

    Parameters
    ----------
    n_arms : number of arms; the candidate ids are ``range(n_arms)``.
    n_max : maximum number of arms to select.

    Returns
    -------
    Whatever ``GetPotentialSort`` returns for the shuffled ranking.
    '''
    armsList = list(range(n_arms))
    np.random.shuffle(armsList)

    # Forward the caller's n_max; it used to be overridden by a hard-coded 3.
    selectedArms = GetPotentialSort(armsList, n_max = n_max)

    return selectedArms


def RandomSimulator(NArmIDV, clickV, indices, n_arms, n_steps):
    '''
    Replay logged data under the random policy.

    At every step ``RandomChooseArm(n_arms)`` proposes arms. Each proposed arm
    consumes the earliest not-yet-used logged record of that arm.

    Parameters
    ----------
    NArmIDV : arm id of every logged record.
    clickV : reward of every logged record (aligned with ``NArmIDV``).
    indices : identifier of every logged record (aligned with ``NArmIDV``).
    n_arms : number of arms, forwarded to ``RandomChooseArm``.
    n_steps : number of simulation steps.

    Returns
    -------
    np.ndarray of shape ``(number of draws, 3)`` with rows
    ``[arm, reward, record identifier]`` in draw order. Only rows that were
    actually written are returned, so a step that yields fewer than 3 arms
    leaves no all-zero filler rows behind.

    Raises
    ------
    IndexError if a proposed arm has no unused logged record left.
    '''
    arms_per_step = 3  # default n_max of RandomChooseArm

    # Queue the record positions of every arm in logged order. Taking the next
    # entry of a queue matches "first remaining occurrence" without deleting
    # from the three input arrays on every draw.
    positions_by_arm = {}
    for position, logged_arm in enumerate(np.asarray(NArmIDV).tolist()):
        positions_by_arm.setdefault(logged_arm, []).append(position)
    record_queues = {arm: iter(pos) for arm, pos in positions_by_arm.items()}

    def _next_record(arm):
        queue = record_queues.get(arm)
        position = next(queue, None) if queue is not None else None
        if position is None:
            raise IndexError("no unused logged record left for arm %r" % (arm,))
        return position

    draws = np.zeros((n_steps * arms_per_step, 3))

    k = 0
    for _ in range(n_steps):
        for arm in RandomChooseArm(n_arms):
            armIndex = _next_record(arm)
            draws[k] = np.array([arm, clickV[armIndex], indices[armIndex]])
            k += 1

    return draws[:k]

## Thompson Sampling

In [None]:
def THChooseArm(alphas, betas, n_arms, sample_size = 100, n_max = 3):
    '''
    Rank arms by the mean of Beta draws and keep the best compatible ones.

    For every arm, ``sample_size`` values are drawn from Beta(alpha, beta) with
    NumPy's global RNG and averaged. Arms are ordered by that average, highest
    first, and the ranking is filtered by ``GetPotentialSort`` with ``n_max``.

    Returns whatever ``GetPotentialSort`` returns for that ranking.
    '''
    # One row per draw, one column per arm.
    beta_draws = np.random.beta(alphas, betas, size=(sample_size, n_arms))

    # Per-arm score: the column-wise average of the draws.
    arm_scores = beta_draws.mean(axis=0)

    # Ascending argsort, reversed so the best-scoring arm comes first.
    ascending = np.argsort(arm_scores)
    ranking = ascending[::-1]

    # Drop arms that clash with an already chosen one.
    return GetPotentialSort(ranking, n_max=n_max)


def THsimulator(NArmIDV, clickV, indices, n_arms, n_steps, batch_size = 1):
    '''
    Replay logged data under the Thompson-sampling policy.

    At every step ``THChooseArm(alphas, betas, n_arms)`` proposes arms. Each
    proposed arm consumes the earliest not-yet-used logged record of that arm,
    and its reward is added to that arm's Beta parameters.

    Parameters
    ----------
    NArmIDV : arm id of every logged record.
    clickV : reward of every logged record (aligned with ``NArmIDV``).
    indices : identifier of every logged record (aligned with ``NArmIDV``).
    n_arms : number of arms; Beta parameters are arrays of this length.
    n_steps : number of simulation steps.
    batch_size : the parameters used for choosing arms are refreshed on steps
        where ``i % batch_size == 0``; between refreshes the policy keeps
        sampling from the last refreshed parameters.

    Returns
    -------
    (alphas, betas, draws): the Beta parameters including every observed
    reward, and an array of shape ``(number of draws, 3)`` with rows
    ``[arm, reward, record identifier]``. Only rows that were actually written
    are returned.

    Raises
    ------
    IndexError if a proposed arm has no unused logged record left.
    '''
    arms_per_step = 3  # default n_max of THChooseArm

    # Parameters the policy samples from: start with alpha = 1 and beta = 1.
    alphas = np.ones(n_arms)
    betas = np.ones(n_arms)

    # Running parameters that absorb every reward immediately.
    alphas_batch = np.ones(n_arms)
    betas_batch = np.ones(n_arms)

    # Queue the record positions of every arm in logged order. Taking the next
    # entry of a queue matches "first remaining occurrence" without deleting
    # from the three input arrays on every draw.
    positions_by_arm = {}
    for position, logged_arm in enumerate(np.asarray(NArmIDV).tolist()):
        positions_by_arm.setdefault(logged_arm, []).append(position)
    record_queues = {arm: iter(pos) for arm, pos in positions_by_arm.items()}

    def _next_record(arm):
        queue = record_queues.get(arm)
        position = next(queue, None) if queue is not None else None
        if position is None:
            raise IndexError("no unused logged record left for arm %r" % (arm,))
        return position

    draws = np.zeros((n_steps * arms_per_step, 3))

    k = 0
    for i in range(n_steps):
        arms = THChooseArm(alphas, betas, n_arms)

        for arm in arms:
            armIndex = _next_record(arm)
            reward = clickV[armIndex]

            draws[k] = np.array([arm, reward, indices[armIndex]])
            k += 1

            alphas_batch[arm] = alphas_batch[arm] + reward
            betas_batch[arm] = betas_batch[arm] + 1 - reward

        # Refresh the sampling parameters every "batch_size" steps. Copy the
        # arrays: binding the same array object would let later in-place
        # updates leak into the policy and make batch_size a no-op.
        if i % batch_size == 0:
            alphas = alphas_batch.copy()
            betas = betas_batch.copy()

    # Return the parameters that include every observed reward.
    return alphas_batch, betas_batch, draws[:k]

## UCB 

In [None]:
# UCB
def chooseArmUCB(draws, c = 0.1, n_max = 3):
    '''
    Rank the arms seen so far by their UCB score and keep the best compatible ones.

    Parameters
    ----------
    draws : 2-D array whose column 0 holds arm ids and column 1 holds rewards,
        one row per past draw.
    c : weight of the exploration term ``sqrt(log(t) / pulls)``, where ``t`` is
        the number of rows in ``draws``.
    n_max : maximum number of arms to select.

    Returns
    -------
    Whatever ``GetPotentialSort`` returns for the arm ids ordered by descending
    UCB score. Only arms that appear in ``draws`` are ranked.
    '''
    t = len(draws)
    if t == 0:
        # Nothing observed yet: no arm can be scored (and log(0) is avoided).
        return GetPotentialSort(np.array([], dtype=int), n_max = n_max)

    draws_arms = draws[:, 0]
    draws_rews = draws[:, 1]

    # Per-arm pull counts and reward sums in one pass over the history.
    arm_ids, arm_slot, arm_obs = np.unique(
        draws_arms, return_inverse=True, return_counts=True
    )
    arm_total_rew = np.bincount(
        arm_slot.ravel(), weights=draws_rews, minlength=len(arm_ids)
    )

    avg_rew = arm_total_rew / arm_obs
    Ut = (np.log(t) / arm_obs) ** 0.5
    # Tiny Gaussian noise so exact ties are broken at random.
    noise = np.random.normal(0, 0.00001, size=len(arm_ids))
    UCB_values = avg_rew + c * Ut + noise

    # argsort yields positions within arm_ids; translate them back to the
    # actual arm ids so the ranking stays correct when the observed arms are
    # not exactly 0..n-1.
    ranked_arms = arm_ids[np.argsort(UCB_values)[::-1]].astype(int)

    # making sure that the sorted arms don't overlap in their "pos"
    SelectedArmsID = GetPotentialSort(ranked_arms, n_max = n_max)

    return SelectedArmsID


def simulatorUCB(NArmIDV, clickV, indices, n_arms, n_steps, c = 0.1):
    '''
    Replay logged data under the UCB policy.

    Every arm ``0 .. n_arms - 1`` is pulled once to initialise its statistics.
    Then, for ``n_steps`` steps, ``chooseArmUCB`` proposes arms from the draw
    history so far. Each pull consumes the earliest not-yet-used logged record
    of the pulled arm.

    Parameters
    ----------
    NArmIDV : arm id of every logged record.
    clickV : reward of every logged record (aligned with ``NArmIDV``).
    indices : identifier of every logged record (aligned with ``NArmIDV``).
    n_arms : number of arms.
    n_steps : number of simulation steps after the initialisation round.
    c : exploration weight forwarded to ``chooseArmUCB``.

    Returns
    -------
    np.ndarray of shape ``(number of draws, 3)`` with rows
    ``[arm, reward, record identifier]``; the first ``n_arms`` rows are the
    initialisation pulls. Only rows that were actually written are returned.

    Raises
    ------
    IndexError if a pulled arm has no unused logged record left.
    '''
    arms_per_step = 3  # chooseArmUCB selects at most 3 arms per step

    # Queue the record positions of every arm in logged order. Taking the next
    # entry of a queue matches "first remaining occurrence" without deleting
    # from the three input arrays on every draw.
    positions_by_arm = {}
    for position, logged_arm in enumerate(np.asarray(NArmIDV).tolist()):
        positions_by_arm.setdefault(logged_arm, []).append(position)
    record_queues = {arm: iter(pos) for arm, pos in positions_by_arm.items()}

    def _next_record(arm):
        queue = record_queues.get(arm)
        position = next(queue, None) if queue is not None else None
        if position is None:
            raise IndexError("no unused logged record left for arm %r" % (arm,))
        return position

    draws = np.zeros((n_steps * arms_per_step + n_arms, 3))
    k = 0

    # Initialization of arms: one pull each so every arm has a pull count.
    for arm in range(n_arms):
        armIndex = _next_record(arm)
        draws[k] = np.array([arm, clickV[armIndex], indices[armIndex]])
        k += 1

    for _ in range(n_steps):
        # Only the rows written so far form the history.
        arms = chooseArmUCB(draws[:k], c)

        for arm in arms:
            armIndex = _next_record(arm)
            draws[k] = np.array([arm, clickV[armIndex], indices[armIndex]])
            k += 1

    return draws[:k]

## Simulation

In [None]:
import random
## Simulation settings
# Number of simulation runs.
NumSimulation = 10

# Fixed random states for the simulation runs, indexed by run number.
random_states = [
    2321, 3577, 4722, 1518, 2667,
    2110, 3500, 2151, 4197, 3729,
]

# Filled by the simulation loop: run number -> simulator output.
ResultDict = dict()

In [None]:
# Number of distinct arms present in the data.
n_arms = len(df.NArmID.unique())

# One seed is needed per run.
assert NumSimulation <= len(random_states), "need one random state per simulation run"

for I in tqdm(range(NumSimulation)):
    # Seed NumPy's global RNG as well: the policies draw from np.random, so
    # without this only the shuffle below would be reproducible.
    np.random.seed(random_states[I])

    # Shuffle the logged records with this run's fixed random state.
    dfShuffled = df.sample(frac=1, random_state=random_states[I])
    NArmIDV = dfShuffled.NArmID.values # arms
    clickV  = dfShuffled.click.values  # rewards
    indices = dfShuffled.index.values  # sample data indices in actual data

    ResultDict[I] = THsimulator(NArmIDV, clickV, indices, n_arms , n_steps = 10000)

    # Alternative policies: swap one of these in for the line above.
    # ResultDict[I] = RandomSimulator(NArmIDV, clickV, indices, n_arms, n_steps = 10000)
    # ResultDict[I] = simulatorUCB(NArmIDV, clickV, indices, n_arms , n_steps = 10000, c = 0.5)
