In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import math

# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn in this notebook.
plt.style.use('fivethirtyeight')
import warnings
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Default value of the `random_seed` argument of ReplaySimulator.__init__,
# where it is passed to np.random.seed.
random_seed = 0

In [None]:
# LOAD DATA
# Read the trial records and shift the patient ids down by one in a single
# chained expression (the original note gives the resulting range as 0~112).
opioid_df = (
    pd.read_csv('opioid.csv', encoding='latin1', index_col=False)
    .assign(patient=lambda frame: frame['patient'] - 1)
)

In [None]:
# Simulation size: visits replayed per iteration, and number of iterations.
n_visits, n_iterations = 360, 100

# Logged data the simulator bootstraps from.
reward_history = opioid_df

# Names of the columns holding the arm (treatment), the subject (patient)
# and the observed reward (outcome).
item_col_name, visitor_col_name, reward_col_name = 'treatment', 'patient', 'outcome'

In [None]:
# Draw the arrival day of every visit uniformly from 1..210 (30 weeks).
# A dedicated generator seeded with `random_seed` keeps the schedule identical
# across kernel restarts and cell re-runs; the stdlib `random` module was
# previously left unseeded, so the schedule changed on every run.
arrival_rng = random.Random(random_seed)
arrival_dates = sorted(arrival_rng.randrange(210) + 1 for _ in range(n_visits))

# The outcome of a visit becomes observable 14 days (two weeks) after arrival.
# Kept as a float array, matching what `list + 14 * np.ones(...)` produced.
update_dates = np.asarray(arrival_dates, dtype=float) + 14

In [None]:
class ReplaySimulator(object):

    def __init__(self, n_visits, reward_history, n_iterations, random_seed=random_seed):
        """Set up a replay simulation over logged treatment outcomes.

        Parameters
        ----------
        n_visits : int
            Number of visits simulated in each iteration.
        reward_history : pandas.DataFrame
            Logged data; must contain the columns named by the module-level
            ``item_col_name``, ``visitor_col_name`` and ``reward_col_name``.
        n_iterations : int
            Number of independent replays to run.
        random_seed : int
            Seed passed to ``np.random.seed`` (this seeds NumPy's global RNG).

        Attributes set here: ``items`` / ``n_items`` (distinct treatments),
        ``visitors`` / ``n_visitors`` (distinct patients), ``treatment``
        (mean reward per treatment, one row each) and ``best_id`` (the
        treatment with the highest mean reward).
        """
        np.random.seed(random_seed)
        self.t = 0
        self.reward_history = reward_history
        # Column names are taken from the notebook-level configuration.
        self.item_col_name = item_col_name
        self.visitor_col_name = visitor_col_name
        self.reward_col_name = reward_col_name

        self.n_visits = n_visits
        self.n_iterations = n_iterations

        # items under test (the distinct treatment values)
        self.items = self.reward_history[self.item_col_name].unique()
        self.n_items = len(self.items)

        # visitors in the historical reward_history
        self.visitors = self.reward_history[self.visitor_col_name].unique()
        self.n_visitors = len(self.visitors)

        # mean reward per treatment, as a two-column frame
        self.treatment = (
            self.reward_history[[self.item_col_name, self.reward_col_name]]
            .groupby([self.item_col_name])
            .mean()
            .reset_index()
        )
        # Best arm: idxmax returns the first row holding the maximum, so a tie
        # between treatments no longer makes int() fail on a multi-element
        # array, and no array-to-scalar conversion is needed.
        best_row = self.treatment[self.reward_col_name].idxmax()
        self.best_id = int(self.treatment.loc[best_row, self.item_col_name])
  
    def reset(self): 
        self.t = 0
        self.n_item_samples = np.zeros(self.n_items)
        self.n_item_rewards = np.zeros(self.n_items)
        self.reward_squared = np.zeros(self.n_items)
        
    def replay(self):
        """Run the delayed-feedback replay and return one record per visit.

        For each of ``n_iterations`` iterations the state is reset and
        ``n_visits`` visits are simulated. At every visit:

        1. Rewards of earlier visits whose update date is on or before the
           current arrival date are fed to ``record_result`` (ordered by
           update date, then by visit order).
        2. ``self.select_item()`` chooses a treatment index. That method is
           not defined in the visible part of this class, so it is expected
           to be provided elsewhere.
        3. One logged row for the chosen treatment is sampled at random; its
           outcome is the (not yet observed) reward of this visit and is
           queued until the visit's update date.

        Arrival and update dates are read from the module-level
        ``arrival_dates`` and ``update_dates`` sequences, which must hold at
        least ``n_visits`` entries.

        Returns
        -------
        list of dict
            One dict per (iteration, visit) with the selection, the
            unobserved and observed reward totals and averages, and the
            best-arm hit statistics.
        """
        # Loop-invariant: the candidate rows of each treatment never change,
        # so filter the history once per treatment instead of once per visit.
        # The list is indexed by treatment index, like self.items.
        candidate_rows = [
            self.reward_history[self.reward_history[self.item_col_name] == item]
            for item in self.items
        ]

        results = []

        for iteration in range(self.n_iterations):
            self.reset()
            total_rewards = 0
            unobserved_total_reward = 0
            total_best_find = 0
            # (visit, treatment index, update date, reward) awaiting feedback
            pending = []

            for visit in range(self.n_visits):
                self.t = visit + 1
                arrival_date = arrival_dates[visit]
                update_date = update_dates[visit]
                update_reward = 0

                # Split the backlog into rewards that are now observable and
                # those still waiting. Only update dates from day 1 onward
                # are ever applied.
                due, waiting = [], []
                for entry in pending:
                    if 1 <= int(entry[2]) <= arrival_date:
                        due.append(entry)
                    else:
                        waiting.append(entry)
                pending = waiting

                # The stable sort keeps visit order within the same update
                # day, i.e. the order in which a day-by-day scan applies them.
                due.sort(key=lambda entry: int(entry[2]))
                for past_visit, past_item_idx, _, past_reward in due:
                    self.record_result(past_visit, past_item_idx, past_reward)
                    update_reward += past_reward
                    total_rewards += past_reward

                item_idx = self.select_item()  # treatment index
                item_id = self.items[item_idx]  # treatment id

                best_find = 1 if int(item_id) == int(self.best_id) else 0
                total_best_find += best_find

                # Bootstrap: draw one logged row for the chosen treatment and
                # read its fields by column name rather than by position.
                sampled_row = candidate_rows[item_idx].sample(n=1).iloc[0]
                visitor_id = sampled_row[self.visitor_col_name]
                reward = sampled_row[self.reward_col_name]  # outcome of the sampled row
                pending.append((visit, item_idx, update_date, reward))

                unobserved_total_reward += reward

                results.append({
                    'iteration': iteration,
                    'visit': visit,
                    'visitor_id': visitor_id,
                    'arrival_date': arrival_date,
                    'update_date': update_date,

                    'item_id': item_id,
                    # reward of this visit, before it has been fed back
                    'unobserved_reward': reward,
                    'unobserved_total_reward': unobserved_total_reward,
                    'unobserved_average_reward': unobserved_total_reward / self.t,

                    # rewards that became observable at this visit / so far
                    'update_reward': update_reward,
                    'update_total_reward': total_rewards,
                    'update_total_average_reward': total_rewards / self.t,

                    # 1 when the chosen treatment is the best arm
                    'best_find': best_find,
                    'total_best_find': total_best_find,
                    'average_best_find': total_best_find / self.t,
                })

        return results
                        
    def record_result(self, visit, item_idx, reward): #t, visit, treatment index, reward
        self.n_item_samples[item_idx] += 1

        n = self.n_item_samples[item_idx] 
        self.reward_squared[item_idx] += reward**2
        value = self.n_item_rewards[item_idx]
        
        # update
        new_value = ((n-1)/float(n))*value + (1/float(n)) * reward 
        self.n_item_rewards[item_idx] = new_value 