In [20]:
%matplotlib inline
import numpy as np
import pandas as pd
from numpy.linalg import inv
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
import os

In [21]:
os.getcwd()
os.chdir('C:\\python\\YES24')

In [22]:
import seaborn as sns

In [23]:
df = pd.read_csv('context24.csv')

In [24]:
df_train = df[df['month']!=5]
df_test = df[df['month']==5]

MemoryError: Unable to allocate 169. MiB for an array with shape (98, 225811) and data type int64

In [None]:
df.head()

In [19]:
train_arms = df_train.iloc[:,0].astype('int').to_numpy()
train_rewards = df_train.iloc[:,1].astype('float').to_numpy()
train_contexts = df_train.iloc[:,3:].astype('float').to_numpy()

MemoryError: Unable to allocate 676. MiB for an array with shape (96, 922826) and data type int64

In [40]:
test_arms = df_test.iloc[:,0].astype('int').to_numpy()
test_rewards = df_test.iloc[:,1].astype('float').to_numpy()
test_contexts = df_test.iloc[:,3:].astype('float').to_numpy()

In [18]:
train_contexts

NameError: name 'train_contexts' is not defined

In [8]:
class MAB(ABC):
    """
    Abstract class that represents a multi-armed bandit (MAB)
    """
    
    @abstractmethod
    def play(self, tround, context):
        """
        Play a round
        
        Arguments
        =========
        tround : int
            positive integer identifying the round
        
        context : 1D float array, shape (self.ndims * self.narms), optional
            context given to the arms
        
        Returns
        =======
        arm : int
            the positive integer arm id for this round
        """
    
    @abstractmethod
    def update(self, arm, reward, context):
        """
        Updates the internal state of the MAB after a play
        
        Arguments
        =========
        arm : int
            a positive integer arm id in {1, ..., self.narms}
        
        reward : float
            reward received from arm
        
        context : 1D float array, shape (self.ndims * self.narms), optional
            context given to arms
        """

In [9]:
def offlineEvaluate(mab, arms, rewards, contexts, nrounds=None):
    """
    Offline evaluation of a multi-armed bandit
    
    Arguments
    =========
    mab : instance of MAB
    
    arms : 1D int array, shape (nevents,) 
        integer arm id for each event
    
    rewards : 1D float array, shape (nevents,)
        reward received for each event
    
    contexts : 2D float array, shape (nevents, mab.narms*nfeatures)
        contexts presented to the arms (stacked horizontally) 
        for each event.
        
    nrounds : int, optional
        number of matching events to evaluate `mab` on.
    
    Returns
    =======
    out : 1D float array
        rewards for the matching events
    """
    
    # basic input checks to ensure everything works after assertions are passed
    assert isinstance(mab,MAB), 'MAB instance required'
    assert type(arms) is np.ndarray, 'arms should be numpy array'
    assert type(rewards) is np.ndarray, 'rewards should be numpy array'
    assert type(contexts) is np.ndarray, 'contexts should be numpy array'
    if nrounds is not None:
        assert type(nrounds) is int, 'input type int required for nrounds'
        assert nrounds > 0, 'nrounds should be greater than 0'
    assert 'int' in str(arms.dtype), 'arms should be int numpy array'
    assert 'float' in str(rewards.dtype), 'rewards should be float numpy array'
    assert 'float' in str(contexts.dtype), 'contexts should be float numpy array'
    assert arms.ndim == 1, 'arms should be 1D numpy array (nevents,) '
    assert rewards.ndim == 1, 'rewards should be 1D numpy array (nevents,)'
    assert contexts.ndim == 2, 'contexts should be 2D numpy array (nevents, mab.narms*nfeatures)'
    
    ##ndims mab에서 불러오기
    ndims = mab.ndims
    
    # initially empty history
    history = []
    ### mean값 담는 곳
    mean_array = np.zeros(shape=(nrounds, ndims))
    cov_array = np.zeros(shape=(nrounds,ndims,ndims))
    
    
    
    # initially empty payoff
    out = []
    
    # get total number of events
    T = arms.shape[0]
    
    # ensuring there are events to evaluate
    assert T > 0, "There are no events for offline evaluation"
    
    # initilize control parameters
    events_parsed = 0
    all_events_parsed = False
    
    # sequentially parse stream of events
    for t in range(0,T):
        
        # repeat until matching arm is found
        while(True):
            
            # if all events are parsed - stop processing
            if events_parsed == T:
                all_events_parsed = True
                break
            
            # Get next event    
            # get selected arm
            a = arms[events_parsed]
            # get observed payoff for the arm
            r_a = rewards[events_parsed]
            # get observed context for the arm
            context = contexts[events_parsed,:]
            
           
            
            # keeping track of parsed events
            events_parsed += 1
     
            # set required round on the basis of history recorded
            tround = len(history)+1
            # play arm for tround
            arm,mean,cov = mab.play(tround,context)
            mean_array[tround-1] = mean
            cov_array[tround-1] = cov
            # check if arm is matched or not
            if arm == a:
                # matching arm found - update played arm
                mab.update(arm,r_a,context)
                
                break 
        
        # check if all events are parsed       
        if all_events_parsed:
            # stop processing
            break
            
        # record event tuple in history for each matched round
        event = (tround,a,r_a,context)
        history.append(event)      

        # add observed payoff for the matched arm
        out.append(r_a)
        
        # check if required rounds are reached, if provided
        if nrounds is not None and len(history) == nrounds:
            break
    
    # return per round rewards recorded ##mean값 담은 것 추출
    return out,mean_array,cov_array

In [10]:
class LinThompson(MAB):
    """
    Contextual Thompson sampled multi-armed bandit (LinThompson)

    Arguments
    =========
    narms : int
        number of arms

    ndims : int
        number of dimensions for each arm's context

    v : float
        positive real explore-exploit parameter
    """
    
    def __init__(self, narms, ndims, v):
      
        # basic input checks
        assert type(narms) is int, 'input type int required for narms'
        assert type(ndims) is int, 'input type int required for ndims'
        assert type(v) is float, 'input type float required for v'
        assert narms > 0, "number of arms should be greater than 0"
        assert ndims > 0, "ndims should be greater than 0"
        assert v > 0, "v should be greater than 0"
        
        # initilize MAB parameters
        self.narms = narms
        self.ndims = ndims
        self.v = v
        
        # initilize MAB state ## 
        self.B = np.identity(ndims)          # ndims x ndims
        self.mu_hat = np.expand_dims(me[-1],axis=1) # ndims x 1
        self.f =  np.zeros((ndims,1))    # ndims x 1
        
        
        
    def play(self, tround, context):
        # initilize play parameters
        bounds = [0] * self.narms
        
        # generate samples from multivariate normal distribution 
        mean = np.transpose(self.mu_hat)[0]
        covariance_matrix = ((self.v)**2)*np.linalg.inv(self.B)
        
        
        # setting seed to reproduce results reliably
        np.random.seed(50005) # [bug] seed sometimes unreliable for multivariate normal
        samples = np.random.multivariate_normal(mean, covariance_matrix)
        # (ndims x 1) samples
        sample_mu_tilde = np.expand_dims(samples,axis=1)
       
        # perform operation for all arms
        for arm in range(0,self.narms):
            
            # get context for the arm (ndims x 1)
            arm_context = np.expand_dims(np.transpose(context[(arm*4):(arm*4)+4]),axis=1)
            #print(arm_context)
            # calculate value for the arm
            bounds[arm] = np.dot(np.transpose(arm_context),sample_mu_tilde)
            
        # get value-maximising arm from generated bounds
        maximum_arm_value = np.max(bounds)
            
        # handle tie-breaking using uniformly-at-random selection
        # setting seed to reproduce results reliably
        np.random.seed(50005)
        arm_to_play = np.random.choice(np.where(bounds==maximum_arm_value)[0],1)[0]
            
        # returning an arm integer in {1,...,self.narms}
        
        return arm_to_play+1, mean, covariance_matrix
        
    def update(self, arm, reward, context):
        # update played arm
        
        # verifying arm input
        assert arm <= self.narms, "arm is larger than property narms"
        
        # Getting arm index
        arm_idx = arm-1
        
        # get context for the played arm
        arm_context = np.expand_dims(np.transpose(context[(arm_idx*4):(arm_idx*4)+4]),axis=1)
        
        # update state of the played arm
        self.B = self.B + np.dot(arm_context,np.transpose(arm_context))
        self.f = self.f + arm_context*reward
        self.mu_hat = np.dot(inv(self.B),self.f)
        #print(arm_context, reward)

In [109]:
mab = LinThompson(24, 4, 1.0)
results_LinThompson,me,cov = offlineEvaluate(mab, train_arms, train_rewards, train_contexts, 20000)
print('LinThompson average reward', np.mean(results_LinThompson))

LinThompson average reward 0.2454


In [119]:
np.expand_dims(me[-1],axis=1)

array([[0.],
       [0.],
       [0.],
       [0.]])

In [111]:
cov[-1]

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [11]:
mab = LinThompson(24, 4, 1.0)
results_LinThompson,me,cov = offlineEvaluate(mab, test_arms, test_rewards, test_contexts, 20000)
print('LinThompson average reward', np.mean(results_LinThompson))

NameError: name 'cov' is not defined