In [2]:
import math
import random
import numpy as np
import pandas as pd
from scipy.stats import beta
import matplotlib.pyplot as plt

In [3]:
# Seed both RNGs used in this notebook: the stdlib generator drives click(),
# while scipy's beta(...).rvs() in Bandit.evaluate draws from NumPy's global
# generator. Seeding only `random` leaves the simulation non-reproducible.
random.seed(3)
np.random.seed(3)

In [4]:
class Bandit(object):
    """Bernoulli multi-armed bandit solved with Thompson sampling.

    Each arm keeps a count of trials and successes. Together with a shared
    Beta prior these define a Beta posterior over the arm's success rate.

    Args:
        num_options: number of arms.
        prior: ``(alpha, beta)`` parameters of the Beta prior applied to
            every arm.
    """

    def __init__(self, num_options=2, prior=(1.0, 1.0)):
        self.trials = np.zeros(shape=(num_options,), dtype=int)
        self.successes = np.zeros(shape=(num_options,), dtype=int)
        self.num_options = num_options
        self.prior = prior

    def update(self, choice, conv):
        """Record one pull of arm ``choice``; ``conv`` is truthy on success."""
        self.trials[choice] += 1
        if conv:
            self.successes[choice] += 1

    def evaluate(self, random_state=None):
        """Sample every arm's posterior once and return the best arm's index.

        Args:
            random_state: optional seed or generator forwarded to
                ``scipy.stats.beta.rvs``. ``None`` uses NumPy's global state.

        Returns:
            int: index of the arm with the largest sampled success rate
            (the lowest index wins ties).
        """
        alpha = self.prior[0] + self.successes
        failures = self.trials - self.successes
        # One vectorised draw for all arms instead of building a frozen
        # distribution object per arm on every call.
        sampled_theta = beta.rvs(alpha, self.prior[1] + failures,
                                 random_state=random_state)
        return int(np.argmax(sampled_theta))

In [5]:
# True click-through probability of each arm in the simulation.
theta = (0.3, 0.4)

def click(c):
    """Simulate one impression of arm ``c``; return True on a click."""
    draw = random.random()
    return draw < theta[c]

In [6]:
# Number of simulated impressions.
N = 10000

b = Bandit()

# Per-step history of cumulative pulls and successes, one column per arm.
# The width comes from the bandit rather than a hard-coded 2, so changing
# the number of arms does not silently break the assignments below.
incr_trials = np.zeros(shape=(N, b.num_options))
successes = np.zeros(shape=(N, b.num_options))

for i in range(N):
    choice = b.evaluate()
    conv = click(choice)
    b.update(choice, conv)
    # Snapshot the cumulative counters after this step.
    incr_trials[i] = b.trials
    successes[i] = b.successes

In [12]:
# 1-based trial numbers, used as the x axis here and in the CTR plot.
n = np.arange(1, N + 1)

# Cumulative pulls of each arm on log-log axes.
f, ax = plt.subplots(figsize=(15, 7))
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_title('Pulls Per Arm with Respect to Trial Number')
ax.set_xlabel('Trials')
ax.set_ylabel('Pulls')
ax.plot(n, incr_trials[:, 0], label='Theta 0')
ax.plot(n, incr_trials[:, 1], label='Theta 1')
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

In [15]:
# Overall realised click-through rate against each arm's true rate.
f, ax = plt.subplots(figsize=(15, 7))
ax.set(xscale='log', yscale='linear')
ax.set_title('Click Through Rate With Respect to Trial Number')
ax.set_xlabel('Trial')
ax.set_ylabel('CTR')
ax.set_xlim(10, N)
ax.set_ylim(.15, .55)
ax.plot(n, (successes[:, 0] + successes[:, 1]) / n, label='Overall CTR')
# Reference lines read the true rates from `theta` instead of repeating the
# literals 0.3 / 0.4.
ax.plot(n, np.full(N, theta[0]), label='Theta 0 CTR')
ax.plot(n, np.full(N, theta[1]), label='Theta 1 CTR')
# Single legend call: the original built a legend from an explicit label list
# and then immediately replaced it with this one.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()

In [24]:
symbol_list = ['JPM', 'TGT', 'WFC', 'AAPL', 'WMT', 'COST', 'GS', 'AMZN', 'MSFT']

# (symbol, position) pairs in listing order; run() shuffles this list in place.
sym_key_val = [(sym, idx) for idx, sym in enumerate(symbol_list)]
# symbol -> position in symbol_list; payoff() uses it as a column position.
sym_dict = dict(sym_key_val)

start = '2003-09-22'
end = '2013-09-17'
# NOTE(review): get_pricing is not defined or imported anywhere in this
# notebook (see the NameError below) -- presumably a built-in of the hosted
# research environment this was written for; confirm before running locally.
df = get_pricing(
    symbol_list,
    fields=['open_price', 'close_price'],
    start_date=start,
    end_date=end,
)

NameError: name 'get_pricing' is not defined

In [17]:
def run(df):
    """Play UCB1 over every trading day in ``df`` and compare to hindsight.

    Shuffles the module-level ``sym_key_val`` in place so the action indices
    map to stocks in a random order.

    Args:
        df: price container indexable by ``'open_price'`` / ``'close_price'``,
            each yielding a frame with one row per trading day and one column
            per stock.

    Returns:
        tuple: ``(cum_reward, best_action_reward_cum, ucbs, best_stock)`` --
        the payoff collected by UCB1, the payoff of the single best stock in
        hindsight, the last list of upper confidence bounds yielded, and the
        symbol of that best stock.
    """
    random.shuffle(sym_key_val)
    open_prices = df['open_price']
    # `.ix` was removed in pandas 1.0; row/column counts come straight from
    # the frame's axes.
    trading_days = len(open_prices.index)
    num_stocks = len(open_prices.columns)

    def reward(choice, t):
        # Payoff on day t of the stock currently mapped to action `choice`.
        return payoff(df, t, sym_key_val[choice][0])

    def action_reward(j):
        # Total payoff of always playing action j.
        return sum(reward(j, t) for t in range(trading_days))

    best_action = max(range(num_stocks), key=action_reward)
    best_action_reward_cum = action_reward(best_action)

    cum_reward = 0
    t = 0
    ucb1gen = ucb1(num_stocks, reward)
    # The per-step payoff gets its own name so it no longer shadows the
    # `reward` callable that `action_reward` closes over.
    for (action, step_reward, ucbs) in ucb1gen:
        cum_reward += step_reward
        t += 1
        if t == trading_days:
            break

    return cum_reward, best_action_reward_cum, ucbs, sym_key_val[best_action][0]

In [18]:
def upper_bound(step, num_tests):
    """Return the UCB1 confidence radius ``sqrt(2 * ln(step + 1) / num_tests)``.

    Args:
        step: zero-based index of the current round.
        num_tests: number of times the arm has been played (must be non-zero).
    """
    return math.sqrt(2.0 * math.log(step + 1) / num_tests)


def ucb1(num_stocks, reward):
    """Generate UCB1 decisions indefinitely.

    Args:
        num_stocks: number of arms.
        reward: callable ``reward(action, t)`` returning the payoff of
            ``action`` at round ``t``.

    Yields:
        tuple: ``(action, reward_value, ucbs)`` for each round, where ``ucbs``
        is the list of upper confidence bounds used to pick ``action`` (all
        zeros during the initial pass that plays every arm once).
    """
    payoff_sums = [0.0] * num_stocks
    # Starts at 1 because the initial pass below plays every arm exactly once.
    num_tests = [1] * num_stocks
    ucbs = [0.0] * num_stocks

    # Initialise the empirical sums by playing each arm once.
    for t in range(num_stocks):
        payoff_sums[t] = reward(t, t)
        yield t, payoff_sums[t], ucbs

    t = num_stocks

    while True:
        ucbs = [payoff_sums[i] / num_tests[i] + upper_bound(t, num_tests[i])
                for i in range(num_stocks)]
        action = max(range(num_stocks), key=lambda i: ucbs[i])
        reward_val = reward(action, t)

        # Update only the arm that was played. Updating every arm each round
        # keeps all play counts equal, so the confidence term is the same for
        # every arm and never drives exploration.
        num_tests[action] += 1
        payoff_sums[action] += reward_val

        yield action, reward_val, ucbs
        t += 1

In [19]:
def payoff(df, t, stock, cash_am=1.0):
    """Return the profit from buying ``stock`` at the open and selling at the close.

    Args:
        df: price container indexable by ``'open_price'`` / ``'close_price'``.
        t: zero-based row position of the trading day.
        stock: symbol; its column position is looked up in the module-level
            ``sym_dict``.
        cash_am: cash invested at the open.

    Returns:
        Cash received at the close minus ``cash_am``.
    """
    col = sym_dict[stock]
    # Purely positional lookup: `.ix` was removed in pandas 1.0, and `.iloc`
    # keeps the row-position / column-position meaning explicitly.
    open_p = df['open_price'].iloc[t, col]
    close_p = df['close_price'].iloc[t, col]
    # allows for purchasing fraction of shares
    shares_purchased = cash_am / open_p
    cash_from_sale = shares_purchased * close_p

    return cash_from_sale - cash_am

In [20]:
def ucb_list(L):
    """Format the numbers in ``L`` to three decimals, joined by ', '."""
    formatted = []
    for value in L:
        formatted.append('%.3f' % value)
    return ', '.join(formatted)

def mean(lst):
    """Return the arithmetic mean of an iterable as a float, or 0 if it is empty."""
    total = 0
    seen = 0
    # Single pass, so one-shot iterators work as well as lists.
    for seen, value in enumerate(lst, 1):
        total += value
    if seen == 0:
        return 0
    return float(total) / seen

def stats(lst):
    """Return ``(mean, population variance)`` of the values in ``lst``."""
    # Materialise first: the values are traversed twice.
    values = list(lst)
    center = mean(values)
    squared_devs = []
    for v in values:
        diff = v - center
        squared_devs.append(diff * diff)
    return (center, mean(squared_devs))

In [21]:
# Run UCB1 over the price data. run() returns, in order: the payoff collected,
# the payoff of the best single stock in hindsight, the last UCB values and
# that best stock's symbol.
# NOTE: `reward` is a number here; the rewards-over-time cell further down
# rebinds the same name to a lambda.
reward, best_action_reward, ucbs, best_stock = run(df)

NameError: name 'df' is not defined

In [22]:
# print() with one parenthesised argument behaves the same on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("Payoff: {}".format(reward))
print("Regret: {}".format(best_action_reward - reward))
print("Best stock: {} (action reward: {})".format(best_stock, best_action_reward))
print("ucbs: {}".format(ucb_list(ucbs)))


NameError: name 'reward' is not defined

In [23]:
# Number of trading days (rows). `.ix` was removed in pandas 1.0, so the count
# is read from the frame's index instead.
days = len(df['open_price'].index)

# NOTE: this rebinds the module-level name `reward` (the payoff returned by
# run() in an earlier cell) to a per-day payoff function keyed by symbol.
reward = lambda choice, t: payoff(df, t, choice)
action_rewards = lambda s: np.array([reward(s, t) for t in range(days)])
xs = np.arange(days)

f, ax = plt.subplots(figsize=(15, 7))

ax.set_title('Rewards Over Time')
ax.set_xlabel('Day')
ax.set_ylabel('Reward')
ax.set_xlim(0, days - 1)
# One cumulative-payoff curve per stock.
for sym in symbol_list:
    ax.plot(xs, np.cumsum(action_rewards(sym)), label=sym)

ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# Matches the other plotting cells and keeps the Legend repr out of the output.
plt.show()

NameError: name 'df' is not defined