# Market Bandit

Load a dataset derived from [NYU Stern](http://pages.stern.nyu.edu/~adamodar/New_Home_Page/datafile/histretSP.html) containing annual returns for nearly a century of market data (1928–2019), including dividends and adjusted for inflation.

In [107]:
import pandas as pd

def load_market_data(file_name):
    """Read a tab-separated market data file into a DataFrame.

    Args:
        file_name: path of the TSV file to open for reading.

    Returns:
        pandas.DataFrame with one row per line of the file, using the
        first line as the column header.
    """
    with open(file_name, "r") as handle:
        frame = pd.read_csv(handle, sep="\t")

    return frame

In [108]:
# Read the market data file from the working directory and preview its first rows.
df = load_market_data("market.tsv")
df.head(5)

Unnamed: 0,year,inflation,sp500,t.bill,t.bond,corp
0,1928,-1.15,45.49,4.28,2.01,4.42
1,1929,0.0,-8.3,3.16,4.2,3.02
2,1930,-2.67,-23.07,7.42,7.41,3.3
3,1931,-8.93,-38.33,12.34,7.0,-7.41
4,1932,-10.3,1.85,12.68,21.28,37.78


In [109]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,year,inflation,sp500,t.bill,t.bond,corp
count,92.0,92.0,92.0,92.0,92.0,92.0
mean,1973.5,3.041957,8.413261,0.434239,2.166413,4.21663
std,26.70206,3.803579,19.619605,3.573035,8.126432,8.625809
min,1928.0,-10.3,-38.9,-12.05,-14.57,-14.85
25%,1950.75,1.415,-2.74,-1.185,-2.62,-1.3225
50%,1973.5,2.75,10.515,0.59,1.07,3.91
75%,1996.25,4.275,20.6225,2.1175,7.0375,9.2875
max,2019.0,14.39,58.2,12.68,25.14,37.78


In [114]:
import random

import gym
import numpy as np
from gym.spaces import Discrete, Box
from gym.utils import seeding

class MarketBandit(gym.Env):
    """Bandit environment that replays the market data, one year per step.

    On each step the agent picks one of ``TICKERS``. The reward is that
    asset's return for the current year, and ``info["regret"]`` is the
    rounded gap between the best return of that year and the chosen one.
    An episode starts at the earliest year in the data and ends once the
    latest year has been played.
    """

    MAX_INFLATION = 100.
    TICKERS = ["sp500", "t.bill", "t.bond", "corp"]

    ## Default data file, used when the config does not supply one.
    ## RLlib passes the trainer's "env_config" dict to this constructor,
    ## so a trial can override the path with:
    ##   config = {"env": MarketBandit, "env_config": {"file": "/path/to/market.tsv"}}
    FILE = "/Users/paco/src/rllib_tutorials/bandits/market.tsv"

    def __init__(self, config=None):
        """Load the market data and declare the action/observation spaces.

        Args:
            config: optional dict-like of settings. The only key read is
                ``"file"``, the path of the TSV data file; when the key
                (or the whole config) is missing, ``FILE`` is used.
        """
        print("CONFIG", config)

        # `config` is None when the env is built by hand and may be an
        # empty dict-like when built by a trainer; both fall back to FILE.
        file_name = (config or {}).get("file", self.FILE)

        # one arm per ticker
        self.action_space = Discrete(len(self.TICKERS))
        self.observation_space = Box(
            low=-self.MAX_INFLATION,
            high=self.MAX_INFLATION,
            shape=(1, ),
            dtype=np.float32,
        )
        self.df = load_market_data(file_name)
        self.cur_context = None

    def reset(self):
        """Rewind to the earliest year in the data and return the initial context.

        Returns:
            list: single-element observation ``[context]``.
        """
        self.year = self.df["year"].min()
        ## TODO: use the current year's "inflation" value as the context;
        ## for now the context is a constant placeholder.
        self.cur_context = 0
        self.done = False
        self.info = {}

        return [self.cur_context]

    def step(self, action):
        """Play one year: collect the chosen asset's return, then advance.

        Args:
            action: index into ``TICKERS`` of the asset to hold this year.

        Returns:
            list: ``[context, reward, done, info]`` where ``context`` is the
            single-element observation, ``reward`` is the chosen asset's
            return for the year, ``done`` flags the end of the episode and
            ``info`` holds the ``"regret"`` for this step.
        """
        if self.done:
            # stepping past the end of the data is a no-op
            reward = 0.
            regret = 0.
        else:
            # Filter on the env's own frame (not a notebook global) and pull
            # out the single matching row so each cell below is a scalar.
            row = self.df.loc[self.df["year"] == self.year].iloc[0]
            returns = {ticker: float(row[ticker]) for ticker in self.TICKERS}

            # calculate reward
            reward = returns[self.TICKERS[action]]

            # calculate regret relative to the best asset of the year
            regret = round(max(returns.values()) - reward)

            # update the context
            ## NB: Tune/RLlib throw exceptions about the context
            ## being out of range for an observation value (it's not),
            ## so the inflation value is not used as the context yet.
            self.cur_context = 0

            # increment the year
            self.year += 1

            # Finish only after the last year has been played; a `>=` test
            # here would end the episode before the final row is used.
            if self.year > self.df["year"].max():
                self.done = True

        context = [self.cur_context]

        self.info = {
            "regret": regret
        }

        return [context, reward, self.done, self.info]

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Args:
            seed: value handed to ``seeding.np_random``; may be None.

        Returns:
            list: single-element list holding the seed returned by
            ``seeding.np_random``, which is also stored (with its
            generator) on ``self.np_random``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

In [115]:
# Smoke test: play ten randomly sampled actions by hand and print each result.
bandit = MarketBandit()
bandit.reset()

for _ in range(10):
    action = bandit.action_space.sample()
    # step() returns the whole [context, reward, done, info] list
    step_result = bandit.step(action)
    print(action, step_result)

CONFIG None
0 [[0], 45, False, {'regret': 0}]
0 [[0], -8, False, {'regret': 12}]
1 [[0], 7, False, {'regret': 0}]
3 [[0], -7, False, {'regret': 20}]
0 [[0], 2, False, {'regret': 36}]
1 [[0], 6, False, {'regret': 52}]
0 [[0], -5, False, {'regret': 19}]
0 [[0], 43, False, {'regret': 0}]
3 [[0], 10, False, {'regret': 20}]
3 [[0], -8, False, {'regret': 6}]


In [116]:
# Stopping criteria handed to the Tune run.
stop = dict(
    training_iteration=100,
    timesteps_total=100000,
    episode_reward_mean=20.0,
)

# Trainer configuration: the environment class to instantiate.
config = dict(env=MarketBandit)

In [117]:
from ray import tune

# Run the LinUCB trainer with the configuration and stopping criteria defined above.
analysis = tune.run(
    "contrib/LinUCB",
    stop=stop,
    config=config,
)

Trial name,status,loc
contrib_LinUCB_MarketBandit_00000,RUNNING,


[2m[36m(pid=53962)[0m CONFIG {}
[2m[36m(pid=53962)[0m 2020-05-26 14:43:57,706	INFO trainer.py:421 -- Tip: set 'eager': true or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=53962)[0m 2020-05-26 14:43:57,710	INFO trainer.py:580 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=53962)[0m 2020-05-26 14:43:57,723	INFO trainable.py:217 -- Getting current IP.


2020-05-26 14:43:58,258	ERROR trial_runner.py:519 -- Trial contrib_LinUCB_MarketBandit_00000: Error processing event.
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 467, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/anaconda3/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 431, in fetch_result
    result = ray.get(trial_future[0], DEFAULT_GET_TIMEOUT)
  File "/opt/anaconda3/lib/python3.7/site-packages/ray/worker.py", line 1515, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::LinUCB.train()[39m (pid=53962, ip=192.168.1.65)
  File "python/ray/_raylet.pyx", line 463, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 417, in ray._raylet.execute_task.function_executor
  File "/opt/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 498, in train
    raise e
  File "/opt/anaconda3

Trial name,status,loc
contrib_LinUCB_MarketBandit_00000,ERROR,

Trial name,# failures,error file
contrib_LinUCB_MarketBandit_00000,1,/Users/paco/ray_results/contrib/LinUCB/contrib_LinUCB_MarketBandit_0_2020-05-26_14-43-51ze06e_dp/error.txt


Trial name,status,loc
contrib_LinUCB_MarketBandit_00000,ERROR,

Trial name,# failures,error file
contrib_LinUCB_MarketBandit_00000,1,/Users/paco/ray_results/contrib/LinUCB/contrib_LinUCB_MarketBandit_0_2020-05-26_14-43-51ze06e_dp/error.txt


TuneError: ('Trials did not complete', [contrib_LinUCB_MarketBandit_00000])