# Santa 2020 - The Candy Cane Contest

Bayesian UCB agent inspired by https://lilianweng.github.io/lil-log/2018/01/23/the-multi-armed-bandit-problem-and-its-solutions.html  
Thompson Sampling agent inspired by https://www.kaggle.com/ilialar/simple-multi-armed-bandit  
Training data collection inspired by https://www.kaggle.com/lebroschar/1000-greedy-decision-tree-model  
Ray support from https://www.kaggle.com/nigelcarpenter/parallel-processing-agent-trials-using-ray  

Ideas:
- Add more data and see what happens
- Test agents trained on varying amounts of data
- Add training data from agents around my ranking
- Try ensemble model
- Try nn model

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import ray
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import psutil

num_cpus = psutil.cpu_count(logical=False)
print(f"Initializing ray with {num_cpus} cpus")
ray.shutdown()
ray.init(num_cpus=num_cpus)

# Stats

In [None]:
import math
import scipy.stats

def get_wins(scores):
    """Count the decisive games won by each player.

    Args:
        scores: sequence of ``[p1_score, p2_score]`` pairs, one per game.

    Returns:
        Tuple ``(p1_wins, p2_wins)``; tied games are counted for neither player.
    """
    results = np.array(scores)
    p1_scores = results[:, 0]
    p2_scores = results[:, 1]
    p1_wins = np.sum(p1_scores > p2_scores)
    p2_wins = np.sum(p2_scores > p1_scores)
    return p1_wins, p2_wins

def get_los_from_scores(scores):
    """Return player 1's likelihood of superiority computed from raw game scores.

    Args:
        scores: sequence of ``[p1_score, p2_score]`` pairs, one per game.
    """
    win_counts = get_wins(scores)
    return get_los(*win_counts)

def get_los(p1_wins, p2_wins):
    """Calculate the likelihood of superiority (LOS) for player 1 from win counts.

    The LOS for player 2 is the complement (``1 - LOS``).

    Args:
        p1_wins: number of games won by player 1.
        p2_wins: number of games won by player 2.

    Returns:
        A value in ``[0, 1]``: 0.5 when neither player has won a game, 0 when
        only player 2 has wins, 1 when only player 1 has wins, otherwise the
        probability mass above 0.5 of a ``Beta(p1_wins, p2_wins)`` distribution.
    """
    # With no decisive games there is no evidence either way. Returning 0 here
    # would claim player 1 is certainly inferior and could trigger an early
    # stop in callers that threshold on the LOS.
    if p1_wins == 0 and p2_wins == 0:
        return 0.5
    # The beta distribution needs strictly positive parameters, so the
    # one-sided cases are handled explicitly.
    if p1_wins == 0:
        return 0
    if p2_wins == 0:
        return 1

    return scipy.stats.beta(p1_wins, p2_wins).sf(0.5)

def print_inline_stats(scores):
    """Print a one-line running summary (record and player 1's LOS) for the games so far.

    Args:
        scores: sequence of ``[p1_score, p2_score]`` pairs, one per game.
    """
    p1_wins, p2_wins = win_counts = get_wins(scores)
    p1_los = get_los(*win_counts)

    # end='\r' returns the cursor to the start of the line so the next call
    # overwrites this summary instead of adding a new line.
    print(f"Results after {len(scores)} games: {p1_wins}-{p2_wins} LOS: {p1_los:.3f}", end='\r')

def print_stats(scores):
    """Print win counts, mean scores and LOS for both players.

    Args:
        scores: sequence of ``[p1_score, p2_score]`` pairs, one per game.
    """
    results = np.array(scores)

    p1_wins, p2_wins = get_wins(results)
    p1_mean, p2_mean = np.average(results, axis=0)

    # Player 2's LOS is the complement of player 1's.
    p1_los = get_los(p1_wins, p2_wins)
    p2_los = 1 - p1_los

    summary_lines = (
        f"Wins: {p1_wins:5} {p2_wins:5}",
        f"Mean: {p1_mean:.1f} {p2_mean:.1f}",
        f"LOS:  {p1_los:0.3f} {p2_los:.3f}",
    )
    for line in summary_lines:
        print(line)

print_stats([[100,110],[120,125],[150,125]])
print_inline_stats([[100,110],[120,125],[150,125]])
print_inline_stats([[100,110],[120,125],[150,125]])

# Random Agent

In [None]:
# %%writefile random_agent.py

import random

class RandomAgent:
    """Baseline agent that picks a bandit uniformly at random on every step."""

    def description(self):
        """Return the human-readable label used in result printouts."""
        return "Random Agent"

    def step(self, observation, configuration):
        """Return a random bandit index in ``[0, configuration.banditCount)``.

        The observation is ignored.
        """
        bandit_count = configuration.banditCount
        return random.randrange(bandit_count)

# Thompson Sampling Agent

Inspired by https://www.kaggle.com/ilialar/simple-multi-armed-bandit

In [None]:
# %%writefile thompson.py

import numpy as np
import random

class ThompsonAgent():
    """Thompson-sampling agent for the two-player multi-armed bandit game.

    ``bandit_states`` holds one row per bandit with two Beta-distribution
    parameters: column 0 grows when our pull was rewarded, column 1 when it
    was not. Column 0 is additionally multiplied by 0.97 for every pull
    reported in ``observation.lastActions``.
    """

    def __init__(self):
        self.bandit_states = None
        self.last_action = None
        self.total_reward = 0

    def name(self):
        """Return the short agent name."""
        return "Thompson Agent"

    def description(self):
        """Return the label used in result printouts (same as ``name()``)."""
        return self.name()

    def step(self, observation, configuration):
        """Update beliefs from the previous step and choose the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative reward so
                far) and ``lastActions`` (bandits pulled on the previous step).
            configuration: object with ``banditCount``.

        Returns:
            Index of the chosen bandit as a plain ``int``.
        """
        if observation.step == 0:
            # Start of a game: reset all per-game state. Without resetting
            # total_reward, a reused instance would see a negative (truthy)
            # reward delta on the next game's first update and record a win.
            self.bandit_states = np.ones((configuration.banditCount, 2))
            self.total_reward = 0
        else:
            # observation.reward is cumulative; the difference is last step's payout.
            reward = observation.reward - self.total_reward
            self.total_reward = observation.reward

            if reward:
                self.bandit_states[self.last_action][0] += 1
            else:
                self.bandit_states[self.last_action][1] += 1

            # Decay the success parameter of every bandit pulled last step.
            for bandit in observation.lastActions:
                self.bandit_states[bandit][0] *= 0.97

        # Sample one payout probability per bandit and play the best sample.
        probs = np.random.beta(self.bandit_states[:, 0], self.bandit_states[:, 1])
        best_bandit = int(np.argmax(probs))

        self.last_action = best_bandit

        return best_bandit
            
# Module-level instance so the agent's state persists between calls.
agent = ThompsonAgent()


def thompson_agent(observation, configuration):
    """Function-style wrapper that delegates to the shared ``ThompsonAgent`` instance."""
    chosen_bandit = agent.step(observation, configuration)
    return chosen_bandit


# Bayesian UCB Agent

https://lilianweng.github.io/lil-log/2018/01/23/the-multi-armed-bandit-problem-and-its-solutions.html

In [None]:
# %%writefile bayesian_ucb_with_02_opp.py

import numpy as np
import scipy.stats

# todo replace self.lastAction with lastActions[agentIndex]
class UcbAgent:
    """Bayesian UCB agent for the two-player multi-armed bandit game.

    Each bandit is scored by its posterior mean ``wins / (wins + losses)`` plus
    ``c`` standard deviations of ``Beta(wins, losses)``; the highest score is played.

    Args:
        c: number of standard deviations in the confidence interval.
        decay: factor applied to ``wins`` of recently pulled bandits (see ``step``).
        opp_bonus: amount added to a bandit's ``wins`` when the opponent pulls
            it twice in a row.
        random: when true, add a small random offset to the initial ``wins`` so
            ties between untouched bandits are broken randomly.
    """

    def __init__(self, c=3, decay=1.0, opp_bonus=0, random=False):
        self.wins = None
        self.losses = None
        self.num_bandits = None
        self.last_action = 0
        self.opp_actions = []
        self.total_reward = 0

        # parameters
        self.c = c # number of standard deviations in confidence interval
        self.decay = decay
        self.opp_bonus = opp_bonus
        self.random = random

    def name(self):
        """Return the short agent name."""
        return "UCB Agent"

    def description(self):
        """Return a label that includes the agent's hyper-parameters."""
        return f"UCB Agent c={self.c} decay={self.decay:.2f} opp_bonus={self.opp_bonus} random={self.random}"

    def step(self, observation, configuration) -> int:
        """Update win/loss counts from the previous step and choose the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative),
                ``agentIndex`` and ``lastActions``.
            configuration: object with ``banditCount``.

        Returns:
            Index of the bandit with the highest upper confidence bound.
        """
        if observation.step == 0:
            self.num_bandits = configuration.banditCount
            self.wins = np.ones(self.num_bandits)
            if self.random:
                self.wins += np.random.rand(self.num_bandits)/100
            self.losses = np.ones(self.num_bandits)
            # Reset per-game bookkeeping so a reused instance does not carry the
            # previous game's cumulative reward (which would make the first
            # reward delta negative, i.e. truthy) or opponent history.
            self.total_reward = 0
            self.opp_actions = []
        else:
            player = observation.agentIndex
            opponent = 1 if player == 0 else 0

            # observation.reward is cumulative; the difference is last step's payout.
            reward = observation.reward - self.total_reward
            self.total_reward = observation.reward

            # adjust win or loss counts
            if reward:
                self.wins[self.last_action] += 1
            else:
                self.losses[self.last_action] += 1

            self.opp_bonus_adjustment(observation.lastActions[opponent])

            # update total pull counts and decay
            # NOTE(review): decay is only applied during the first 10 steps —
            # confirm this cutoff is intentional.
            if observation.step < 10:
                for bandit in observation.lastActions:
                    self.wins[bandit] *= self.decay

        ucbs = (self.wins)/(self.wins+self.losses) + self.c * scipy.stats.beta.std(self.wins, self.losses)
        self.last_action = int(np.argmax(ucbs))
        return self.last_action

    def opp_bonus_adjustment(self, opp_action):
        """Record the opponent's pull and reward bandits they pull twice in a row."""
        # give a bonus to a bandit if our opponent tried it twice in a row

        if self.opp_actions and self.opp_actions[-1] == opp_action:
            # could add decay here but it probably won't matter because opp_bonus is arbitrary anyway
            self.wins[opp_action] += self.opp_bonus

        self.opp_actions.append(opp_action)
                
# Module-level instance so the agent's state persists between calls.
agent = UcbAgent(c=3, opp_bonus=0.2)


def ucb_agent(observation, configuration):
    """Function-style wrapper that delegates to the shared ``UcbAgent`` instance."""
    chosen_bandit = agent.step(observation, configuration)
    return chosen_bandit
        

# Pull Vegas Agent

I'm not going to submit this, but since it's from a popular notebook I need to train against it  
https://www.kaggle.com/a763337092/pull-vegas-slot-machines-add-weaken-rate-continue5

In [None]:
import numpy as np
import pandas as pd
import random, os, datetime, math
from collections import defaultdict

class PullVegasAgent:
    """Heuristic "Pull Vegas" agent used as a sparring partner.

    Tracks per-bandit wins, losses, opponent pulls and "continue" streaks and
    scores bandits with a hand-tuned formula (see ``get_next_bandit``).

    Args:
        num_bandits: number of bandits in the game.
    """

    def __init__(self, num_bandits):
        self.total_reward = 0
        self.actions = []
        self.opp_actions = []

        # wins starts at one so every bandit has a non-zero pull total
        self.wins = np.ones(num_bandits)
        self.losses = np.zeros(num_bandits)
        self.opp = np.zeros(num_bandits)
        self.my_continue = np.zeros(num_bandits)
        self.opp_continue = np.zeros(num_bandits)

    def description(self):
        """Return the label used in result printouts."""
        return "Pull Vegas"

    def step(self, observation, configuration):
        """Choose the next bandit; thin alias for ``multi_armed_probabilities``."""
        return self.multi_armed_probabilities(observation, configuration)

    def get_next_bandit(self):
        """Return the index (plain ``int``) of the bandit with the best heuristic score."""
        total_pulls = self.wins + self.losses + self.opp
        probs = (self.wins - self.losses + self.opp - (self.opp>0)*1.5 + self.opp_continue) / (total_pulls) \
                    * np.power(0.97, total_pulls)
        # Cast to a built-in int so the return type matches the other agents,
        # which all return plain ints rather than numpy scalars.
        best_bandit = int(np.argmax(probs))
        return best_bandit

    def multi_armed_probabilities(self, observation, configuration):
        """Update the statistics from the last step and pick the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative),
                ``agentIndex`` and ``lastActions``.
            configuration: object with ``banditCount``.

        Returns:
            Index of the chosen bandit.
        """
        if observation.step == 0:
            return random.randrange(configuration.banditCount)

        # observation.reward is cumulative; the difference is last step's payout.
        last_reward = observation.reward - self.total_reward
        self.total_reward = observation.reward

        my_idx = observation.agentIndex
        my_last_action = observation.lastActions[my_idx]
        opp_last_action = observation.lastActions[1-my_idx]

        self.actions.append(my_last_action)
        self.opp_actions.append(opp_last_action)

        if last_reward:
            self.wins[my_last_action] += 1
        else:
            self.losses[my_last_action] += 1

        self.opp[opp_last_action] += 1

        # Track how many times in a row each player has repeated a bandit.
        if observation.step >= 3:
            if self.actions[-1] == self.actions[-2]:
                self.my_continue[my_last_action] += 1
            else:
                self.my_continue[my_last_action] = 0
            if self.opp_actions[-1] == self.opp_actions[-2]:
                self.opp_continue[opp_last_action] += 1
            else:
                self.opp_continue[opp_last_action] = 0

        # A rewarded pull is always repeated.
        if last_reward:
            return my_last_action

        # After three identical pulls in a row, repeat once more half of the time.
        if observation.step >= 4:
            if (self.actions[-1] == self.actions[-2]) and (self.actions[-1] == self.actions[-3]):
                if random.random() < 0.5:
                    return self.actions[-1]

        return self.get_next_bandit()


# Sklearn Regression Agent

Uses a scikit-learn regression model to predict the payout probability of each machine

In [None]:
# %%writefile sklearn_with_streak.py

import joblib
import numpy as np
import pandas as pd


class SklearnRegressionAgent():
    """Agent that ranks bandits with a scikit-learn regressor loaded from disk.

    Keeps one row of features per bandit in ``machine_states`` and, on each
    step, picks at random among the bandits whose predicted value is within
    ``margin`` of the best prediction.

    Args:
        num_bandits: number of bandits in the game.
        filename: path of the joblib-serialized model.
        margin: fraction of the best prediction a bandit must reach to be a candidate.
    """

    def __init__(self, num_bandits, filename, margin=0.99):
        self.machine_states = self._empty_machine_states(num_bandits)
        self.total_reward = 0
        self.filename = filename
        self.model = joblib.load(filename)
        self.margin = margin

    @staticmethod
    def _empty_machine_states(num_bandits):
        """Return the all-zero integer feature table, one row per bandit."""
        # Build the frame with integer zeros directly. Creating an empty
        # (all-NaN, object dtype) frame and calling fillna(0) relies on pandas'
        # deprecated silent downcasting to end up with numeric columns.
        return pd.DataFrame(
            0,
            index=range(num_bandits),
            columns=['step', 'n_pulls', 'n_success', 'n_opp_pulls', 'streak', 'opp_streak']
        )

    def name(self):
        """Return the short agent name."""
        return "Sklearn Regression Agent"

    def description(self):
        """Return a label that includes the model file and margin."""
        return f"Sklearn - {self.filename}, margin:{self.margin:.2f}"

    def step(self, observation, configuration):
        """Update the feature table from the last step and choose the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative),
                ``agentIndex`` and ``lastActions``.
            configuration: object with ``banditCount``.

        Returns:
            Index of the chosen bandit as a plain ``int``.
        """
        # Nothing has been observed yet on the first step: pick at random.
        if observation.step == 0:
            return np.random.randint(configuration.banditCount)

        # observation.reward is cumulative; the difference is last step's payout.
        reward = observation.reward - self.total_reward
        self.total_reward = observation.reward
        last_action = observation.lastActions[observation.agentIndex]
        opp_action = observation.lastActions[1-observation.agentIndex]

        self.machine_states['step'] = observation.step
        self.machine_states.at[last_action, 'n_pulls'] += 1
        self.machine_states.at[last_action, 'n_success'] += reward
        self.machine_states.at[opp_action, 'n_opp_pulls'] += 1

        # A streak counts consecutive pulls of the same bandit; it is cleared
        # for every bandit other than the one just pulled.
        self.machine_states.at[last_action, 'streak'] += 1
        self.machine_states.loc[self.machine_states.index != last_action, 'streak'] = 0
        self.machine_states.at[opp_action, 'opp_streak'] += 1
        self.machine_states.loc[self.machine_states.index != opp_action, 'opp_streak'] = 0

        probs = self.model.predict(self.machine_states)

        # Choose randomly among all bandits close enough to the best prediction.
        max_return = np.max(probs)
        result = np.random.choice(np.where(probs >= self.margin * max_return)[0])
        return int(result)

# Created lazily on the first step, once the bandit count is known.
agent = None


def regression_agent(observation, configuration):
    """Function-style wrapper: build the sklearn agent on step 0, then delegate to it."""
    global agent
    is_first_step = observation.step == 0
    if is_first_step:
        print("Creating decision tree agent")
        agent = SklearnRegressionAgent(
            configuration.banditCount,
            "/kaggle_simulations/agent/model.joblib",
        )

    chosen_bandit = agent.step(observation, configuration)
    return chosen_bandit
        

# Keras Regression Agent

In [None]:
# %%writefile keras_agent.py
from tensorflow import keras
import numpy as np
import pandas as pd


class KerasRegressionAgent():
    """Agent that ranks bandits with a Keras regression model loaded from disk.

    Keeps one row of features per bandit in ``machine_states`` and, on each
    step, picks at random among the bandits whose predicted value is within
    ``margin`` of the best prediction.

    Args:
        num_bandits: number of bandits in the game.
        filename: path of the saved Keras model.
        margin: fraction of the best prediction a bandit must reach to be a candidate.
    """

    def __init__(self, num_bandits, filename, margin=0.99):
        self.total_reward = 0
        self.filename = filename
        self.margin = margin

        self.model = keras.models.load_model(self.filename)
        self.machine_states = self._empty_machine_states(num_bandits)

    @staticmethod
    def _empty_machine_states(num_bandits):
        """Return the all-zero integer feature table, one row per bandit."""
        # Build the frame with integer zeros directly. Creating an empty
        # (all-NaN, object dtype) frame and calling fillna(0) relies on pandas'
        # deprecated silent downcasting; an object-dtype frame would also turn
        # into an object array in to_numpy() before it reaches the model.
        return pd.DataFrame(
            0,
            index=range(num_bandits),
            columns=['step', 'n_pulls', 'n_success', 'n_opp_pulls', 'streak', 'opp_streak']
        )

    def name(self):
        """Return the short agent name."""
        return "Keras Regression Agent"

    def description(self):
        """Return a label that includes the model file and margin."""
        return f"Keras - {self.filename}, margin:{self.margin:.2f}"

    def step(self, observation, configuration):
        """Update the feature table from the last step and choose the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative),
                ``agentIndex`` and ``lastActions``.
            configuration: object with ``banditCount``.

        Returns:
            Index of the chosen bandit as a plain ``int``.
        """
        # Nothing has been observed yet on the first step: pick at random.
        if observation.step == 0:
            return np.random.randint(configuration.banditCount)

        # observation.reward is cumulative; the difference is last step's payout.
        reward = observation.reward - self.total_reward
        self.total_reward = observation.reward
        last_action = observation.lastActions[observation.agentIndex]
        opp_action = observation.lastActions[1-observation.agentIndex]

        self.machine_states['step'] = observation.step
        self.machine_states.at[last_action, 'n_pulls'] += 1
        self.machine_states.at[last_action, 'n_success'] += reward
        self.machine_states.at[opp_action, 'n_opp_pulls'] += 1

        # A streak counts consecutive pulls of the same bandit; it is cleared
        # for every bandit other than the one just pulled.
        self.machine_states.at[last_action, 'streak'] += 1
        self.machine_states.loc[self.machine_states.index != last_action, 'streak'] = 0
        self.machine_states.at[opp_action, 'opp_streak'] += 1
        self.machine_states.loc[self.machine_states.index != opp_action, 'opp_streak'] = 0

        # The model is called directly on the raw feature matrix.
        probs = self.model(self.machine_states.to_numpy())

        # Choose randomly among all bandits (rows) close enough to the best prediction.
        max_return = np.max(probs)
        result = np.random.choice(np.where(probs >= self.margin * max_return)[0])
        return int(result)

# Created lazily on the first step, once the bandit count is known.
agent = None


def regression_agent(observation, configuration):
    """Function-style wrapper: build the Keras agent on step 0, then delegate to it."""
    global agent
    is_first_step = observation.step == 0
    if is_first_step:
        print("Creating keras agent")
        agent = KerasRegressionAgent(
            configuration.banditCount,
            "models/streak_model.h5",
        )

    chosen_bandit = agent.step(observation, configuration)
    return chosen_bandit

# Ensemble Regression Agent

In [None]:
# %%writefile ensemble.py

import joblib
from tensorflow import keras
import numpy as np
import pandas as pd


class EnsembleRegressionAgent():
    """Agent that blends a Keras model and a scikit-learn model to rank bandits.

    Keeps one row of features per bandit in ``machine_states`` and, on each
    step, picks at random among the bandits whose blended prediction is within
    ``margin`` of the best one.

    Args:
        num_bandits: number of bandits in the game.
        keras_file: path of the saved Keras model.
        scikit_file: path of the joblib-serialized scikit-learn model.
        margin: fraction of the best prediction a bandit must reach to be a candidate.
        alpha: weight of the Keras prediction; the scikit-learn prediction gets ``1 - alpha``.
    """

    def __init__(self, num_bandits, keras_file, scikit_file, margin=0.99, alpha=0.5):
        self.total_reward = 0
        self.filename = f"{keras_file} {scikit_file}"
        self.margin = margin
        self.alpha = alpha

        self.keras_model = keras.models.load_model(keras_file)
        self.scikit_model = joblib.load(scikit_file)
        self.machine_states = self._empty_machine_states(num_bandits)

    @staticmethod
    def _empty_machine_states(num_bandits):
        """Return the all-zero integer feature table, one row per bandit."""
        # Build the frame with integer zeros directly. Creating an empty
        # (all-NaN, object dtype) frame and calling fillna(0) relies on pandas'
        # deprecated silent downcasting; an object-dtype frame would also turn
        # into an object array in to_numpy() before it reaches the Keras model.
        return pd.DataFrame(
            0,
            index=range(num_bandits),
            columns=['step', 'n_pulls', 'n_success', 'n_opp_pulls', 'streak', 'opp_streak']
        )

    def name(self):
        """Return the short agent name."""
        return "Ensemble Regression Agent"

    def description(self):
        """Return a label that includes the model files, margin and alpha."""
        return f"Ensemble - {self.filename}, margin:{self.margin:.2f} alpha:{self.alpha}"

    def step(self, observation, configuration):
        """Update the feature table from the last step and choose the next bandit.

        Args:
            observation: object with ``step``, ``reward`` (cumulative),
                ``agentIndex`` and ``lastActions``.
            configuration: object with ``banditCount``.

        Returns:
            Index of the chosen bandit as a plain ``int``.
        """
        # Nothing has been observed yet on the first step: pick at random.
        if observation.step == 0:
            return np.random.randint(configuration.banditCount)

        # observation.reward is cumulative; the difference is last step's payout.
        reward = observation.reward - self.total_reward
        self.total_reward = observation.reward
        last_action = observation.lastActions[observation.agentIndex]
        opp_action = observation.lastActions[1-observation.agentIndex]

        self.machine_states['step'] = observation.step
        self.machine_states.at[last_action, 'n_pulls'] += 1
        self.machine_states.at[last_action, 'n_success'] += reward
        self.machine_states.at[opp_action, 'n_opp_pulls'] += 1

        # A streak counts consecutive pulls of the same bandit; it is cleared
        # for every bandit other than the one just pulled.
        self.machine_states.at[last_action, 'streak'] += 1
        self.machine_states.loc[self.machine_states.index != last_action, 'streak'] = 0
        self.machine_states.at[opp_action, 'opp_streak'] += 1
        self.machine_states.loc[self.machine_states.index != opp_action, 'opp_streak'] = 0

        probs = self.get_probs()

        # Choose randomly among all bandits (rows) close enough to the best prediction.
        max_return = np.max(probs)
        result = np.random.choice(np.where(probs >= self.margin * max_return)[0])
        return int(result)

    def get_probs(self):
        """Return the alpha-weighted blend of both models' predictions, one row per bandit."""
        keras_probs = self.keras_model(self.machine_states.to_numpy())
        scikit_probs = self.scikit_model.predict(self.machine_states)
        # Reshape the flat scikit-learn output into a column so it lines up
        # with the Keras output instead of broadcasting.
        scikit_probs = np.reshape(scikit_probs, (-1, 1))
        return self.alpha * keras_probs + (1 - self.alpha) * scikit_probs

# Created lazily on the first step, once the bandit count is known.
agent = None


def regression_agent(observation, configuration):
    """Function-style wrapper: build the ensemble agent on step 0, then delegate to it."""
    global agent
    is_first_step = observation.step == 0
    if is_first_step:
        print("Creating ensemble agent")
        agent = EnsembleRegressionAgent(
            configuration.banditCount,
            keras_file="/kaggle_simulations/agent/model.h5",
            scikit_file="/kaggle_simulations/agent/model.joblib",
        )

    chosen_bandit = agent.step(observation, configuration)
    return chosen_bandit

In [None]:
# Quick sanity check: construct each agent and step it through a few observations.
# NOTE: smoke_test is defined in the "Simulator Code" cell further down, so that
# cell has to be executed before this one.
smoke_test(KerasRegressionAgent(100, filename='models/streak_model.h5'))
smoke_test(SklearnRegressionAgent(100, filename='models/dtr_streak_model.joblib'))
smoke_test(EnsembleRegressionAgent(100, keras_file='models/streak_model.h5', scikit_file='models/dtr_streak_model.joblib'))
smoke_test(RandomAgent())

# Submit Model

In [None]:
!cp models/streak_model.h5 model.h5
!cp models/dtr_streak_model.joblib model.joblib
!cp ensemble.py main.py
!tar cvfz ensemble.tar.gz main.py model.h5 model.joblib
!rm model.h5
!rm model.joblib

# Simulator Code

In [None]:
from collections import namedtuple

# Lightweight stand-ins for the configuration and observation objects the agents receive.
Configuration = namedtuple('Configuration', ['banditCount'])
Observation = namedtuple('Observation', ['step', 'reward', 'agentIndex', 'lastActions'])

def smoke_test(agent):
    """Drive ``agent`` through three hand-written steps to check that it runs.

    The agent plays as player 0 in a 100-bandit game: step 0 has no previous
    actions, step 1 reports no reward, and step 2 reports a cumulative reward
    of 1. The opponent's actions are fixed (2, then 5). Nothing is returned.
    """
    config = Configuration(banditCount=100)
    opening = Observation(step=0, reward=0, agentIndex=0, lastActions=[])
    action = agent.step(opening, config)

    # (cumulative reward, opponent's previous action) for steps 1 and 2
    follow_ups = [(0, 2), (1, 5)]
    for step, (reward, opp_action) in enumerate(follow_ups, start=1):
        obs = Observation(step=step, reward=reward, agentIndex=0, lastActions=[action, opp_action])
        action = agent.step(obs, config)

@ray.remote
def simulate_mab(agent_lambdas, num_steps=2000, num_bandits=100, game_id=0):
    """Simulate one two-player bandit game and record the running totals.

    Args:
        agent_lambdas: two factories, each called with ``num_bandits`` and
            returning an agent object with a ``step(observation, configuration)`` method.
        num_steps: number of rounds to play.
        num_bandits: number of bandits; their payout probabilities are drawn
            uniformly at random.
        game_id: identifier stored in the ``game_id`` column of the result.

    Returns:
        ``(totals, df)`` where ``totals`` is ``[p1_total, p2_total]`` and ``df``
        has one row per step with columns ``step``, ``p1_total``, ``p2_total``,
        ``diff`` (p1 minus p2) and ``game_id``.
    """
    config = Configuration(banditCount=num_bandits)
    probs = np.random.rand(num_bandits)
    last_actions = [0, 0]
    totals = [0, 0]
    agents = [make_agent(num_bandits) for make_agent in agent_lambdas]

    history = {'step': [], 'p1_total': [], 'p2_total': []}

    for step in range(num_steps):
        # Both agents decide from the PREVIOUS step's actions. Updating a shared
        # list while iterating would show player 2 the action player 1 has just
        # chosen in this step. Each agent also gets its own copy so it cannot
        # mutate what the other one sees.
        choices = []
        for player, agent in enumerate(agents):
            obs = Observation(step=step, reward=totals[player], agentIndex=player,
                              lastActions=list(last_actions))
            choices.append(agent.step(obs, config))

        # Pay out every pull against the probabilities in force for this step.
        for player, choice in enumerate(choices):
            totals[player] += np.random.rand() < probs[choice]
        last_actions = choices

        history['step'].append(step)
        history['p1_total'].append(totals[0])
        history['p2_total'].append(totals[1])

        # Every pull (by either player) decays that bandit's payout probability.
        for action in last_actions:
            probs[action] *= 0.97

    df = pd.DataFrame(data=history)
    df['diff'] = df.p1_total - df.p2_total
    df['game_id'] = game_id

    return totals, df

def compare_agents(agent_lambdas, num_games=50, num_bandits=100, num_steps=2000, min_games=20):
    """Play up to ``num_games`` simulated games between two agents, in parallel batches.

    Games are launched through ray a few at a time. After each batch a running
    summary is printed, and once at least ``min_games`` have been played the
    loop stops early if player 1's LOS falls below 0.03 or rises above 0.97.

    Args:
        agent_lambdas: two agent factories, passed through to ``simulate_mab``.
        num_games: maximum number of games to play.
        num_bandits: number of bandits per game.
        num_steps: number of steps per game.
        min_games: minimum number of games before early stopping is allowed.

    Returns:
        ``(scores, df)``: the list of per-game ``[p1_total, p2_total]`` results
        and the concatenated per-step frames of all games (an empty frame when
        no game was played).
    """
    num_cpus = 4  # number of games launched per batch

    scores = []
    game_frames = []

    for batch_start in range(0, num_games, num_cpus):
        # Cap the final batch so that no more than num_games games are played.
        batch_end = min(batch_start + num_cpus, num_games)
        result_ids = [simulate_mab.remote(
            agent_lambdas,
            num_steps=num_steps,
            num_bandits=num_bandits,
            game_id=game_id) for game_id in range(batch_start, batch_end)]

        batch_results = ray.get(result_ids)

        for score, game_df in batch_results:
            scores.append(score)
            game_frames.append(game_df)

        print_inline_stats(scores)
        p1_los = get_los_from_scores(scores)
        if len(scores) >= min_games and (p1_los < 0.03 or p1_los > 0.97):
            break

    # Concatenate once at the end: DataFrame.append no longer exists in
    # pandas 2 and copying the accumulated frame per game is quadratic.
    df = pd.concat(game_frames) if game_frames else pd.DataFrame()

    return scores, df

def round_robin(agent_lambdas, num_games=50, num_bandits=100, num_steps=2000, min_games=20):
    """Play every pair of agents against each other and report the results.

    Prints each agent's win-loss-tie record and plots a heatmap of the pairwise
    likelihood of superiority.

    Args:
        agent_lambdas: agent factories, each called with ``num_bandits``.
        num_games, num_bandits, num_steps, min_games: passed through to
            ``compare_agents`` for every pairing.
    """
    # Each factory is instantiated once here just to obtain its description.
    agent_names = [make_agent(num_bandits).description() for make_agent in agent_lambdas]
    num_agents = len(agent_lambdas)
    # One row per agent: [wins, losses, ties] summed over all of its pairings.
    records = np.zeros((num_agents, 3), dtype='int32')
    # Pairwise LOS; the diagonal keeps its initial value of 0.5.
    los_matrix = np.full((num_agents, num_agents), 0.5)

    print(f"Starting round robin with {num_agents} agents:")
    for name in agent_names:
        print("\t"+name)
    print("")

    pairings = [(i, j) for i in range(num_agents-1) for j in range(i+1, num_agents)]
    for i, j in pairings:
        print(f"Starting new round:\n\t{agent_names[i]}\n\t{agent_names[j]}")
        matchup = [agent_lambdas[i], agent_lambdas[j]]
        scores, _ = compare_agents(matchup,
                                   num_games=num_games,
                                   num_bandits=num_bandits,
                                   num_steps=num_steps,
                                   min_games=min_games)
        p1_wins, p2_wins = get_wins(scores)
        ties = len(scores) - p1_wins - p2_wins
        # The second agent's record mirrors the first agent's.
        records[i] += [p1_wins, p2_wins, ties]
        records[j] += [p2_wins, p1_wins, ties]

        p1_los = get_los(p1_wins, p2_wins)
        los_matrix[i, j] = p1_los
        los_matrix[j, i] = 1 - p1_los

        print('\n')

    for i in range(num_agents):
        print(f"{agent_names[i]}: {'-'.join(map(str, records[i]))}")

    plot_los_heatmap(agent_names, los_matrix)
            
def graph_game_results(df):
    """Plot the smoothed point difference (P1 - P2) per step, averaged over all games.

    Args:
        df: per-step game records with at least ``step`` and ``diff`` columns.
    """
    per_step_mean = df.groupby('step').mean()
    # 10-step rolling mean to smooth the curve
    smoothed_diff = per_step_mean['diff'].rolling(window=10).mean()
    ax = smoothed_diff.plot(title="Point difference averages over all games")
    ax.set_ylabel("P1 - P2")
    ax.set_xlabel("step")

#     ax = df[df.game_id == 0][['p1_total', 'p2_total']].plot(title="Reward curves for game 0")
#     ax.set_xlabel("step")
#     ax.set_ylabel("reward")

def plot_los_heatmap(agent_names, los_matrix):
    """Show the pairwise LOS matrix as an annotated grayscale heatmap.

    Agents are ordered by descending row sum of ``los_matrix``; cell (row, col)
    is annotated with the row agent's LOS against the column agent.

    Args:
        agent_names: labels, in the same order as the matrix rows and columns.
        los_matrix: square array of pairwise LOS values.
    """
    num_agents = len(agent_names)

    # Highest total LOS first.
    order = np.argsort(-np.sum(los_matrix, axis=1))
    sorted_los = los_matrix[order][:, order]
    sorted_names = [agent_names[idx] for idx in order]

    fig, ax = plt.subplots()
    # presumably vmax is above 1 so that even the brightest cells stay gray
    # enough for the white annotations to be readable — TODO confirm
    ax.imshow(sorted_los, cmap='gray', vmin=0, vmax=1.5)

    # One tick per agent on both axes, labelled with the agent names.
    tick_positions = np.arange(num_agents)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(sorted_names)
    ax.set_yticklabels(sorted_names)

    # Rotate the x tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every off-diagonal cell with its LOS value.
    for row in range(num_agents):
        for col in range(num_agents):
            if row != col:
                ax.text(col, row, "{:.2f}".format(sorted_los[row, col]),
                        ha="center", va="center", color="w")
    plt.show()

# Comparing Agents

In [None]:
# %%time

# agents = [
#     lambda n: EnsembleRegressionAgent(n, keras_file='models/streak_model.h5', scikit_file='models/dtr_streak_model.joblib'),
#     lambda n: KerasRegressionAgent(n, filename='models/streak_model.h5'),
#     lambda n: SklearnRegressionAgent(n, filename='models/dtr_streak_model.joblib'),
#     lambda n: UcbAgent(),
#     lambda n: ThompsonAgent(),
#     lambda n: RandomAgent(),
# ]

# Agent factories for the tournament. Each takes the number of bandits and
# returns a fresh agent instance.
agents = [
    lambda n: EnsembleRegressionAgent(n, alpha=0.5, keras_file='models/streak_model.h5', scikit_file='models/dtr_streak_model.joblib'),
#     lambda n: EnsembleRegressionAgent(n, alpha=0.25, keras_file='models/streak_model.h5', scikit_file='models/dtr_streak_model.joblib'),
#     lambda n: EnsembleRegressionAgent(n, alpha=0.75, keras_file='models/streak_model.h5', scikit_file='models/dtr_streak_model.joblib'),
    lambda n: KerasRegressionAgent(n, filename='models/streak_model.h5'),
    lambda n: SklearnRegressionAgent(n, filename='models/dtr_streak_model.joblib'),
    # Use the requested bandit count instead of a hard-coded 100 so the factory
    # stays correct when the game size changes.
    lambda n: PullVegasAgent(n)
]

round_robin(agents, num_games=50)



In [None]:
def compare(agents):
    """Run a fixed 100-game head-to-head between two agent factories and plot it.

    Args:
        agents: two factories, each called with the number of bandits.
    """
    # The factories are instantiated once here only to print their descriptions.
    print(f"P1: {agents[0](100).description()}")
    print(f"P2: {agents[1](100).description()}")
    # min_games equals num_games, so the early stop can only apply after the final game.
    results = compare_agents(agents, num_games=100, min_games=100)
    game_history = results[1]
    graph_game_results(game_history)

In [None]:
compare([agents[0], agents[1]])

In [None]:
compare([agents[0], agents[2]])

In [None]:
compare([agents[0], agents[3]])

In [None]:
compare([agents[2], agents[3]])

# Load Training Data

In [None]:
# Load the previously collected training rows from a parquet file.
train_data = pd.read_parquet('training_data.parquet')
print(f"\nLoaded {train_data.shape[0]} training rows")

# Feature matrix and regression target used by the model cells below.
# NOTE(review): only four feature columns are selected here, while the agent
# classes above feed six columns ('streak' and 'opp_streak' as well) to their
# models — confirm which feature set the saved models were trained on.
X = train_data[['step', 'n_pulls', 'n_success', 'n_opp_pulls']]
y = train_data['threshold']
train_data.head()

# Sklearn Models

Take training data from top-tier games and find a model that predicts actual payout rates

In [None]:
import joblib
from sklearn.model_selection import cross_val_score

def cross_val_rmse(regressor, X, y):
    """Print the per-fold and mean RMSE of ``regressor`` under 5-fold cross-validation."""
    # The scorer reports negated RMSE, so flip the sign back.
    negated_rmse = cross_val_score(regressor, X, y, cv=5, scoring='neg_root_mean_squared_error')
    fold_rmse = -negated_rmse
    print(fold_rmse)
    print(fold_rmse.mean())

def feature_importance(regressor, X, y):
    """Fit ``regressor`` on ``(X, y)`` and print each feature name with its importance.

    Note that this fits the regressor in place as a side effect.
    """
    regressor.fit(X, y)
    named_scores = zip(X.columns, regressor.feature_importances_)
    for column, importance in named_scores:
        print(column, importance)
    

In [None]:
from sklearn.linear_model import LinearRegression

# Linear-regression baseline: report cross-validated RMSE, then fit on all data and save.
lr = LinearRegression()
cross_val_rmse(lr, X, y)

lr.fit(X, y)
# Save the linear model itself. The original dumped `dtr`, which is a different
# model and is not defined until a later cell.
joblib.dump(lr, 'lr_model.joblib')

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision-tree regressor with at least 100 samples per leaf.
dtr = DecisionTreeRegressor(min_samples_leaf=100)
cross_val_rmse(dtr, X, y)
# feature_importance() already fits dtr on (X, y) before printing the importances...
feature_importance(dtr, X, y)
# ...so this refits on the same data before the model is saved.
dtr.fit(X, y)
joblib.dump(dtr, 'models/dtr_streak_model.joblib')

In [None]:
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor(n_estimators=100, min_samples_leaf=20, max_depth=10)
cross_val_rmse(rfr, X, y)

In [None]:
from sklearn.svm import SVR

svr = SVR(kernel='poly', degree=2, C=10, epsilon=0.5)
cross_val_rmse(svr, X, y)

# Keras Models

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

def create_model(n_hidden_layers=1, n_units=10, activation='sigmoid', input_size=4, learning_rate=0.01):
    """Build and compile a fully connected Keras regression model.

    Args:
        n_hidden_layers: number of ReLU hidden layers; 0 connects the input
            directly to the output layer.
        n_units: number of units in every hidden layer.
        activation: activation of the single-unit output layer (the hidden
            layers always use ReLU).
        input_size: number of input features.
        learning_rate: learning rate of the Adam optimizer.

    Returns:
        The model, compiled with a mean-squared-error loss.
    """
    input_layer = Input(shape=(input_size,))

    # Start from the input and stack the hidden layers on top of it. Seeding `m`
    # here also keeps n_hidden_layers=0 from leaving it unbound.
    m = input_layer
    for _ in range(n_hidden_layers):
        m = Dense(n_units, activation='relu')(m)

    m = Dense(1, activation=activation)(m)

    model = Model(inputs=[input_layer], outputs=m)
    opt = Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='mean_squared_error')
    return model

# model = create_model()
# model.fit(X, y, batch_size=10000, epochs=10, validation_split=0.05)

In [None]:
params = {
    'n_hidden_layers': [2, 3, 4],
    'n_units': [3, 5, 8],
    'activation': ['sigmoid'],
    'batch_size': [20000]
}

keras_reg = KerasRegressor(create_model)
search_cv = GridSearchCV(keras_reg, params, cv=3)
search_cv.fit(X, y, epochs=50, batch_size=20000, callbacks=[EarlyStopping(patience=5)], validation_split=0.05)
search_cv.best_params_ # {'activation': 'sigmoid', 'n_hidden_layers': 3, 'n_units': 5}

In [None]:
# Build the final network.
# NOTE(review): input_size=6 here, but X below selects only four columns —
# confirm the feature list before fitting.
# NOTE(review): the grid-search comment in the previous cell records n_units=5
# as the best value, while this call uses n_units=8 — confirm the intended setting.
model = create_model(n_hidden_layers=3, n_units=8, activation='sigmoid', input_size=6, learning_rate=0.01)

# train_data['log_step'] = np.log(train_data.step + 1)
X = train_data[['step', 'n_pulls', 'n_success', 'n_opp_pulls']]
# train_data
# y = train_data['threshold']

# Train with 5% of the rows held out for validation; EarlyStopping uses a patience of 5 epochs.
model.fit(X, y, batch_size=10000, epochs=100, validation_split=0.05, callbacks=[EarlyStopping(patience=5)])


In [None]:
from sklearn.metrics import mean_squared_error

y_pred_nn = model.predict(X, batch_size=20000)
y_pred_dtr = dtr.predict(X)
y_pred_dtr = np.reshape(y_pred_dtr, (-1, 1))
# model.save('models/model.h5')
print(np.sqrt(mean_squared_error(y_pred_nn, y)))
print(np.sqrt(mean_squared_error(y_pred_dtr, y)))
print(np.sqrt(mean_squared_error((y_pred_nn+y_pred_dtr)/2, y)))


# Testing in Kaggle Environment
Not ideal for performance testing, but it double-checks that the agents will run in the online Kaggle environment

In [None]:
from kaggle_environments import make
env = make("mab", debug=True)

env.reset()
env.run([ "decision_tree.py", "keras_agent.py"])
print(env)
env.render(mode="ipython", width=800, height=700)


In [None]:
%%time

@ray.remote
def run_trial():
    """Run one "mab" episode between two agent files and return the final environment state."""
    agent_files = ["bayesian_ucb_with_02_opp_and_rand.py", "decision_tree.py"]
    env = make("mab")
    env.reset()
    env.run(agent_files)
    final_state = env.state
    return final_state

result_ids = [run_trial.remote() for i in range(10)]

results = ray.get(result_ids)

In [None]:
%%time

from kaggle_environments import evaluate

results = np.array(evaluate("mab", ["keras_agent.py", "decision_tree.py"], num_episodes=1))
print_stats(results)

In [None]:
klass = UcbAgent
args = {'opp_bonus':0.2, 'decay':0.99}
klass(**args).description()