# PARAMETER
Change the model path and the number of models to import in `range()`.

In [1]:
import os
import lightgbm as lgb

# Number of models to import; not referenced by the other statements in this cell.
NUMBER_OF_MODELS = 1
# Directory that contains the trained LightGBM model file.
model_path = '../input/lgballdata/lgb-all-data'

# Load the trained booster and write a copy named 'model.txt' into the current
# working directory.
model = lgb.Booster(model_file=os.path.join(model_path, 'model.txt'))
model.save_model('model.txt')

<lightgbm.basic.Booster at 0x7f5aae272550>

In [2]:
%%writefile main.py
"""Greedy agent that chooses machine based on maximum expected payout

Uses a trained decision tree model to consider the other player's movements
in the expected payout.

See my other kernel for methodology for generating training data:
https://www.kaggle.com/lebroschar/generate-training-data

"""
import random

import numpy as np
import pandas as pd
import lightgbm as lgb
import os
import sys
import time
import math

# Per-machine feature names, listed in the same order in which
# GreedyStrategy.updateDist assembles each feature row.
TRAIN_FEATS = [
    'round_num',
    'n_pulls_self',
    'n_success_self',
    'n_failure_self',
    'discounted_cumulative_success',
    'discounted_total_success',
    'n_pulls_opp',
    'n_pulls_tot',
    'ratio_self',
    'ratio_opp',
    'est_1',
    'est_2',
    'est_3',
    'repeat_opp',
    'n_pulls_self_last_10',
    'n_pulls_opp_last_10',
    'ratio_self_last_10_selected',
]

# Directory that is added to the import path and from which the agent loads
# its 'model.txt'.
working_dir = "/kaggle_simulations/agent"
sys.path.append(working_dir)


class GreedyStrategy:
    """Greedy bandit agent driven by a pre-trained LightGBM model.

    - Tracks per-machine pull and success statistics for itself and for the
      opponent
    - Turns those statistics into one feature row per machine (columns in
      the order of TRAIN_FEATS)
    - Chooses the machine with the highest predicted value, breaking ties
      at random

    """

    def __init__(self, name, agent_num, n_machines):
        """Initialize the statistics and load the LightGBM model

        Args:
           name (str):   Name for the agent
           agent_num (int):   Assigned player number
           n_machines (int):   number of machines in the game

        """
        # Record inputs
        self.name = name
        self.agent_num = agent_num
        self.n_machines = n_machines

        # Per-machine statistics: one float slot per machine, all starting at 0
        self.n_pulls_self = np.zeros(n_machines)
        self.n_success_self = np.zeros(n_machines)
        self.n_failure_self = np.zeros(n_machines)
        self.discounted_cumulative_success = np.zeros(n_machines)
        self.discounted_total_success = np.zeros(n_machines)
        self.n_pulls_opp = np.zeros(n_machines)
        self.n_pulls_tot = np.zeros(n_machines)
        self.ratio_self = np.zeros(n_machines)
        self.ratio_opp = np.zeros(n_machines)
        self.est_1 = np.zeros(n_machines)
        self.est_2 = np.zeros(n_machines)
        self.est_3 = np.zeros(n_machines)
        self.repeat_opp = np.zeros(n_machines)
        self.n_pulls_self_last_10 = np.zeros(n_machines)
        self.n_pulls_opp_last_10 = np.zeros(n_machines)
        self.ratio_self_last_10_selected = np.zeros(n_machines)

        # Track winnings (cumulative reward seen at the previous update)
        self.last_reward_count = 0

        self.model_lgb = lgb.Booster(
            model_file=os.path.join(working_dir, 'model.txt'))

        self.round = 0
        self.n_fold = 1

        self.decay_rate = .97

        # Initial predictions come from an all-zero feature matrix
        features = np.zeros((self.n_machines, len(TRAIN_FEATS)))
        self.predicts = self.mod_pred(features)
        self.last_reward = 0

        # Last machine seen per player; -1 means "nothing recorded yet"
        self.last_action = [-1, -1]

        # used for n_pulls_self_last_10, n_pulls_opp_last_10
        self.self_memory, self.opp_memory = [], []

        # used for ratio_self_last_10_selected
        self.self_memory_per_action = [[] for _ in range(n_machines)]

    def __call__(self):
        """Choose machine based on maximum expected payout

        Returns:
           <result> (int):  index of machine to pull; ties between equally
               good machines are broken uniformly at random

        """
        est_return = self.predicts
        best_machines = np.where(est_return >= np.max(est_return))[0]
        return int(np.random.choice(best_machines))

    def firstRound(self):
        """Pick a machine uniformly at random

        Returns:
           <result> (int):  index of machine to pull

        """
        # Draw a scalar rather than a 1-element array: int() on an array
        # with ndim > 0 is deprecated since NumPy 1.25.
        return int(np.random.choice(self.n_machines))

    def pred_lgb(self, obs):
        """Return the LightGBM model's predictions for the rows of obs"""
        return self.model_lgb.predict(obs)

    def mod_pred(self, obs):
        """Return the model predictions for obs (delegates to pred_lgb)"""
        return self.pred_lgb(obs)

    def updateDist(self, curr_total_reward, last_m_indices):
        """Updates the statistics with the last round and re-predicts

        Args:
           curr_total_reward:   cumulative reward so far; the reward of the
               last pull is its difference to the previously seen total
           last_m_indices:   last pulled machine of each player, indexed by
               player number (the opponent is read at 1 - agent_num)

        """
        self.round += 1

        # Compute last reward from the change in cumulative reward
        self.last_reward = curr_total_reward - self.last_reward_count
        self.last_reward_count = curr_total_reward

        m_index = last_m_indices[self.agent_num]
        opp_index = last_m_indices[1 - self.agent_num]

        self._record_pulls(m_index, opp_index)
        self._track_opponent_repeat(opp_index)

        # The last-10 windows are the same for every machine: slice once
        recent_self = self.self_memory[-10:]
        recent_opp = self.opp_memory[-10:]

        obs = []
        for index in range(self.n_machines):
            obs.append(self._feature_row(index, recent_self, recent_opp))

        # Hand the model an ndarray instead of a list of lists
        self.predicts = self.mod_pred(np.array(obs))

    def _record_pulls(self, m_index, opp_index):
        """Update pull/success counters for the machines pulled last round"""
        # update memory information
        self.self_memory_per_action[m_index].append(self.last_reward)
        self.self_memory.append(m_index)
        self.opp_memory.append(opp_index)

        # numbers of pulls information
        self.n_pulls_self[m_index] += 1
        self.n_pulls_opp[opp_index] += 1

        # numbers of pulls total (counts twice when both pulled the same one)
        self.n_pulls_tot[m_index] += 1
        self.n_pulls_tot[opp_index] += 1

        # Update number of successes and failure
        self.n_success_self[m_index] += self.last_reward
        self.n_failure_self[m_index] += (1 - self.last_reward)

        # count discounted success
        decay = self.decay_rate ** self.n_pulls_tot[m_index]
        self.discounted_cumulative_success[m_index] += self.last_reward * decay
        self.discounted_total_success[m_index] = (
            self.n_success_self[m_index] * decay
        )

        self.ratio_self[m_index] = (
            self.n_success_self[m_index] / self.n_pulls_self[m_index]
        )

        # est 3
        self.est_3[m_index] = (
            self.ratio_self[m_index]
            * math.pow(self.decay_rate, self.n_pulls_tot[m_index])
        )

    def _track_opponent_repeat(self, opp_index):
        """Count consecutive opponent pulls of one machine; reset on switch"""
        opp_slot = 1 - self.agent_num
        previous = self.last_action[opp_slot]

        if opp_index == previous:
            self.repeat_opp[opp_index] += 1
            return

        # -1 is the "nothing recorded yet" sentinel; indexing with it would
        # touch the last machine through negative indexing.
        if previous >= 0:
            self.repeat_opp[previous] = 0

        # new last action
        self.last_action[opp_slot] = opp_index

    def _feature_row(self, index, recent_self, recent_opp):
        """Refresh the derived features of one machine and return its row"""
        n_tot = self.n_pulls_tot[index]
        n_opp = self.n_pulls_opp[index]

        self.ratio_opp[index] = n_opp / (1 + self.round)

        if n_tot != 0:
            net = (
                self.n_success_self[index] - self.n_failure_self[index]
                + n_opp - 1.5 * (n_opp > 0)
            )
            self.est_1[index] = net / n_tot
            self.est_2[index] = (
                (net + self.repeat_opp[index]) / n_tot
            ) * math.pow(self.decay_rate, n_tot)
        else:
            # Never pulled by anyone: no estimate available
            self.est_1[index] = 0
            self.est_2[index] = 0

        # number of self/opp pulls in last 10 action features
        self.n_pulls_self_last_10[index] = recent_self.count(index)
        self.n_pulls_opp_last_10[index] = recent_opp.count(index)

        # ratio of reward in the last 10 times this machine was selected;
        # 0 if it was never selected
        recent_rewards = self.self_memory_per_action[index][-10:]
        self.ratio_self_last_10_selected[index] = (
            sum(recent_rewards) / len(recent_rewards) if recent_rewards else 0
        )

        return [
            self.round,
            self.n_pulls_self[index],
            self.n_success_self[index],
            self.n_failure_self[index],
            self.discounted_cumulative_success[index],
            self.discounted_total_success[index],
            self.n_pulls_opp[index],
            self.n_pulls_tot[index],
            self.ratio_self[index],
            self.ratio_opp[index],
            self.est_1[index],
            self.est_2[index],
            self.est_3[index],
            self.repeat_opp[index],
            self.n_pulls_self_last_10[index],
            self.n_pulls_opp_last_10[index],
            self.ratio_self_last_10_selected[index],
        ]

def agent(observation, configuration):
    """Return the index of the machine to pull for the current step.

    On step 0 a new GreedyStrategy is created from the observation's
    'agentIndex' and the configuration's 'banditCount', stored in the
    module-level ``curr_agent``, and asked for its first-round pick. On every
    other step the stored agent is updated with the observation's 'reward'
    and 'lastActions' before it chooses.
    """
    global curr_agent

    if observation.step != 0:
        # Feed the outcome of the previous round in before choosing again
        curr_agent.updateDist(observation['reward'], observation['lastActions'])
        return curr_agent()

    # First step: build the agent for this episode
    agent_index = observation['agentIndex']
    curr_agent = GreedyStrategy(
        'Mr. Agent %i' % agent_index,
        agent_index,
        configuration['banditCount'],
    )
    return curr_agent.firstRound()

Writing main.py


In [3]:
# Bundle main.py and model.txt into the gzip-compressed archive main.py.tar.gz.
!tar cvfz main.py.tar.gz main.py model.txt

main.py
model.txt
