# OpenAI Gym Env

> Create a custom Gym environment to simulate a trading strategy.

In [None]:
#| default_exp betting_env

# Import Libraries

In [None]:
#| export
import pandas as pd
import warnings
import gym
import numpy
import numexpr
from pandas import DataFrame
from infi.traceback import pretty_traceback_and_exit_decorator
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Betting Environment

In [None]:
class BettingEnv(gym.Env):
    """Base class for sports betting environments.

    Creates an OpenAI Gym environment that supports betting a (small / medium / large)
    fraction of the current balance on the 1X2 market and on the Asian handicap (AH)
    market of a single game per step.

    Attributes
    ----------
    observation_space : gym.spaces.Box
        The observation space for the environment.
        The observation space shape is (1, N) where N is the number of odds columns.

    action_space : gym.spaces.Tuple
        A tuple of three discrete choices: the index into ``ACTIONS_LIST``,
        the index into ``BET_SIZE`` for the 1X2 bet and the index into
        ``BET_SIZE`` for the AH bet.

    balance : float
        The current balance of the environment.

    starting_bank : int, default=100
        The starting bank / balance for the environment.
    """

    metadata = {'render.modes': ['human']}
    # bet size, expressed as a fraction of the current balance
    BET_SIZE = [0.05,  # small bet
                0.4,   # medium bet
                0.7    # large bet
                ]
    # actions: one binary mask per action over the 5 odds slots
    # (three 1X2 slots followed by AH-home and AH-away)
    ACTIONS_LIST = [
        [0, 0, 0, 0, 0],  # no bets
        [1, 0, 0, 1, 0],  # betting on home team (1x2 and AH)
        [0, 0, 1, 0, 1],  # betting on away team (1x2 and AH)
        [0, 1, 0, 1, 0],  # betting on draw (1x2 and AH-home)
        [0, 1, 0, 0, 1],  # betting on draw (1x2 and AH-away)
    ]

    def __init__(self,
                 game_odds,
                 odds_column_names=['preGame_odds1',
                                    'preGame_odds2',
                                    'preGame_oddsX',
                                    'preGame_ah_home',
                                    'preGame_ah_away'],
                 starting_bank=100):
        """Initializes a new environment

        Parameters
        ----------
        game_odds: dataframe of shape (n_games, n_columns)
            A list of games, with their betting odds. Besides the odds columns it
            must contain the columns 'result', 'homeTeamName', 'awayTeamName'
            and 'gameId'.
        odds_column_names: list of str
            A list of column names with length == n_odds.
        starting_bank: int, default=100
            The balance the environment starts (and is reset) with.
        """

        super().__init__()
        # games df (copied so the caller's frame is never touched)
        self._game = game_odds.copy()
        # odds columns names; copied so the shared default list (or the caller's
        # list) cannot be mutated through this instance
        # NOTE(review): the default order is (odds1, odds2, oddsX) while
        # ACTIONS_LIST / get_results use slot 1 for the draw and slot 2 for the
        # away win -- the default column order looks swapped; confirm before
        # relying on draw / away payouts.
        self._odds_columns_names = list(odds_column_names)
        # odds (1X2 and Asian handicap) values
        self._odds = self._game[self._odds_columns_names].values
        # results
        self._results = self._game['result'].values
        # teams names
        self.teams = self._game[['homeTeamName', 'awayTeamName']]
        # games ids
        self.game_ids = self._game['gameId']
        # observation space
        self.observation_space = gym.spaces.Box(low=1., high=float('Inf'),
                                                shape=(1, self._odds.shape[1]),
                                                dtype=numpy.float64)
        # actions space
        self.action_space = gym.spaces.Tuple((
            gym.spaces.Discrete(5),  # betting action
            gym.spaces.Discrete(3),  # betting small or medium or large bet for 1X2
            gym.spaces.Discrete(3),  # betting small or medium or large bet for AH
        ))
        # env balance
        self.balance = self.starting_bank = starting_bank
        # current step (game)
        self.current_step = 0
        # bet size for each outcome
        self.bet_size_matrix = None

    def _get_current_index(self):
        """Returns the current step wrapped into the range of available games."""
        return self.current_step % self._odds.shape[0]

    def get_odds(self):
        """Returns the odds for the current step.

        Returns
        -------
        odds : numpy.ndarray of shape (1, n_odds)
            The odds for the current step.
        """
        # wrapping the row in a list yields a fresh (1, n_odds) array
        return numpy.array([self._odds[self.current_step]])

    def get_bet(self, action):
        """Returns the betting matrix for the action provided.

        Parameters
        ----------
        action : tuple of int
            An action provided by the agent; only ``action[0]`` is used here.

        Returns
        -------
        bet : list of length n_odds
            The betting mask, where each outcome specified in the action
            has a value of 1 and 0 otherwise.
        """
        return self.ACTIONS_LIST[action[0]]

    def get_bet_size(self, action):
        """ Returns bet size for the action provided.

        Parameters
        ----------
        action : tuple of int
            An action provided by the agent; ``action[1]`` selects the 1X2 bet
            size and ``action[2]`` selects the Asian handicap bet size.

        Returns
        -------
        bet_size : array of shape (1, n_odds)
            The betting size matrix, where every 1X2 slot holds the chosen 1X2
            stake and every AH slot holds the chosen AH stake.
        """
        # stakes are a fraction of the current balance
        size_1X2 = self.BET_SIZE[action[1]] * self.balance
        size_ah = self.BET_SIZE[action[2]] * self.balance

        bet_size_matrix = numpy.ones(shape=self.observation_space.shape)
        # 1x2 (small or med or large)
        bet_size_matrix[:, 0:3] *= size_1X2
        # Asian Handicap (small or med or large)
        bet_size_matrix[:, 3:] *= size_ah

        return bet_size_matrix

    @pretty_traceback_and_exit_decorator
    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of episode is reached,
        you are responsible for calling reset() to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Parameters
        ----------
        action : tuple of int
            An action provided by the agent.

        Returns
        -------
        observation : numpy.ndarray of shape (1, n_odds)
            The odds of the next game, or an array of ones when no game is left.
        reward : float
            The amount of reward returned after previous action
        done : bool
            Whether the episode has ended, in which case further step() calls will return undefined results
        info : dict
            Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)

        """
        # current odds
        odds = self.get_odds()
        # reward
        reward = 0
        # finish
        done = False
        # episode info
        info = self.create_info(action)
        # bet size
        self.bet_size_matrix = self.get_bet_size(action)

        if self.balance < 1:  # no more money
            done = True
        else:
            bet = self.get_bet(action)
            results = self.get_results()
            if self.legal_bet(bet):  # making sure agent has enough money for the bet
                reward = self.get_reward(bet, odds, results)
                self.balance += reward
                info.update(legal_bet=True)
            else:
                # an unaffordable bet is penalised with the would-be stake,
                # but the balance itself is left untouched
                reward = -(bet * self.bet_size_matrix).sum()

            info.update(results=results.argmax())
            info.update(reward=reward)

            self.current_step += 1
            if self.finish():
                done = True
                # no game left: hand back a placeholder observation
                odds = numpy.ones(shape=self.observation_space.shape)
            else:
                odds = self.get_odds()
        info.update(done=done)
        return odds, reward, done, info

    def get_reward(self, bet, odds, results):
        """ Calculates the reward

        Parameters
        ----------
        bet : list of length n_odds
        odds: array of shape (1, n_odds)
            A game with its betting odds.
        results : array of shape (1, n_odds)

        Returns
        -------
        reward : float
            Winnings (stake * odds on the slots that paid out) minus the total stake.
        """
        # numexpr resolves the names used in the expressions from this frame's
        # locals, so the stake matrix must be bound to a local name
        bet_size_matrix = self.bet_size_matrix
        reward = numexpr.evaluate('sum(bet * bet_size_matrix * results * odds)')
        expense = numexpr.evaluate('sum(bet * bet_size_matrix)')
        return reward - expense

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns
        -------
        observation : numpy.ndarray of shape (1, n_odds)
            the initial observation.
        """
        self.balance = self.starting_bank
        self.current_step = 0
        return self.get_odds()

    def render(self, mode='human'):
        """Prints the current balance, the current step and the current game info.

        Nothing is returned; the information is written to stdout.
        """
        index = self._get_current_index()
        # both lookups are positional so they stay aligned with ``self._odds``
        # even when the input dataframe does not have a 0..n-1 index
        teams = self.teams.iloc[index]
        game_id = self.game_ids.iloc[index]

        print('Current balance at step {}: {}'.format(self.current_step, self.balance))
        print('Current game id : {}'.format(game_id))
        print('Home Team: {} VS Away Team: {}'.format(teams['homeTeamName'],
                                                      teams['awayTeamName']))

    def finish(self):
        """Checks if the episode has reached an end.

        The episode has reached an end if there are no more games to bet.

        Returns
        -------
        finish : bool
            True if the current_step is equal to n_games, False otherwise
        """
        return self.current_step == self._odds.shape[0]  # no more games left to bet

    def get_results(self):
        """Returns the results matrix for the current step.

        Returns
        -------
        result : array of shape (1, n_odds)
            The result matrix: the 1X2 slot whose index equals the stored result
            code is 1; the AH slots are (1, 0) for result code 0, (0, 1) for
            result code 2 and (0.5, 0.5) otherwise.
        """
        outcome = int(self._results[self.current_step])
        result = numpy.zeros(shape=self.observation_space.shape)
        # 1X2 result
        result[0, outcome] = 1
        # result for AH
        if outcome == 0:  # home
            result[:, 3] = 1
        elif outcome == 2:  # away
            result[:, 4] = 1
        else:  # draw
            result[:, 3] = 0.5
            result[:, 4] = 0.5

        return result

    def legal_bet(self, bet):
        """Checks if the bet is legal.

        Checks that the bet does not exceed the current balance.

        Parameters
        ----------
        bet : list of length n_odds
            The bet to check.

        Returns
        -------
        legal : bool
            True if the bet is legal, False otherwise.
        """
        return (bet * self.bet_size_matrix).sum() <= self.balance

    def create_info(self, action):
        """Creates the info dictionary for the given action.

        The info dictionary holds the following information:
            * the current step
            * the odds for the current step
            * the betting mask selected by the action
            * the 1X2 and AH bet size fractions selected by the action
            * the balance at the start of the current step
            * placeholders for reward, legal_bet, results and done

        Parameters
        ----------
        action : tuple of int
            An action provided by the agent.

        Returns
        -------
        info : dict
            The info dictionary.
        """
        return {'current_step': self.current_step,
                'odds': self.get_odds(),
                'bet_action': self.ACTIONS_LIST[action[0]],
                'bet_size_1x2': self.BET_SIZE[action[1]],
                'bet_size_ah': self.BET_SIZE[action[2]],
                'balance': self.balance,
                'reward': 0,
                'legal_bet': False,
                'results': None,
                'done': False}

# Prepare Input

In [None]:
# load data
raw_odds_data = pd.read_csv('Hosts_edges.csv')
# game date
raw_odds_data['gameDate'] = pd.to_datetime(raw_odds_data['gameDate'], dayfirst=True)
# extract specific fields; take an explicit copy so the column assignments
# below write to an independent frame instead of a slice of raw_odds_data
odds_dataframe = raw_odds_data[['gameId',
                                'homeTeamName',
                                'awayTeamName',
                                'preGame_odds1',
                                'preGame_oddsX',
                                'preGame_odds2']].copy()
# add asian handicap (reuses the 1X2 home / away odds as the AH odds)
odds_dataframe['preGame_ah_home'] = odds_dataframe['preGame_odds1']
odds_dataframe['preGame_ah_away'] = odds_dataframe['preGame_odds2']
# map results {homewin -> 0 , awaywin -> 2 , draw -> 1}
odds_dataframe['result'] = raw_odds_data['postGame_tgt_outcome'].map({1.0: 0.0, 0.0: 2.0, 0.5: 1.0})

In [None]:
# preview the first rows of the prepared odds table
odds_dataframe.head()

Unnamed: 0,gameId,homeTeamName,awayTeamName,preGame_odds1,preGame_oddsX,preGame_odds2,preGame_ah_home,preGame_ah_away,result
0,001597ea24d591c49f1d89e9f415042a82c78f2b87bda3...,Amiens,Marseille,4.85,3.47,1.88,4.85,1.88,2.0
1,002b5b08be71f17a9bda4f789160a50e5cdf45b9e685d7...,FC Bayern München,Hannover 96,1.03,26.0,69.0,1.03,69.0,0.0
2,0034edd948828f48e2b27ab340de6a86c4d53d04040bee...,Blackburn Rovers,Sheffield United,3.41,3.55,2.22,3.41,2.22,2.0
3,003a183e60ea92d1a01d39a673353f94007e02455acec0...,Nîmes,Bordeaux,2.02,3.69,3.84,2.02,3.84,0.0
4,004ee2a59c5642a08c80f4861155738e75fae5a9a8f040...,Hertha BSC,FC Augsburg,2.28,3.25,3.3,2.28,3.3,1.0


# Agent - Env

In [None]:
# one step is available per game (row) in the odds table
max_steps_limit = len(odds_dataframe)
# build the betting environment with its default odds columns and bank
env = BettingEnv(game_odds=odds_dataframe)

In [None]:
env.reset()
# Play a random agent through every game: one step per game, so the loop runs
# max_steps_limit times and the final step reports done once no game is left.
for _ in range(max_steps_limit):
    # render() prints its report itself and returns nothing, so it is not
    # wrapped in print()
    env.render()
    obs, reward, done, info = env.step(env.action_space.sample())
    print('info',info)
    print("----------------------------------------------------")
    if done:
        break

Current balance at step 0: 100
Current game id : 001597ea24d591c49f1d89e9f415042a82c78f2b87bda30229a9821fd05dbb35
Home Team: Amiens VS Away Team: Marseille
None
info {'current_step': 0, 'odds': array([[4.85, 1.88, 3.47, 4.85, 1.88]]), 'bet_action': [0, 1, 0, 0, 1], 'bet_size_1x2': 0.4, 'bet_size_ah': 0.4, 'balance': 100, 'reward': -4.800000000000011, 'legal_bet': True, 'results': 2, 'done': False}
----------------------------------------------------
Current balance at step 1: 95.19999999999999
Current game id : 002b5b08be71f17a9bda4f789160a50e5cdf45b9e685d748cfb42d1365f12c65
Home Team: FC Bayern München VS Away Team: Hannover 96
None
info {'current_step': 1, 'odds': array([[ 1.03, 69.  , 26.  ,  1.03, 69.  ]]), 'bet_action': [0, 0, 0, 0, 0], 'bet_size_1x2': 0.4, 'bet_size_ah': 0.05, 'balance': 95.19999999999999, 'reward': 0.0, 'legal_bet': True, 'results': 0, 'done': False}
----------------------------------------------------
Current balance at step 2: 95.19999999999999
Current game id

In [None]:
#| hide
import nbdev

# export the notebook's cells into the library module
nbdev.nbdev_export()