# OpenAI Gym Env

> Create a custom OpenAI Gym environment to simulate a betting strategy.

In [None]:
# | default_exp betting_env

# Import Libraries

In [None]:
# | export
import pandas as pd
import warnings
import gym
import numpy
import numexpr
import json
import os
import sys

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)
from betting_env.asian_handicap_pnl import *
from betting_env.datastructure.odds import MarketOdds
from betting_env.config.mongo import mongo_init
from pymatchpred.datastructure.lineup import TeamSheet
from infi.traceback import pretty_traceback_and_exit_decorator
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Betting Environment

Reinforcement Learning is a branch of machine learning (ML) that focuses on the complex and all-encompassing issue of training a system to behave appropriately in a given situation. Only the value of the reward and observations made about the environment are used to drive learning. The generality of this model allows it to be used in a wide range of real-world contexts, from gaming to the improvement of sophisticated industrial procedures.

In this perspective, the environment and the agent are two crucial elements of RL. The environment is the Agent's world where it exists and the Agent can engage in interactions with this environment by taking certain actions which cannot change the environment's laws or dynamics.

The goal of this work is to develop a Deep Reinforcement Learning environment that simulates a betting strategy. The theory underlying this environment is quite straightforward: placing a bet entails selecting a potential outcome, deciding on a stake size, multiplying it by the winning odds, and then deducting the initial wager and any losses.

Here, the agent picks one action from a discrete action space by combining:
- a wager size: small, medium, or large; and
- an outcome to bet on: home, draw, or away (1X2 line), or home or away (Asian handicap line).

Note that the agent can only choose one of these 15 combinations (3 wager sizes × 5 outcomes) per game; the implementation below additionally provides a "no bet" action.



In addition, our RL betting environment is a subclass of an OpenAI Gym environment, with an observation space equal to (gameId, home team lineup, away team lineup, betting line(1X2, Asian handicap) and selected odds) and an action space equal to the options available to the agent (the wager size and the chosen outcome). 

A single step in the environment consists of getting the current observation and placing a bet. The reward (the return on investment), which can be positive or negative, is then calculated by deducting the total amount wagered from the payout.

How the profit is computed depends on the line the agent selects. If the agent bets on the 1X2 line, the profit can be expressed as follows:

    - profit = (bet * invested_amount * results * odds) - (bet * invested_amount)
             = reward - expense

    with : 
    * bet = the chosen outcome or side (Home win, Draw, Away win)
    * invested_amount = bet size
    * results = postgame outcome
    * odds = 1X2 odds


If the agent selects the Asian handicap, the profit will depend on the outcome of the game's goal-difference and the chosen line (Half Integer Line, Integer Line, Quarter Integer Line). The agent can win the full bet or just the half of it, lose the full bet or just the half of it, or return its stake.

For instance, the profit could be expressed as follows if the agent had won the wager:

    - profit = invested_amount * (odds_ah -1)

If the agent wins the half of the bet :

    - profit = invested_amount * ((odds_ah - 1) * 0.5)

    with : 
    * invested_amount = bet size
    * odds_ah = Asian Handicap odds


When there are no more games to play or the user's bank balance is exhausted, an episode will be concluded. 

In [None]:
# | export
class BettingEnv(gym.Env):
    """Base class for sports betting environments.

    Creates an OpenAI Gym environment that supports betting a (small / medium / large)
    fraction of the current balance on a single outcome for a single game.

    Attributes
    ----------
    observation_space : gym.spaces.Box
        The declared observation space, of shape (1, n_odds + 4): the odds plus
        the game id, the two lineups and the Asian handicap line.

    action_space : gym.spaces.Tuple
        A pair of discrete choices ``(bet action, bet size)``: an index into
        ``ACTIONS_LIST`` and an index into ``bet_size``.

    balance : float
        The current balance of the environment.

    starting_bank : int, default=100
        The starting bank / balance for the environment.
    """

    metadata = {"render.modes": ["human"]}
    # Each action is a one-hot vector over the five markets, laid out as
    # [home (1X2), draw (1X2), away (1X2), home (AH), away (AH)].
    ACTIONS_LIST = [
        [0, 0, 0, 0, 0],  # no bets
        [1, 0, 0, 0, 0],  # betting on home team (1x2)
        [0, 0, 1, 0, 0],  # betting on away team (1x2)
        [0, 1, 0, 0, 0],  # betting on draw (1x2)
        [0, 0, 0, 1, 0],  # betting on home (Asian Handicap)
        [0, 0, 0, 0, 1],  # betting on away (Asian Handicap)
    ]
    # Default odds columns, in the same positional layout as the action vectors
    # and the result encoding (0 = home, 1 = draw, 2 = away), so that
    # bet * results * odds multiplies matching markets.
    DEFAULT_ODDS_COLUMNS = (
        "preGameOdds1",
        "preGameOddsX",
        "preGameOdds2",
        "preGameAhHome",
        "preGameAhAway",
    )
    # Default stake sizes, expressed as fractions of the current balance.
    DEFAULT_BET_SIZES = (0.05, 0.4, 0.7)

    def __init__(
        self,
        game_odds,
        odds_column_names=None,
        bet_size=None,
        starting_bank=100,
    ):
        """Initializes a new environment

        Parameters
        ----------
        game_odds: pandas dataframe
            One row per game. Must contain the odds columns plus "result",
            "lineId", "postGameGd", "homeTeamName", "awayTeamName",
            "homeTeamLineup", "awayTeamLineup" and "gameId". If a "gameDate"
            column is present the games are played in chronological order.
        odds_column_names: list of str, optional
            Odds column names, ordered as [home, draw, away, AH home, AH away]
            to match ``ACTIONS_LIST``. Defaults to ``DEFAULT_ODDS_COLUMNS``.
        bet_size: list, optional
            3 possible bets (small, medium and large), each a fraction of the
            current balance. Defaults to ``DEFAULT_BET_SIZES``.
        starting_bank: int
            bank account

        """

        super().__init__()
        # None sentinels avoid sharing one mutable default list between instances
        if odds_column_names is None:
            odds_column_names = BettingEnv.DEFAULT_ODDS_COLUMNS
        if bet_size is None:
            bet_size = BettingEnv.DEFAULT_BET_SIZES
        # games df (copied so the caller's frame is never modified)
        self._game = game_odds.copy()
        # sort data by date
        if "gameDate" in self._game.columns:
            self._game["gameDate"] = pd.to_datetime(self._game["gameDate"])
            self._game = self._game.sort_values(by="gameDate")
        # odds columns names
        self._odds_columns_names = list(odds_column_names)
        # odds (1X2 and Asian handicap) values
        self._odds = self._game[self._odds_columns_names].values
        # results
        self._results = self._game["result"].values
        # ah lines
        self._lines = self._game["lineId"].values
        # game goal-difference
        self._gd = self._game["postGameGd"].values
        # teams names
        self._teams = self._game[["homeTeamName", "awayTeamName"]]
        # teams lineups
        self._lineups = self._game[["homeTeamLineup", "awayTeamLineup"]].values
        # games ids (keeps the original index labels: always access with .iloc)
        self._game_ids = self._game["gameId"]
        # bet size
        self.bet_size = list(bet_size)  # small bet, medium bet, large bet
        # observation space
        # NOTE(review): get_observation() returns strings (game id, lineups) and
        # the AH line can be negative, so observations are not contained in this
        # Box; kept as declared to avoid changing the public spaces.
        self.observation_space = gym.spaces.Box(
            low=1.0,
            high=float("Inf"),
            shape=(1, (self._odds.shape[1] + 4)),
            dtype=numpy.float64,
        )
        # actions space
        self.action_space = gym.spaces.Tuple(
            (
                gym.spaces.Discrete(len(BettingEnv.ACTIONS_LIST)),  # betting action
                gym.spaces.Discrete(
                    len(self.bet_size)
                ),  # betting small or medium or large bet for the chosen action
            )
        )
        # env balance
        self.balance = self.starting_bank = starting_bank
        # current step (game)
        self.current_step = 0
        # bet size for each outcome
        self.bet_size_matrix = None

    def _get_current_index(self):
        """Returns the positional index of the current game (wraps around)."""
        return self.current_step % self._odds.shape[0]

    def get_odds(self):
        """Returns the odds for the current step.

        Returns
        -------
        odds : numpy.ndarray of shape (1, n_odds)
            The odds for the current step.
        """
        return pd.DataFrame([self._odds[self.current_step]]).values

    def get_bet(self, action):
        """Returns the betting vector for the action provided.

        Parameters
        ----------
        action : tuple of (int, int)
            (bet action index, bet size index) provided by the agent.

        Returns
        -------
        bet : list of length n_odds
            The betting vector, where the outcome specified in the action
            has a value of 1 and 0 otherwise.
        """
        return BettingEnv.ACTIONS_LIST[action[0]]

    def get_bet_size(self, action):
        """Returns bet size for the action provided.

        Parameters
        ----------
        action : tuple of (int, int)
            (bet action index, bet size index) provided by the agent.

        Returns
        -------
        bet_size : array of shape (1, n_odds)
            The betting size matrix, where the outcome specified in the action
            holds the stake (chosen fraction * current balance) and 0 elsewhere.
        """
        bet_size_matrix = numpy.zeros(shape=(1, self._odds.shape[1]))
        # action 0 means "no bet": the matrix stays all zeros
        if action[0] != 0:
            # stake is a fraction of the current balance
            stake = self.bet_size[action[1]] * self.balance
            # column(s) of the outcome the agent is betting on
            chosen = numpy.flatnonzero(
                numpy.array(BettingEnv.ACTIONS_LIST[action[0]]) == 1
            )
            bet_size_matrix[0, chosen] = stake

        return bet_size_matrix

    @pretty_traceback_and_exit_decorator
    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of episode is reached,
        you are responsible for calling reset() to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Parameters
        ----------
        action : tuple of (int, int)
            (bet action index, bet size index) provided by the agent.

        Returns
        -------
        observation : numpy.ndarray
            The observation of the game the action was applied to
        reward : float
            The amount of reward returned after previous action
        done : bool
            Whether the episode has ended, in which case further step() calls will return undefined results
        info : dict
            Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)

        """
        # current odds
        odds = self.get_odds()
        # reward
        reward = 0
        # finish
        done = False
        # episode info
        info = self.create_info(action)
        # bet size
        self.bet_size_matrix = self.get_bet_size(action)
        # read the observation up-front so it is defined on every return path
        # (it was previously unbound when the balance was exhausted)
        obs = self.get_observation()

        if self.balance < 1:  # no more money
            done = True
        else:
            # bet action
            bet = self.get_bet(action)
            # game result
            results = self.get_results()
            if self.legal_bet(bet):  # making sure agent has enough money for the bet
                # reward (positive or negative)
                reward = self.get_reward(bet, odds, results)
                # update balance
                self.balance += reward
                info.update(legal_bet=True)
            else:
                # illegal bet: penalise by the attempted stake, balance untouched
                reward = -float((numpy.asarray(bet) * self.bet_size_matrix).sum())
            # update info
            info.update(results=results.argmax())
            info.update(reward=reward)
            # increment step
            self.current_step += 1
            # check if we are finished
            if self.finish():
                done = True
        # update flag
        info.update(done=done)
        # return
        return obs, reward, done, info

    def get_observation(self):
        """return the observation of the current step.

        Returns
        -------
        obs : numpy.ndarray of shape (1, n_odds + 4)
            The observation of the current step.
        """
        # current game
        index = self._get_current_index()
        # current game id; positional access, because the Series keeps the
        # pre-sort index labels and plain [] would look up by label
        game_id = self._game_ids.iloc[index]
        # current game lineups
        lineups = self._lineups[index]
        # chosen odds
        betting_market = self.get_odds()
        betting_market_1X2 = betting_market[:, 0:3]
        betting_market_ah = betting_market[:, 3:]
        # chosen line (AH line)
        line = self._lines[index]
        # the observation
        obs = {
            "gameId": game_id,
            "home_lineup": lineups[0],
            "away_lineup": lineups[1],
            "odds_1": betting_market_1X2[0][0],
            "odds_X": betting_market_1X2[0][1],
            "odds_2": betting_market_1X2[0][2],
            "line": line,
            "odds_ah_home": betting_market_ah[0][0],
            "odds_ah_away": betting_market_ah[0][1],
        }
        return pd.DataFrame(obs, index=[0]).values

    def get_reward(self, bet, odds, results):
        """Calculates the reward

        Parameters
        ----------
        bet : list of length n_odds
            One-hot vector of the chosen outcome.
        odds: numpy.ndarray of shape (1, n_odds)
            The betting odds of the current game.
        results : array of shape (1, n_odds)
            One-hot matrix of the 1X2 outcome that happened.

        Returns
        -------
        reward : float
            The amount of reward returned after previous action
        """
        one_hot = numpy.asarray(bet)
        # agent choice
        bet_index = int(numpy.argmax(one_hot))
        # stake per outcome
        stakes = self.bet_size_matrix
        # if the action is a AH bet
        if bet_index in (3, 4):
            on_home = bet_index == 3
            # goal difference and line are mirrored for a bet on the away side
            goal_diff = self._gd[self.current_step]
            line = self._lines[self.current_step]
            obs_gd = goal_diff if on_home else -goal_diff
            ah_line = float(line if on_home else -line)
            # ah side odds
            ah_odds = odds[0, bet_index].item()
            # profit as computed by AsianHandicap.pnl for this result
            unit_profit = AsianHandicap.pnl(obs_gd, ah_line, ah_odds)
            if unit_profit is None:
                return 0.0
            return float((stakes * unit_profit).sum())

        # case 1X2: payout of the winning selection minus the stake
        payout = (one_hot * stakes * results * odds).sum()
        expense = (one_hot * stakes).sum()
        return float(payout - expense)

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns
        -------
        observation : numpy.ndarray
            the initial observation.
        """
        self.balance = self.starting_bank
        self.current_step = 0
        return self.get_observation()

    def render(self, mode="human"):
        """Prints the current balance, the current step and the current game info.

        Returns
        -------
        None
            The message is printed, not returned.
        """
        index = self._get_current_index()
        # positional access on both: the frames keep the pre-sort index labels
        teams = self._teams.iloc[index]
        game_id = self._game_ids.iloc[index]
        teams_str = "Home Team: {} VS Away Team: {}".format(
            teams.homeTeamName, teams.awayTeamName
        )

        print("Current balance at step {}: {}".format(self.current_step, self.balance))
        print("Current game id : {}".format(game_id))
        print(teams_str)

    def finish(self):
        """Checks if the episode has reached an end.

        The episode has reached an end if there are no more games to bet.

        Returns
        -------
        finish : bool
            True if the current_step is equal to n_games, False otherwise
        """
        return self.current_step == self._odds.shape[0]  # no more games left to bet

    def get_results(self):
        """Returns the results matrix for the current step.

        Returns
        -------
        result : array of shape (1, n_odds)
            The result matrix, where the index of the outcome that happened
            value is 1 and the rest of the indexes values are 0.
        """
        result = numpy.zeros(shape=(1, self._odds.shape[1]))
        result[
            numpy.arange(result.shape[0], dtype=numpy.int32),
            numpy.array([self._results[self.current_step]], dtype=numpy.int32),
        ] = 1

        return result

    def legal_bet(self, bet):
        """Checks if the bet is legal.

        Checks that the bet does not exceed the current balance.

        Parameters
        ----------
        bet : list of length n_odds
            The bet to check.

        Returns
        -------
        legal : bool
            True if the bet is legal, False otherwise.
        """
        staked = (numpy.asarray(bet) * self.bet_size_matrix).sum()
        return bool(staked <= self.balance)

    def create_info(self, action):
        """Creates the info dictionary for the given action.

        The info dictionary holds the following information:
            * the current step
            * game odds of the current step
            * bet action of the current step
            * bet size of the current step
            * the balance at the start of the current step
            * reward of the current step
            * game result of the current step
            * state of the current step
        Parameters
        ----------
        action : tuple of (int, int)
            (bet action index, bet size index) provided by the agent.

        Returns
        -------
        info : dict
            The info dictionary.
        """
        return {
            "current_step": self.current_step,
            "odds": self.get_odds(),
            # read from the class, not from a notebook-level `env` variable
            "bet_action": BettingEnv.ACTIONS_LIST[action[0]],
            "bet_size": self.bet_size[action[1]],
            "balance": self.balance,
            "reward": 0,
            "legal_bet": False,
            "results": None,
            "done": False,
        }

# Prepare Input

Load games data from a csv file

In [None]:
# | include: false

# read the first five games of the csv file
raw_odds_data = pd.read_csv("Hosts_edges.csv").head(5)
# parse the game date into datetimes
raw_odds_data["gameDate"] = pd.to_datetime(raw_odds_data["gameDate"])
# keep the game / team identifiers and the 1X2 odds, ordered by game date
odds_dataframe = raw_odds_data.loc[
    :,
    [
        "gameId",
        "gameDate",
        "homeTeamId",
        "homeTeamName",
        "awayTeamId",
        "awayTeamName",
        "preGame_odds1",
        "preGame_oddsX",
        "preGame_odds2",
    ],
].sort_values(by="gameDate")
# switch the odds columns to the camelCase names used by the environment
odds_dataframe.rename(
    columns={
        "preGame_odds{}".format(side): "preGameOdds{}".format(side)
        for side in ("1", "X", "2")
    },
    inplace=True,
)

Add new features

In [None]:
# | include: false

# initialise connections
mongo_init("prod_atlas")


def _latest_asian_market(game_id):
    """Return (home odds, away odds, line id) of the latest Asian handicap market."""
    market = MarketOdds.get_latest(game_id, "asian")
    return market["odds1"][0], market["odds2"][0], market["line_id"][0]


def _starting_lineup_json(team_id, game_date):
    """Return the latest starting lineup as a JSON string {player name: position}."""
    team_sheet = TeamSheet.get_latest(ra_team_id=team_id, date=game_date)
    return json.dumps({player.name: player.position for player in team_sheet.starting})


# add asian handicap: one market lookup per game instead of one per column
asian_markets = [_latest_asian_market(game_id) for game_id in odds_dataframe["gameId"]]
odds_dataframe["preGameAhHome"] = [market[0] for market in asian_markets]
odds_dataframe["preGameAhAway"] = [market[1] for market in asian_markets]
odds_dataframe["lineId"] = [market[2] for market in asian_markets]

# home team lineup
odds_dataframe["homeTeamLineup"] = [
    _starting_lineup_json(team_id, game_date)
    for team_id, game_date in zip(
        odds_dataframe["homeTeamId"], odds_dataframe["gameDate"]
    )
]

# away team lineup
odds_dataframe["awayTeamLineup"] = [
    _starting_lineup_json(team_id, game_date)
    for team_id, game_date in zip(
        odds_dataframe["awayTeamId"], odds_dataframe["gameDate"]
    )
]


# map results {homewin -> 0 , awaywin -> 2 , draw -> 1}
# (assignment aligns on the index, so the date-sorted row order does not matter)
odds_dataframe["result"] = raw_odds_data["postGame_tgt_outcome"].map(
    {1.0: 0.0, 0.0: 2.0, 0.5: 1.0}
)
# gd results
odds_dataframe["postGameGd"] = raw_odds_data["postGame_tgt_gd"]

In [None]:
# | include: false
# preview the first rows of the prepared games dataframe
odds_dataframe.head()

Unnamed: 0,gameId,gameDate,homeTeamId,homeTeamName,awayTeamId,awayTeamName,preGameOdds1,preGameOddsX,preGameOdds2,preGameAhHome,preGameAhAway,lineId,homeTeamLineup,awayTeamLineup,result,postGameGd
2,0034edd948828f48e2b27ab340de6a86c4d53d04040bee...,2018-10-03 18:45:00+00:00,aeb2f56fcedbcf4cd5c780179766996c7bf0b308064541...,Blackburn Rovers,4a104655f366c090e1849b57b87890a68f9400c8dd4a6b...,Sheffield United,3.41,3.55,2.22,2.02,1.9,0.25,"{""David Raya Martin"": ""GK"", ""Elliott Bennett"":...","{""Dean Henderson"": ""GK"", ""Kieron Freeman"": ""MR...",2.0,-2
0,001597ea24d591c49f1d89e9f415042a82c78f2b87bda3...,2018-11-25 20:00:00+00:00,6673572f126d843a06ec48ebf5ed8fcd8234bee469bb11...,Amiens,9551136312d3b08191d8813198a3486995f3b212ff1e01...,Marseille,4.85,3.47,1.88,2.04,1.88,0.5,"{""Regis Gurtner"": ""GK"", ""Emil Krafth"": ""DR"", ""...","{""Steve Mandanda"": ""GK"", ""Bouna Sarr"": ""MR"", ""...",2.0,-2
4,004ee2a59c5642a08c80f4861155738e75fae5a9a8f040...,2018-12-18 19:30:00+00:00,cd4ba7e93005562480c86a8eaec77586bd6d4b534eeae9...,Hertha BSC,579831158152f80a0b90461b86193ba9132ce265690a98...,FC Augsburg,2.28,3.25,3.3,1.99,1.96,-0.25,"{""Rune Jarstein"": ""GK"", ""Peter Pekarik"": ""DR"",...","{""Andreas Luthe"": ""GK"", ""Jonathan Schmid"": ""DR...",1.0,0
3,003a183e60ea92d1a01d39a673353f94007e02455acec0...,2019-04-20 18:00:00+00:00,54402207f254a5a270bcc95b53124165885b824c98fd3e...,Nîmes,21c36d309100f69db52126912d1a3678fe16d1aeee1183...,Bordeaux,2.02,3.69,3.84,1.72,2.25,-0.25,"{""Paul Bernardoni"": ""GK"", ""Renaud Ripart"": ""DR...","{""Benoit Costil"": ""GK"", ""Francois Kamano"": ""MR...",0.0,1
1,002b5b08be71f17a9bda4f789160a50e5cdf45b9e685d7...,2019-05-04 13:30:00+00:00,503fa80a790a8a8760f6a0acc887633335c4be1170caad...,FC Bayern München,2458b801f0667e51562faa1ce3d36d980d3bdb76f8941c...,Hannover 96,1.03,26.0,69.0,2.08,1.85,-3.75,"{""Sven Ulreich"": ""GK"", ""Joshua Kimmich"": ""DR"",...","{""Michael Esser"": ""GK"", ""Oliver Sorg"": ""DR"", ""...",0.0,2


# Agent - Env

Here, we'll set up our betting environment and let the computer program play and make decisions at random.

In [None]:
# | include: false
# at most one step per game in the prepared dataframe
max_steps_limit = len(odds_dataframe)
# betting environment built on the prepared games
env = BettingEnv(odds_dataframe)

In [None]:
# | include: false
# play one episode with uniformly random actions
env.reset()
for _ in range(max_steps_limit):
    # render() prints the state itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output
    env.render()
    print("\n Info: \n")
    obs, reward, done, info = env.step(env.action_space.sample())
    print(info)
    print("\n Observation: \n")
    print(obs)
    print("----------------------------------------------------")
    # stop as soon as the environment reports the end of the episode
    if done:
        break

Current balance at step 0: 100
Current game id : 001597ea24d591c49f1d89e9f415042a82c78f2b87bda30229a9821fd05dbb35
Home Team: Blackburn Rovers VS Away Team: Sheffield United
None

 Info: 

{'current_step': 0, 'odds': array([[3.41, 2.22, 3.55, 2.02, 1.9 ]]), 'bet_action': [0, 0, 0, 0, 1], 'bet_size': 0.7, 'balance': 100, 'reward': array(63.), 'legal_bet': True, 'results': 2, 'done': False}

 Observation: 

[['001597ea24d591c49f1d89e9f415042a82c78f2b87bda30229a9821fd05dbb35'
  '{"David Raya Martin": "GK", "Elliott Bennett": "DR", "Amari\'i Bell": "DL", "Richard Smallwood": "DML", "Darragh Lenihan": "DCR", "Derrick Williams": "DCL", "Harrison Reed": "AMR", "Corry Evans": "DMR", "Danny Graham": "FW", "Bradley Dack": "AMC", "Adam Armstrong": "AML"}'
  '{"Dean Henderson": "GK", "Kieron Freeman": "MR", "Enda Stevens": "ML", "Jack O\'Connell": "DCL", "John Egan": "DC", "Chris Basham": "DCR", "Oliver Norwood": "DMR", "John Fleck": "DML", "Mark Duffy": "AMC", "David McGoldrick": "FWL", "Billy Sha