# OpenAI Gym Env

> Create a custom Gym environment to simulate a trading strategy.

In [None]:
# | default_exp betting_env

# Import Libraries

In [None]:
# | export

import pandas as pd
import gym
import numpy as np
import numexpr
import json
import requests
import os
import sys

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)
from betting_env.asian_handicap_pnl import *
from infi.traceback import pretty_traceback_and_exit_decorator

# Observations Output

We provide here a simple class that stores our RL observations. The first format is a numerical numpy array that holds numerical game information and the second format is a user-friendly output to show game information in each observation.

In [None]:
# | export


class Observation:
    """Snapshot of a single game as presented to the agent.

    The same game is available in two formats:

    * a flat numerical numpy array, built by calling the instance and stored
      in ``numerical_observation``;
    * a one-row, human-readable ``pandas.DataFrame`` returned by
      ``observation_pretty_output``.
    """

    def __init__(
        self,
        game_id: int,  # game Id
        lineups: np.ndarray,  # lineups
        lineups_ids: np.ndarray,  # lineups opta Ids
        teams_names: pd.Series,  # team names
        teams_ids: np.ndarray,  # teams opta Ids
        betting_market: np.ndarray,  # odds
        ah_line: float,  # Asian handicap line
        observation_shape: tuple,  # observation shape
    ):
        self.game_id = game_id
        self.lineups = lineups
        self.lineups_ids = lineups_ids
        self.teams_names = teams_names
        self.teams_ids = teams_ids
        self.betting_market = betting_market
        self.ah_line = ah_line
        self.shape = observation_shape

    def __call__(self) -> "Observation":
        """Build the numerical observation and return ``self``.

        The pieces are concatenated column-wise in the order: game id, team
        ids, home lineup ids, away lineup ids, odds; the result is reshaped to
        the shape given at construction time. ``numerical_observation`` and
        ``dtype`` only exist after this call.
        """
        self.numerical_observation = np.concatenate(
            (
                np.array([self.game_id]).reshape(1, -1),  # opta gameId
                np.array([self.teams_ids]),  # teams Opta Ids
                np.array([self.lineups_ids[0]]),  # home lineup (players opta Id)
                np.array([self.lineups_ids[1]]),  # away lineup (players opta Id)
                self.betting_market,  # odds (1x2 and AH)
            ),
            axis=1,
        ).reshape(self.shape)

        self.dtype = self.numerical_observation.dtype
        return self

    def reshape(self, new_shape: tuple) -> "Observation":
        """Reshape the numerical observation in place and return ``self``."""
        self.numerical_observation = self.numerical_observation.reshape(new_shape)
        return self

    def astype(self, data_type) -> "Observation":
        """Cast the numerical observation to ``data_type`` and return ``self``."""
        self.numerical_observation = self.numerical_observation.astype(data_type)
        return self

    def observation_pretty_output(self) -> pd.DataFrame:
        """Return the observation as a one-row, user-friendly DataFrame.

        The first three odds columns are read as (1, X, 2) and the last two
        as (Asian handicap home, Asian handicap away).
        """
        # ``list`` yields the values by position for a Series, list or array.
        # Plain ``series[0]`` on a label-indexed Series is deprecated in
        # recent pandas versions.
        home_team, away_team = list(self.teams_names)[:2]
        # betting_market is 2-D with a single row holding all the odds
        odds_row = self.betting_market[0]

        self.observation = {
            "gameId": [self.game_id],
            "homeTeam": [home_team],
            "awayTeam": [away_team],
            "homeLineup": self.lineups[0],
            "awayLineup": self.lineups[1],
            "odds1": odds_row[0],
            "oddsX": odds_row[1],
            "odds2": odds_row[2],
            "ahLine": [self.ah_line],
            "oddsAhHome": odds_row[3],
            "oddsAhAway": odds_row[4],
        }

        return pd.DataFrame(self.observation, index=[0])

# Betting Environment

Reinforcement Learning is a branch of machine learning (ML) that focuses on the complex and all-encompassing issue of training a system to behave appropriately in a given situation. Only the value of the reward and observations made about the environment are used to drive learning. The generality of this model allows it to be used in a wide range of real-world contexts, from gaming to the improvement of sophisticated industrial procedures.

In this perspective, the environment and the agent are two crucial elements of RL. The environment is the Agent's world where it exists and the Agent can engage in interactions with this environment by taking certain actions which cannot change the environment's laws or dynamics.

The goal of this work is to develop a Deep Reinforcement Learning environment that simulates a betting strategy. The theory underlying this environment is quite straightforward: placing a bet entails selecting a potential outcome, deciding on a stake size, multiplying it by the winning odds, and then deducting the initial wager and any losses.

Here, the agent can choose a discrete action space with the following options for actions: 
- choose a small, medium, or big wager size. And,
- Wagering on the home, draw, or away (1X2 lines), or on the home or away Asian line.

Note that the agent chooses exactly one action per game: one of the 15 stake/outcome combinations above, or no bet at all (16 discrete actions in total).



In addition, our RL betting environment is a subclass of an OpenAI Gym environment, with an observation space equal to (gameId, home team lineup, away team lineup, betting line(1X2, Asian handicap) and selected odds) and an action space equal to the options available to the agent (the wager size and the chosen outcome). 

A simple action in the environment consists of getting the current observation and placing a bet. The reward (the investment return), which can be positive or negative, is then calculated by deducting the total amount of the wager from the winnings.

The betting line that the agent selects determines how the profit is computed. For example, if the agent bets on the 1X2 line, the profit can be expressed as follows:

    - profit = (bet * invested_amount * results * odds) - (bet * invested_amount)
             = reward - expense

    with : 
    * bet = the chosen outcome or side (Home win, Draw, Away win)
    * invested_amount = bet size
    * results = postgame outcome
    * odds = 1X2 odds


If the agent selects the Asian handicap, the profit will depend on the outcome of the game's goal-difference and the chosen line (Half Integer Line, Integer Line, Quarter Integer Line). The agent can win the full bet or just the half of it, lose the full bet or just the half of it, or return its stake.

For instance, the profit could be expressed as follows if the agent had won the wager:

    - profit = invested_amount * (odds_ah -1)

If the agent wins the half of the bet :

    - profit = invested_amount * ((odds_ah - 1) * 0.5)

    with : 
    * invested_amount = bet size
    * odds_ah = Asian Handicap odds


When there are no more games to play or the user's bank balance is exhausted, an episode will be concluded. 

In [None]:
# | export


class BettingEnv(gym.Env):
    """Base class for sports betting environments.

    Creates an OpenAI Gym environment that supports betting a (small / medium / large) amount
    on a single outcome for a single game.

    Parameters
    ----------
    observation_space : gym.spaces.Box
        The observation space for the environment.
        The observation space shape is (1, N) where N is the number of possible
        outcomes for the game + len(gameId, 2 lineups, ah line) .

    action_space : gym.spaces.Discrete
        The action space for the environment.
        The action space is a set of choices that the agent can do.

    balance : float
        The current balance of the environment.

    starting_bank : int, default=100
        The starting bank / balance for the environment.
    """

    metadata = {"render_modes": ["human"]}

    # actions
    ACTIONS_LIST = [
        [0, 0, 0, 0, 0],  # no bets
        [0.05, 0, 0, 0, 0],  # betting on home team (1x2)
        [0.4, 0, 0, 0, 0],  # betting on home team (1x2)
        [0.7, 0, 0, 0, 0],  # betting on home team (1x2)
        [0, 0, 0.05, 0, 0],  # betting on away team (1x2)
        [0, 0, 0.4, 0, 0],  # betting on away team (1x2)
        [0, 0, 0.7, 0, 0],  # betting on away team (1x2)
        [0, 0.05, 0, 0, 0],  # betting on draw (1x2)
        [0, 0.4, 0, 0, 0],  # betting on draw (1x2)
        [0, 0.7, 0, 0, 0],  # betting on draw (1x2)
        [0, 0, 0, 0.05, 0],  # betting on home (Asian Handicap)
        [0, 0, 0, 0.4, 0],  # betting on home (Asian Handicap)
        [0, 0, 0, 0.7, 0],  # betting on home (Asian Handicap)
        [0, 0, 0, 0, 0.05],  # betting on away (Asian Handicap)
        [0, 0, 0, 0, 0.4],  # betting on away (Asian Handicap)
        [0, 0, 0, 0, 0.7],  # betting on away (Asian Handicap)
    ]

    def __init__(
        self,
        game_odds,
        odds_column_names=None,
        starting_bank=100,
    ):
        """Initializes a new environment

        Parameters
        ----------
        game_odds: pandas dataframe
            A list of games, with their betting odds. The columns read here
            are: the odds columns, "result", "lineId", "postGameGd",
            "homeTeamName", "awayTeamName", "homeTeamOptaId",
            "awayTeamOptaId", "homeTeamLineup", "awayTeamLineup",
            "homeTeamLineupIds", "awayTeamLineupIds" and "optaGameId".
            If a "gameDate" column is present the games are sorted by it.
        odds_column_names: list of str, optional
            A list of column names with length == number of odds, ordered as
            (home, draw, away, AH home, AH away) so that each position lines
            up with the matching position in ACTIONS_LIST and in the results
            matrix. Defaults to
            ["preGameOdds1", "preGameOddsX", "preGameOdds2",
            "preGameAhHome", "preGameAhAway"].
        starting_bank: int
            bank account

        """

        super().__init__()
        # Build the default inside the call: a list default in the signature
        # would be shared between all instances. The order is 1, X, 2 because
        # ACTIONS_LIST places the draw stake at position 1 and the away stake
        # at position 2.
        if odds_column_names is None:
            odds_column_names = [
                "preGameOdds1",
                "preGameOddsX",
                "preGameOdds2",
                "preGameAhHome",
                "preGameAhAway",
            ]
        # games df (copied so the caller's frame is never modified)
        self._game = game_odds.copy()
        # sort data by date
        if "gameDate" in self._game.columns:
            self._game["gameDate"] = pd.to_datetime(self._game["gameDate"])
            self._game = self._game.sort_values(by="gameDate")
        # odds (1X2 and Asian handicap) values
        self._odds = self._game[list(odds_column_names)].values
        # results
        self._results = self._game["result"].values
        # ah lines
        self._lines = self._game["lineId"].values
        # game goal-difference
        self._gd = self._game["postGameGd"].values
        # teams names
        self._teams_names = self._game[["homeTeamName", "awayTeamName"]]
        # teams opta id
        self._teams_ids = self._game[["homeTeamOptaId", "awayTeamOptaId"]].values

        # teams lineups (names and positions)
        self._lineups = self._game[["homeTeamLineup", "awayTeamLineup"]].values
        # teams lineups (opta ids)
        self._lineups_ids = self._game[
            ["homeTeamLineupIds", "awayTeamLineupIds"]
        ].values
        # games ids
        self._game_ids = self._game["optaGameId"].values
        # observation space
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(
                self._odds.shape[1] + 25,
            ),  # 25 = 22(players Ids) + 2(home and away team ids) + 1(gameId)
            dtype=np.float64,
        )
        # actions space
        self.action_space = gym.spaces.Discrete(
            len(BettingEnv.ACTIONS_LIST)
        )  # betting action
        # env balance
        self.balance = self.starting_bank = starting_bank
        # current step (game)
        self.current_step = 0
        # bet size for each outcome
        self.bet_size_matrix = None
    def _get_current_index(self):
        """Return the row of the current game, wrapping around the game list."""
        n_games = self._odds.shape[0]
        return self.current_step % n_games

    def get_odds(self):
        """Returns the odds for the current step.

        The row is selected with the same wrapped index that
        ``get_observation`` and ``render`` use, so every part of an
        observation refers to the same game.

        Returns
        -------
        odds : numpy.ndarray of shape (1, n_odds)
            The odds for the current step.
        """
        # Indexing with a one-element list keeps the result 2-D and returns a
        # copy, so callers cannot modify the stored odds through it.
        return self._odds[[self._get_current_index()]]

    def get_bet(self, action: int):
        """Looks up the stake vector associated with an action.

        Parameters
        ----------
        action : int
            An action provided by the agent (a position in ACTIONS_LIST).

        Returns
        -------
        bet : list of length n_odds
            The ACTIONS_LIST entry for the action: one value per outcome,
            non-zero only for the outcome that is bet on.
        """
        available_actions = BettingEnv.ACTIONS_LIST
        return available_actions[action]

    @pretty_traceback_and_exit_decorator
    def step(self, action: int):
        """Run one timestep of the environment's dynamics. When end of episode is reached,
        you are responsible for calling reset() to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Parameters
        ----------
        action : int
            An action provided by the agent.

        Returns
        -------
        observation : Observation or numpy.ndarray
            The observation of the next game. When the episode is over, an
            array of ones with the shape of the observation space is returned
            instead.
        reward : float
            The amount of reward returned after previous action
        done : bool
            Whether the episode has ended, in which case further step() calls will return undefined results
        info : dict
            Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)

        """
        # placeholder observation, only returned when the episode is over
        observation = np.ones(shape=self.observation_space.shape)
        # reward
        reward = 0
        # finish
        done = False
        # episode info (captures the state before the bet is settled)
        info = self.create_info(action)

        if self.balance < 1:  # no more money
            done = True
        else:
            # bet action
            bet = self.get_bet(action)
            # game result
            results = self.get_results()
            if self.legal_bet(bet):  # making sure agent has enough money for the bet
                # current odds
                odds = self.get_odds()
                # reward (positive or negative)
                reward = self.get_reward(bet, odds, results)
                # update balance
                self.balance += reward
                info.update(legal_bet=True)
            else:
                # Penalise an unaffordable bet by the stake it tried to place.
                # Stakes are stored as multipliers of the balance, and
                # `bet_size_matrix` is never populated (it stays None), so the
                # stake is derived from the balance instead.
                reward = -(np.asarray(bet, dtype=float) * self.balance).sum()
            # update info
            info.update(results=results.argmax())
            info.update(reward=reward)
            # increment step
            self.current_step += 1
            # check if we are finished
            if self.finish():
                done = True
            else:
                observation = self.get_observation()

        # update flag
        info.update(done=done)
        # return
        return observation, reward, done, info

    def get_observation(self):
        """Builds the observation for the current step.

        Returns
        -------
        obs : Observation
            The current game (ids, lineups, teams, odds and AH line) with its
            numerical representation already built.
        """
        row = self._get_current_index()
        current_game = Observation(
            game_id=self._game_ids[row],
            lineups=self._lineups[row],
            lineups_ids=self._lineups_ids[row],
            teams_names=self._teams_names.iloc[row],
            teams_ids=self._teams_ids[row],
            betting_market=self.get_odds(),  # 1X2 and AH odds
            ah_line=self._lines[row],  # chosen Asian handicap line
            observation_shape=self.observation_space.shape,
        )
        # calling the instance builds its numerical array and returns it
        return current_game()

    def get_reward(self, bet: np.array, odds: np.array, results: np.array):
        """Calculates the reward

        The position of the largest stake in ``bet`` selects the market:
        positions 3 and 4 are settled as Asian handicap bets (home / away)
        through ``AsianHandicap.pnl``; every other position is settled as a
        1X2 bet.

        Parameters
        ----------
        bet : sequence of length n_odds
            Stake per outcome, expressed as a multiplier of the balance.
        odds: numpy.ndarray of shape (1, n_odds)
            The odds of the current game.
        results : array of shape (1, n_odds)
            1 at the position of the outcome that happened, 0 elsewhere.

        Returns
        -------
        reward : float
            The profit (positive or negative) of the bet.
        """
        # NOTE: the numexpr expressions below are given no explicit variable
        # mapping, so the names `bet`, `balance`, `results`, `odds` and
        # `profit` must stay local variables of this function.
        balance = self.balance
        # agent choice
        bet_index = np.argmax(np.array(bet))
        # if the action is a AH bet
        if bet_index in (3, 4):
            index = self._get_current_index()
            home_side = bet_index == 3
            # goal difference and line are mirrored for the away side
            goal_difference = self._gd[index]
            line = self._lines[index]
            obs_gd = goal_difference if home_side else -goal_difference
            ah_line = float(line if home_side else -line)
            # ah side odds
            ah_odds = odds[:, 3:4][0].item() if home_side else odds[:, 4:][0].item()
            # calculate profit
            profit = AsianHandicap.pnl(obs_gd, ah_line, ah_odds)
            profit = (
                0 if profit is None else numexpr.evaluate("sum(bet * balance * profit)")
            )
        else:  # case 1X2
            reward = numexpr.evaluate("sum(bet * balance * results * odds)")
            expense = numexpr.evaluate("sum(bet * balance)")
            profit = reward - expense

        return profit

    def reset(self):
        """Restores the starting bank, rewinds to the first game and returns
        its observation.

        Returns
        -------
        observation : Observation
            the initial observation.
        """
        self.current_step = 0
        self.balance = self.starting_bank
        initial_observation = self.get_observation()
        return initial_observation

    def render(self, mode: str = "human"):
        """Prints the current balance, the current step, the current game id
        and the teams of the current game.

        Nothing is returned; ``mode`` is accepted but not used.
        """
        row = self._get_current_index()
        current_teams = self._teams_names.iloc[row]
        # a single game comes back as a Series; a DataFrame is iterated row by row
        if isinstance(current_teams, pd.DataFrame):
            rows = current_teams.itertuples()
        else:
            rows = [current_teams]

        matchups = []
        for entry in rows:
            matchups.append(
                "Home Team: {} VS Away Team: {}".format(
                    entry.homeTeamName, entry.awayTeamName
                )
            )

        print("Current balance at step {}: {}".format(self.current_step, self.balance))
        print("Current game id : {}".format(self._game_ids[row]))
        print(", ".join(matchups))

    def finish(self):
        """Tells whether the episode is over.

        The episode is over once every game has been played.

        Returns
        -------
        finish : bool
            True if the current_step is equal to n_games, False otherwise
        """
        n_games = self._odds.shape[0]
        return self.current_step == n_games

    def get_results(self):
        """Builds the one-hot results matrix for the current step.

        Returns
        -------
        result : array of shape (1, n_odds)
            1 at the position of the outcome that happened, 0 everywhere else.
        """
        n_outcomes = self._odds.shape[1]
        # the stored result is the position of the outcome that happened
        outcome = np.int32(self._results[self.current_step])
        one_hot = np.zeros(shape=(1, n_outcomes))
        one_hot[0, outcome] = 1
        return one_hot

    def legal_bet(self, bet: np.array):
        """Tells whether the agent can afford the bet.

        Each entry of ``bet`` is multiplied by the current balance and the
        total is compared with the balance.

        Parameters
        ----------
        bet : sequence of length n_odds
            The bet to check.

        Returns
        -------
        legal : bool
            True if the total stake does not exceed the balance, False otherwise.
        """
        available = self.balance
        total_stake = sum(fraction * available for fraction in bet)
        return total_stake <= available

    def create_info(self, action: int):
        """Creates the info dictionary for the given action.

        The info dictionary holds the following information:
            * the current step
            * game odds of the current step
            * bet action of the current step
            * the balance at the start of the current step
            * reward of the current step (initialised to 0)
            * whether the bet was legal (initialised to False)
            * game result of the current step (initialised to None)
            * whether the episode is done (initialised to False)

        Parameters
        ----------
        action : int
            An action provided by the agent.

        Returns
        -------
        info : dict
            The info dictionary.
        """
        return {
            "current_step": self.current_step,
            "odds": self.get_odds(),
            # look the action up on this instance, not on a module-level `env`
            "bet_action": self.get_bet(action),
            "balance": self.balance,
            "reward": 0,
            "legal_bet": False,
            "results": None,
            "done": False,
        }

# Load Data

We provide here a simple function to load game data hosted in our MongoDB cluster. You can find below the "api-key", the database and the collection name.

In [None]:
# |export

def load_data_from_db(
    url, database, collection, data_source, projection, api_key, timeout=30
):
    """Return Fixtures from Our MongoDb Cluster

    Sends a POST request to ``url`` with the collection, database, data
    source and projection as a JSON payload, and returns the "documents"
    field of the JSON response as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to query.
    database, collection, data_source : str
        Sent as "database", "collection" and "dataSource" in the payload.
    projection : dict
        Sent as "projection" in the payload.
    api_key : str
        Sent in the "api-key" request header.
    timeout : float, default=30
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned document.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # payload
    payload = json.dumps(
        {
            "collection": collection,
            "database": database,
            "dataSource": data_source,
            "projection": projection,
        }
    )
    # headers
    headers = {
        "Content-Type": "application/json",
        "Access-Control-Request-Headers": "*",
        "api-key": api_key,
    }

    # Response
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # fail with the HTTP error itself rather than a KeyError on "documents"
    response.raise_for_status()

    return pd.DataFrame(response.json()["documents"])

In [None]:
# |export
# Connection settings of the Fixtures collection.
url = "https://eu-west-2.aws.data.mongodb-api.com/app/data-nsnqb/endpoint/data/v1/action/find"
collection = "Fixtures"
database = "Football"
data_source = "Cluster0"
projection = {"_id": 0}
# NOTE(security): an API key committed to the source is readable by anyone
# with access to the file. Prefer supplying it through the
# MONGODB_DATA_API_KEY environment variable; the literal is only kept as a
# fallback so existing usage keeps working.
api_key = os.environ.get(
    "MONGODB_DATA_API_KEY",
    "LaKWy4iHSePqEgJHXwFq92i2lgs6rqNiznWEgKyeJpyIS5OC1l1FzxCun92yuOLu",
)
fixtures = load_data_from_db(
    url=url,
    database=database,
    collection=collection,
    data_source=data_source,
    projection=projection,
    api_key=api_key,
)

In [None]:
# | include: false
# Preview the first rows of the loaded fixtures.
fixtures.head()

Unnamed: 0,gameId,optaGameId,gameDate,homeTeamId,homeTeamOptaId,homeTeamName,awayTeamId,awayTeamOptaId,awayTeamName,homeTeamLineup,...,awayTeamLineup,awayTeamLineupIds,preGameOdds1,preGameOddsX,preGameOdds2,preGameAhHome,preGameAhAway,lineId,result,postGameGd
0,0655e244d8d596b5572e86426e2a7ca6178044efa59437...,991013,2018-08-25T14:00:00Z,9ee012a80cade2df55b71580bf5e238bcd6be6f696fdc1...,45,Norwich City,38ca605bcd29a5a37697ca66e533ae817ced71b6bf275c...,2,Leeds United,"{""Ivo Pinto"": ""DR"", ""Timm Klose"": ""DCL"", ""Loui...",...,"{""Mateusz Klich"": ""MCR"", ""Luke Ayling"": ""DR"", ...","[72222, 66588, 98760, 155405, 220037, 61810, 8...",2.62,3.6,2.77,1.92,2.02,0.0,2,-3
1,019c223b4a03917c2f1685beab4d5d278f7bff3913f239...,991018,2018-08-25T14:00:00Z,eb89c068ca204a72408360450847a990c97c5b5ff0ec9f...,110,Stoke City,bbb63e4ea54b0d60b48a1f8440254d7e656dfbfcbef825...,88,Hull City,"{""Bruno Martins Indi"": ""DCL"", ""Peter Crouch"": ...",...,"{""Eric Lichaj"": ""DR"", ""Jordy De Wijs"": ""DCL"", ...","[45139, 173549, 15144, 82771, 240499, 28541, 5...",1.917,3.48,4.62,1.88,2.04,-0.5,0,2
2,0f9ad12eec9f24277ab491f5f26f610eaa918903a34147...,991014,2018-08-25T16:30:00Z,04c71986b6503ba5b09a7098ceb79954d20049f21ba45b...,17,Nottingham Forest,95d3bddc19a15d34a7876dcffc1a3e9bc63d809b69308a...,41,Birmingham City,"{""Matty Cash"": ""AMR"", ""Costel Pantilimon"": ""GK...",...,"{""Kristian Pedersen"": ""DL"", ""Maxime Colin"": ""D...","[229009, 86132, 77800, 114054, 69842, 85365, 8...",2.04,3.41,4.12,2.02,1.9,-0.5,1,0
3,0af3418e3c2c42119592875b44a1b454faca47a6758765...,1001261,2018-08-31T20:15:00Z,39ee7c99b58235a4a19c4ee4d15d2730309d812c0c55d0...,371,Vitoria de Guimaraes,c42d39676623aaa1608de4208d738718ff45072232783f...,6685,Tondela,"{""Douglas"": ""GK"", ""Falaye Sacko"": ""DR"", ""Flore...",...,"{""Claudio Ramos"": ""GK"", ""David Bruno"": ""DR"", ""...","[79404, 209129, 104294, 75928, 222751, 16738, ...",1.85,3.3,4.35,1.96,1.96,-0.5,0,1
4,08eafc6acd2e4d985a50ac118ad9fd8b4313bd7f9e3035...,991031,2018-09-01T14:00:00Z,0db353094ccf93e0005cf378ea862b56e77cacc57b7c5e...,111,Wigan Athletic,58301066042bbdf19de8fe7d41afc53626b5aa79034712...,72,Rotherham United,"{""Reece James"": ""DR"", ""Antonee Robinson"": ""DL""...",...,"{""Marek Rodak"": ""GK"", ""Zak Vyner"": ""DR"", ""Sean...","[155529, 193576, 91915, 204863, 112211, 146426...",1.61,3.7,5.4,2.13,1.8,-1.0,0,1


# Agent - Env

Here, we'll set up our betting environment and let the computer program play and make decisions at random.

In [None]:
# | include: false
# One step per game at most.
max_steps_limit = len(fixtures)
# Environment built on the loaded fixtures.
env = BettingEnv(fixtures.reset_index())

In [None]:
# | include: false
# Let a random agent play until the episode ends.
env.reset()
for _ in range(max_steps_limit):
    # render() prints by itself and returns nothing, so it is not wrapped in print()
    env.render()
    print("\n Info: \n")
    obs, reward, done, info = env.step(env.action_space.sample())
    print(info)
    print("\n Observation: \n")
    try:
        print(obs.observation_pretty_output().to_dict())
    except Exception:
        # Best effort: at the end of the episode `obs` is a plain array
        # without a pretty output, so fall back to printing it as is.
        print(obs)
    print("----------------------------------------------------")
    if done:
        break

Current balance at step 0: 100
Current game id : 991013
Home Team: Norwich City VS Away Team: Leeds United
None

 Info: 

{'current_step': 0, 'odds': array([[2.62, 2.77, 3.6 , 1.92, 2.02]]), 'bet_action': [0.4, 0, 0, 0, 0], 'balance': 100, 'reward': -40.0, 'legal_bet': True, 'results': 2, 'done': False}

 Observation: 

{'gameId': {0: 991018}, 'homeTeam': {0: 'Stoke City'}, 'awayTeam': {0: 'Hull City'}, 'homeLineup': {0: '{"Bruno Martins Indi": "DCL", "Peter Crouch": "FW", "Jack Butland": "GK", "Saido Berahino": "AMC", "Joe Allen": "DMR", "Mame Diouf": "MR", "Erik Pieters": "DL", "James McClean": "ML", "Cuco Martina": "DR", "Ryan Shawcross": "DCR", "Oghenekaro Etebo": "DML"}'}, 'awayLineup': {0: '{"Eric Lichaj": "DR", "Jordy De Wijs": "DCL", "David Marshall": "GK", "Markus Henriksen": "DMR", "Daniel Batty": "DML", "Fraizer Campbell": "FW", "Evandro Goebel": "AMC", "Stephen Kingsley": "DL", "Jarrod Bowen": "MR", "Todd Kane": "ML", "Reece Burke": "DCR"}'}, 'odds1': {0: 1.917}, 'oddsX': {