# OpenAI Gym Env

> Create a custom Gym environment to simulate a trading strategy.

In [None]:
# | default_exp betting_env

# Import Libraries

In [None]:
# | export

import pandas as pd
import gym
import numpy as np
import numexpr
import json
import requests
import os
import sys
from betting_env.asian_handicap_pnl import *
from infi.traceback import pretty_traceback_and_exit_decorator

# Observations Output

We provide here a simple class that stores our RL observations. It exposes two formats: a numerical NumPy array that holds the game information fed to the agent, and a user-friendly pandas DataFrame that shows the same game information in a readable form.

In [None]:
# | export


class Observation:
    """Container for a single game observation.

    Holds the raw game information and exposes it in two formats: a flat
    numerical array (built by calling the instance) and a one-row DataFrame
    for human inspection (`observation_pretty_output`).
    """

    def __init__(
        self,
        game_id: int,  # Game Id.
        lineups: np.ndarray,  # Lineups.
        lineups_ids: np.ndarray,  # Lineups opta Ids.
        teams_names: pd.core.series.Series,  # Team names.
        teams_ids: np.ndarray,  # Teams opta Ids.
        betting_market: np.ndarray,  # Odds.
        ah_line: float,  # Asian handicap line.
        observation_shape: tuple,  # Observation shape.
    ):
        self.game_id = game_id
        self.lineups = lineups
        self.lineups_ids = lineups_ids
        self.teams_names = teams_names
        self.teams_ids = teams_ids
        self.betting_market = betting_market
        self.ah_line = ah_line
        self.shape = observation_shape

    def __call__(self) -> "Observation":
        """Build the numerical observation and return `self`.

        The pieces are stacked column-wise in the order: game id, team ids,
        home lineup ids, away lineup ids, odds; the result is then reshaped
        to the shape given at construction time.
        """
        self.numerical_observation = np.concatenate(
            (
                np.array([self.game_id]).reshape(1, -1),  # Opta gameId.
                np.array([self.teams_ids]),  # Teams Opta Ids.
                np.array([self.lineups_ids[0]]),  # Home lineup (players opta Id).
                np.array([self.lineups_ids[1]]),  # Away lineup (players opta Id).
                self.betting_market,  # Odds (1x2 and AH).
            ),
            axis=1,
        ).reshape(self.shape)

        self.dtype = self.numerical_observation.dtype
        return self

    def reshape(
        self,
        new_shape: tuple,  # new shape
    ) -> "Observation":
        """Reshape the numerical observation in place and return `self`.

        Requires the instance to have been called first, since that is what
        creates `numerical_observation`.
        """
        self.numerical_observation = self.numerical_observation.reshape(new_shape)
        return self

    def astype(
        self,
        data_type: str,  # data type
    ) -> "Observation":
        """Cast the numerical observation in place and return `self`.

        Requires the instance to have been called first, since that is what
        creates `numerical_observation`.
        """
        self.numerical_observation = self.numerical_observation.astype(data_type)
        # Keep the advertised dtype in sync with the array actually stored.
        self.dtype = self.numerical_observation.dtype
        return self

    def observation_pretty_output(self) -> pd.DataFrame:
        """Return the observation as a one-row, human-readable DataFrame."""
        # Read the names by position. `teams_names` is a Series labelled with
        # column names, so `teams_names[0]` would be a label lookup
        # (deprecated positional fallback in pandas 2.x).
        home_name, away_name = list(self.teams_names)[:2]
        # Odds are stored as a single row: (1, X, 2, AH home, AH away).
        odds_row = self.betting_market[0]
        self.observation = {
            "gameId": [self.game_id],
            "homeTeam": [home_name],
            "awayTeam": [away_name],
            "homeLineup": self.lineups[0],
            "awayLineup": self.lineups[1],
            "odds1": odds_row[0],
            "oddsX": odds_row[1],
            "odds2": odds_row[2],
            "ahLine": [self.ah_line],
            "oddsAhHome": odds_row[3],
            "oddsAhAway": odds_row[4],
        }

        return pd.DataFrame(self.observation, index=[0])

# Betting Environment

Reinforcement Learning is a branch of machine learning (ML) that focuses on the complex and all-encompassing issue of training a system to behave appropriately in a given situation. Only the value of the reward and observations made about the environment are used to drive learning. The generality of this model allows it to be used in a wide range of real-world contexts, from gaming to the improvement of sophisticated industrial procedures.

In this perspective, the environment and the agent are two crucial elements of RL. The environment is the Agent's world where it exists and the Agent can engage in interactions with this environment by taking certain actions which cannot change the environment's laws or dynamics.

The goal of this work is to develop a Deep Reinforcement Learning environment that simulates a betting strategy. The theory underlying this environment is quite straightforward: placing a bet entails selecting a potential outcome, deciding on a stake size, multiplying it by the winning odds, and then deducting the initial wager and any losses.

Here, the agent acts in a discrete action space. Each betting action combines:
- a wager size: small, medium, or large; and
- an outcome to wager on: home, draw, or away (1X2 lines), or home or away on the Asian handicap line.

It should be noted that the agent can only choose one action per game: one of the 15 combinations above (3 wager sizes × 5 outcomes), or a 16th "no bet" action.



In addition, our RL betting environment is a subclass of an OpenAI Gym environment, with an observation space equal to (gameId, home and away team ids, home team lineup, away team lineup, and the 1X2 and Asian handicap odds) and an action space equal to the options available to the agent (the wager size and the chosen outcome).

A simple action in the environment consists of getting the current observation and placing a bet. The reward (the investment return), which can be positive or negative, is then calculated by deducting the total amount of the wager from the winnings.

The line that the agent selects determines how the profit is computed. If the agent bets on the 1X2 line, the profit can be expressed as follows:

    - profit = (bet * invested_amount * results * odds) - (bet * invested_amount)
             = reward - expense

    with : 
    * bet = the chosen outcome or side (Home win, Draw, Away win)
    * invested_amount = bet size
    * results = postgame outcome
    * odds = 1X2 odds


If the agent selects the Asian handicap, the profit will depend on the outcome of the game's goal-difference and the chosen line (Half Integer Line, Integer Line, Quarter Integer Line). The agent can win the full bet or just the half of it, lose the full bet or just the half of it, or return its stake.

For instance, the profit could be expressed as follows if the agent had won the wager:

    - profit = invested_amount * (odds_ah -1)

If the agent wins the half of the bet :

    - profit = invested_amount * ((odds_ah - 1) * 0.5)

    with : 
    * invested_amount = bet size
    * odds_ah = Asian Handicap odds


When there are no more games to play or the user's bank balance is exhausted, an episode will be concluded. 


It should be mentioned that we have decided to divide the betting size values into three categories: small, at 5%; medium, at 20%; and large, at 70% of the entire amount of money we have.

In [None]:
# | export


class BettingEnv(gym.Env):
    """OpenAI Gym class for football betting environments.

    Each step is one game. The agent picks one entry of `ACTIONS_LIST`, the
    bet is settled against the game's result (1X2) or goal difference (Asian
    handicap), and the profit or loss is added to the balance. An episode
    ends when the balance drops below 1 or when every game has been played.
    """

    metadata = {"render_modes": ["human"]}
    # Bet size(small, medium, large), as a fraction of the current balance.
    SMALL_BET, MEDIUM_BET, LARGE_BET = 0.05, 0.2, 0.7
    # Actions. Each one is a stake vector over the five markets, in the order
    # (home 1x2, draw 1x2, away 1x2, home AH, away AH).
    ACTIONS_LIST = [
        [0, 0, 0, 0, 0],  # No bets.
        [SMALL_BET, 0, 0, 0, 0],  # Betting on home team (1x2).
        [MEDIUM_BET, 0, 0, 0, 0],  # Betting on home team (1x2).
        [LARGE_BET, 0, 0, 0, 0],  # Betting on home team (1x2).
        [0, 0, SMALL_BET, 0, 0],  # Betting on away team (1x2).
        [0, 0, MEDIUM_BET, 0, 0],  # Betting on away team (1x2).
        [0, 0, LARGE_BET, 0, 0],  # Betting on away team (1x2).
        [0, SMALL_BET, 0, 0, 0],  # Betting on draw (1x2).
        [0, MEDIUM_BET, 0, 0, 0],  # Betting on draw (1x2).
        [0, LARGE_BET, 0, 0, 0],  # Betting on draw (1x2).
        [0, 0, 0, SMALL_BET, 0],  # Betting on home (Asian Handicap).
        [0, 0, 0, MEDIUM_BET, 0],  # Betting on home (Asian Handicap).
        [0, 0, 0, LARGE_BET, 0],  # Betting on home (Asian Handicap).
        [0, 0, 0, 0, SMALL_BET],  # Betting on away (Asian Handicap).
        [0, 0, 0, 0, MEDIUM_BET],  # Betting on away (Asian Handicap).
        [0, 0, 0, 0, LARGE_BET],  # Betting on away (Asian Handicap).
    ]
    # Default odds columns, in the SAME order as the stake vectors above
    # (home, draw, away, AH home, AH away). The order matters: stakes, odds
    # and results are multiplied element-wise when a 1x2 bet is settled.
    DEFAULT_ODDS_COLUMNS = (
        "preGameOdds1",
        "preGameOddsX",
        "preGameOdds2",
        "preGameAhHome",
        "preGameAhAway",
    )

    def __init__(
        self,
        game_odds: pd.DataFrame,  # Games with their betting odds.
        odds_column_names: list = None,  # Betting odds column names (defaults to DEFAULT_ODDS_COLUMNS).
        starting_bank: float = 100.0,  # Starting bank account.
    ) -> None:
        """Initializes a new environment.

        `game_odds` must contain the odds columns plus `result`, `lineId`,
        `postGameGd`, `homeTeamName`, `awayTeamName`, `homeTeamOptaId`,
        `awayTeamOptaId`, `homeTeamLineup`, `awayTeamLineup`,
        `homeTeamLineupIds`, `awayTeamLineupIds` and `optaGameId`. If a
        `gameDate` column is present the games are played in date order.
        `result` is used as the index of the realised outcome in the stake
        vector (0 = home, 1 = draw, 2 = away).
        """

        super().__init__()
        # A `None` default avoids sharing one mutable list between instances.
        if odds_column_names is None:
            odds_column_names = BettingEnv.DEFAULT_ODDS_COLUMNS
        # Games df.
        self._game = game_odds.copy()
        # Sort data by date.
        if "gameDate" in self._game.columns:
            self._game["gameDate"] = pd.to_datetime(self._game["gameDate"])
            self._game = self._game.sort_values(by="gameDate")
        # Odds (1X2 and Asian handicap) values.
        self._odds = self._game[list(odds_column_names)].values
        # Results.
        self._results = self._game["result"].values
        # Ah lines.
        self._lines = self._game["lineId"].values
        # Game goal-difference (presumably home minus away -- confirm with data owner).
        self._gd = self._game["postGameGd"].values
        # Teams names.
        self._teams_names = self._game[["homeTeamName", "awayTeamName"]]
        # Teams opta id.
        self._teams_ids = self._game[["homeTeamOptaId", "awayTeamOptaId"]].values
        # Teams lineups (names and positions).
        self._lineups = self._game[["homeTeamLineup", "awayTeamLineup"]].values
        # Teams lineups (opta ids).
        self._lineups_ids = self._game[
            ["homeTeamLineupIds", "awayTeamLineupIds"]
        ].values
        # Games ids.
        self._game_ids = self._game["optaGameId"].values
        # Observation space.
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(
                self._odds.shape[1] + 25,
            ),  # 25 = 22(players Ids) + 2(home and away team ids) + 1(gameId).
            dtype=np.float64,
        )
        # Actions space.
        self.action_space = gym.spaces.Discrete(
            len(BettingEnv.ACTIONS_LIST)
        )  # Betting action
        # Env balance.
        self.balance = self.starting_bank = starting_bank
        # Current step (game).
        self.current_step = 0
        # Bet size for each outcome (kept for backward compatibility; never set).
        self.bet_size_matrix = None

    def _get_current_index(self) -> int:
        "Returns the index of the current game, wrapped to the number of games."
        return self.current_step % self._odds.shape[0]

    def get_odds(self) -> np.ndarray:
        "Returns odds for the current step as a fresh array of shape (1, n_odds)."
        # np.array copies, so callers cannot mutate the environment's odds.
        return np.array([self._odds[self.current_step]])

    def get_bet(
        self,
        action: int,  # The chosen action by the agent.
    ) -> list:
        "Returns the betting matrix for the provided action."
        return BettingEnv.ACTIONS_LIST[action]

    @pretty_traceback_and_exit_decorator
    def step(
        self,
        action: int,  # The chosen action by the agent.
    ) -> tuple:
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done,
        info). When the episode ends, the returned observation is a
        placeholder array of ones rather than an `Observation`.
        """
        # Init observation (placeholder returned when the episode is over).
        observation = np.ones(shape=self.observation_space.shape)
        # Reward.
        reward = 0
        # finish
        done = False
        # Episode info.
        info = self.create_info(action)

        # If no more money.
        if self.balance < 1:
            done = True
        else:
            # Bet action.
            bet = self.get_bet(action)
            # Game result.
            results = self.get_results()
            # Making sure agent has enough money for the bet.
            if self.legal_bet(bet):
                # Current odds.
                odds = self.get_odds()
                # Reward (positive or negative).
                reward = self.get_reward(bet, odds, results)
                # Update balance.
                self.balance += reward
                info.update(legal_bet=True)
            else:
                # Penalise by the attempted stake; the balance is left
                # untouched. (The previous code multiplied by
                # `bet_size_matrix`, which is always None.)
                reward = -(sum(bet) * self.balance)
            # Update info.
            info.update(results=results.argmax())
            info.update(reward=reward)
            # Increment step.
            self.current_step += 1
            # Check if we are finished.
            if self.finish():
                done = True
            else:
                observation = self.get_observation()

        # Update flag.
        info.update(done=done)
        # Return.
        return observation, reward, done, info

    def get_observation(self) -> "Observation":
        "Returns the observation of the current step."
        # Current game index.
        index = self._get_current_index()
        # Observation (already evaluated, so `numerical_observation` is set).
        return Observation(
            self._game_ids[index],  # Current game id.
            self._lineups[index],  # Current game lineups.
            self._lineups_ids[index],
            self._teams_names.iloc[index],  # Teams.
            self._teams_ids[index],
            self.get_odds(),  # 1X2 and AH odds.
            self._lines[index],  # Chosen line (AH line).
            self.observation_space.shape,
        )()

    def get_reward(
        self,
        bet: list,  # The betting matrix for the provided action.
        odds: np.ndarray,  # Odds for the current game.
        results: np.ndarray,  # Game result (real outcome).
    ) -> float:
        """Calculates the reward (the profit) of `bet` for the current game.

        For 1X2 bets the profit is `stake * odds` on the realised outcome
        minus the stake. For Asian handicap bets it is the stake times the
        per-unit profit returned by `AsianHandicap.pnl` (0 if that is None).
        """
        stakes_fraction = np.asarray(bet, dtype=float)
        # Agent choice.
        bet_index = int(np.argmax(stakes_fraction))
        # Money staked on each market.
        stakes = stakes_fraction * self.balance

        # If the action is a AH bet.
        if bet_index in (3, 4):
            goal_diff = self._gd[self.current_step]
            line = self._lines[self.current_step]
            # Goal difference and line are negated for an away-side bet.
            if bet_index == 3:
                obs_gd, ah_line = goal_diff, float(line)
            else:
                obs_gd, ah_line = -goal_diff, float(-line)
            # Ah side odds.
            ah_odds = odds[0, bet_index].item()
            # Calculate profit.
            unit_profit = AsianHandicap.pnl(obs_gd, ah_line, ah_odds)
            if unit_profit is None:
                return 0.0
            return float(np.sum(stakes * unit_profit))

        # Case 1X2.
        reward = np.sum(stakes * results * odds)
        expense = np.sum(stakes)
        return float(reward - expense)

    def reset(self) -> "Observation":
        "Resets the state of the environment and returns an initial observation."
        # Reset balance to initial starting bank.
        self.balance = self.starting_bank
        # Reset initial step to 0.
        self.current_step = 0
        # Return the first observation.
        return self.get_observation()

    def render(
        self,
        mode: str = "human",  # Render mode.
    ) -> None:
        "Prints the current balance, step, game id and teams. Returns None."
        index = self._get_current_index()
        teams = self._teams_names.iloc[index]
        game_id = self._game_ids[index]
        rows = teams.itertuples() if isinstance(teams, pd.DataFrame) else [teams]
        teams_str = ", ".join(
            f"Home Team: {row.homeTeamName} VS Away Team: {row.awayTeamName}"
            for row in rows
        )

        print(f"Current balance at step {self.current_step}: {self.balance}")
        print(f"Current game id : {game_id}")
        print(teams_str)

    def finish(self) -> bool:
        "Checks if the episode has reached an end."
        # If no more games left to bet.
        return self.current_step == self._odds.shape[0]

    def get_results(self) -> np.ndarray:
        "Returns the one-hot results matrix, shape (1, n_odds), for the current step."
        result = np.zeros(shape=(1, self._odds.shape[1]))
        result[0, int(self._results[self.current_step])] = 1
        return result

    def legal_bet(
        self,
        bet: list,  # The betting matrix for the provided action.
    ) -> bool:
        "Checks that the bet does not exceed the current balance."
        bet_size = sum(b * self.balance for b in bet)
        return bet_size <= self.balance

    def create_info(
        self,
        action: int,  # The chosen action by the agent.
    ) -> dict:
        "Creates the info dictionary for the given action."
        return {
            "current_step": self.current_step,
            "odds": self.get_odds(),
            # Read the table from the class, not from a global `env` variable.
            "bet_action": self.ACTIONS_LIST[action],
            "balance": self.balance,
            "reward": 0,
            "legal_bet": False,
            "results": None,
            "done": False,
        }

# Load Data

We provide here a simple function to load the games data hosted in our MongoDB cluster. You can find below the "api-key", the database, and the collection name.

In [None]:
# |export


def load_data_from_db(
    url: str,  # Database url.
    database: str,  # Database name.
    collection: str,  # Collection name.
    data_source: str,  # Cluster name.
    projection: dict,  # Query projection.
    api_key: str,  # Data api key.
    timeout: float = 30.0,  # Seconds to wait for the server before giving up.
) -> pd.DataFrame:
    """Return Fixtures from Our MongoDb Cluster.

    Sends a POST request with the query to `url` and builds a DataFrame from
    the `documents` field of the JSON response. Raises
    `requests.HTTPError` on a non-2xx response and a `requests` timeout
    exception if the server does not answer within `timeout` seconds.
    """
    # payload
    payload = json.dumps(
        {
            "collection": collection,
            "database": database,
            "dataSource": data_source,
            "projection": projection,
        }
    )
    # headers
    headers = {
        "Content-Type": "application/json",
        "Access-Control-Request-Headers": "*",
        "api-key": api_key,
    }
    # Response. Without a timeout `requests` can block indefinitely.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Fail with the HTTP status rather than a KeyError on "documents" below.
    response.raise_for_status()
    return pd.DataFrame(response.json()["documents"])

In [None]:
# |export

# Query settings for the hosted fixtures collection.
url = "https://eu-west-2.aws.data.mongodb-api.com/app/data-nsnqb/endpoint/data/v1/action/find"
collection = "Fixtures"
database = "Football"
data_source = "Cluster0"
projection = {"_id": 0}  # Drop Mongo's internal document id.
# NOTE(review): a credential is committed in exported source. It can now be
# overridden through the BETTING_ENV_API_KEY environment variable; the
# original key stays as the fallback so existing behaviour is unchanged.
api_key = os.environ.get(
    "BETTING_ENV_API_KEY",
    "LaKWy4iHSePqEgJHXwFq92i2lgs6rqNiznWEgKyeJpyIS5OC1l1FzxCun92yuOLu",
)
# Download all fixtures once, when the cell (or exported module) runs.
fixtures = load_data_from_db(
    url=url,
    database=database,
    collection=collection,
    data_source=data_source,
    projection=projection,
    api_key=api_key,
)

In [None]:
# | include: false
# Preview the first rows of the downloaded fixtures.
fixtures.head()

Unnamed: 0,gameId,optaGameId,gameDate,homeTeamId,homeTeamOptaId,homeTeamName,awayTeamId,awayTeamOptaId,awayTeamName,homeTeamLineup,...,awayTeamLineup,awayTeamLineupIds,preGameOdds1,preGameOddsX,preGameOdds2,preGameAhHome,preGameAhAway,lineId,result,postGameGd
0,015ca5bcff8ce975a1155588dbe36173b4dc3bb3f1f05e...,1001280,2018-10-05T17:00:00Z,c1ef274c1b3e453a7d3a288cb8ef27c289fdddc6c76f45...,3084,Santa Clara,b7a6cab3b561513605e9372daf0ca0ab1fe41c96bd000f...,3005,Chaves,"{""Marco"": ""GK"", ""Patrick"": ""DR"", ""Mamadu Cande...",...,"{""Ricardo Nunes"": ""GK"", ""Paulinho"": ""DR"", ""Dja...","[47396, 109323, 168600, 436806, 175642, 182816...",2.42,3.44,3.12,2.07,1.85,-0.25,0,1
1,019c223b4a03917c2f1685beab4d5d278f7bff3913f239...,991018,2018-08-25T14:00:00Z,eb89c068ca204a72408360450847a990c97c5b5ff0ec9f...,110,Stoke City,bbb63e4ea54b0d60b48a1f8440254d7e656dfbfcbef825...,88,Hull City,"{""Bruno Martins Indi"": ""DCL"", ""Peter Crouch"": ...",...,"{""Eric Lichaj"": ""DR"", ""Jordy De Wijs"": ""DCL"", ...","[45139, 173549, 15144, 82771, 240499, 28541, 5...",1.917,3.48,4.62,1.88,2.04,-0.5,0,2
2,10b7a54ed948b199f0e0daa4f6b11227b8a8abf8d8c81e...,985256,2018-10-20T18:00:00Z,6e535e19e2ee3f50ca59e9e866f03eb5411afc29f5bcbe...,1423,Reims,762c4b771e8d00a890067c0c1712c11ce55aa7af617ac0...,2128,Angers,"{""Edouard Mendy"": ""GK"", ""Thomas Foket"": ""DR"", ...",...,"{""Ludovic Butelle"": ""GK"", ""Vincent Manceau"": ""...","[11382, 54518, 57266, 168985, 42416, 45505, 16...",2.34,3.09,3.65,1.97,1.95,-0.25,1,0
3,0e261512ac49f4bf5f3da6eea3ccfdddab0e8a14d75ddc...,987654,2018-09-30T15:00:00Z,86def759261b314ac252c171be125990b091b349a75758...,97,Cardiff City,5e507f48181e32615983eec33f232354bbafc6b535b12f...,90,Burnley,"{""Neil Etheridge"": ""GK"", ""Bruno Ecuele Manga"":...",...,"{""Joe Hart"": ""GK"", ""Matthew Lowton"": ""DR"", ""Ch...","[15749, 68983, 103914, 40145, 17761, 51927, 60...",2.5,3.16,3.32,1.85,2.05,0.0,2,-1
4,03d824210ebdd1b35c34c8e623097566c70ed390e1fbd4...,987653,2018-09-29T14:00:00Z,9e78bbc137fd00c66162080bc9e987e67297643dc50616...,3,Arsenal,5188aa460d039c445603ab1f0743dee14e13b7db3082c0...,57,Watford,"{""Petr Cech"": ""GK"", ""Hector Bellerin"": ""DR"", ""...",...,"{""Ben Foster"": ""GK"", ""Marc Navarro"": ""DR"", ""Jo...","[9089, 220166, 40868, 121599, 41338, 85624, 10...",1.5,4.5,6.2,2.05,1.9,-1.25,0,2


# Agent - Env

Here, we'll set up our betting environment and let the computer program play and make decisions at random.

In [None]:
# | include: false
# One step is played per game, so the number of fixtures caps the episode length.
max_steps_limit = len(fixtures.index)
# Build the environment on a copy of the fixtures with a fresh positional index.
env = BettingEnv(fixtures.reset_index())

In [None]:
# | include: false
# Play one episode with uniformly random actions.
env.reset()
for _ in range(max_steps_limit):
    # render() prints by itself and returns None, so it is not wrapped in print().
    env.render()
    print("\n Info: \n")
    obs, reward, done, info = env.step(env.action_space.sample())
    print(info)
    print("\n Observation: \n")
    # When the episode ends, step() returns a plain array instead of an
    # Observation; check for that explicitly rather than swallowing every
    # exception.
    if isinstance(obs, Observation):
        print(obs.observation_pretty_output().to_dict())
    else:
        print(obs)
    print("----------------------------------------------------")
    if done:
        break

Current balance at step 0: 100.0
Current game id : 991013
Home Team: Norwich City VS Away Team: Leeds United
None

 Info: 

{'current_step': 0, 'odds': array([[2.62, 2.77, 3.6 , 1.92, 2.02]]), 'bet_action': [0, 0, 0.7, 0, 0], 'balance': 100.0, 'reward': 182.0, 'legal_bet': True, 'results': 2, 'done': False}

 Observation: 

{'gameId': {0: 991018}, 'homeTeam': {0: 'Stoke City'}, 'awayTeam': {0: 'Hull City'}, 'homeLineup': {0: '{"Bruno Martins Indi": "DCL", "Peter Crouch": "FW", "Jack Butland": "GK", "Saido Berahino": "AMC", "Joe Allen": "DMR", "Mame Diouf": "MR", "Erik Pieters": "DL", "James McClean": "ML", "Cuco Martina": "DR", "Ryan Shawcross": "DCR", "Oghenekaro Etebo": "DML"}'}, 'awayLineup': {0: '{"Eric Lichaj": "DR", "Jordy De Wijs": "DCL", "David Marshall": "GK", "Markus Henriksen": "DMR", "Daniel Batty": "DML", "Fraizer Campbell": "FW", "Evandro Goebel": "AMC", "Stephen Kingsley": "DL", "Jarrod Bowen": "MR", "Todd Kane": "ML", "Reece Burke": "DCR"}'}, 'odds1': {0: 1.917}, 'oddsX