# Initial Analysis

In [1]:
import requests
import urllib
from bs4 import BeautifulSoup
import json
import os
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm_notebook
import datetime
import time
import importlib
import config
importlib.reload(config)
from config import username, password, application, dbpw
import logging

import matplotlib.pyplot as plt
import seaborn as sns

import pymysql
import sqlalchemy

import xgboost as xgb
import statsmodels.api as sm

from Levenshtein import distance as levenshtein_distance

In [91]:
import plotly.express as px

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.options.mode.chained_assignment = None
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Get Data

In [14]:
connect_string = 'mysql+pymysql://root:'+dbpw+'@localhost/league_arb'
sql_engine = sqlalchemy.create_engine(connect_string)

In [580]:
league_odds = pd.read_sql("SELECT * FROM league_odds", con=sql_engine)
match_odds = pd.read_sql("SELECT * FROM match_odds", con=sql_engine)
match_outcomes = pd.read_sql("SELECT * FROM match_outcomes", con=sql_engine)

In [566]:
league_odds.shape

(64304, 33)

In [567]:
match_odds.shape

(9873, 32)

In [581]:
match_outcomes.shape

(114, 29)

#### Manipulation

In [562]:
pd.options.display.max_columns = 100

In [582]:
league_odds['spread_abs'] = league_odds['lay_price_1'] - league_odds['back_price_1']
league_odds['spread_pc'] = 100*league_odds['spread_abs']/league_odds['back_price_1']

In [583]:
league_odds.tail(10)

Unnamed: 0,runner_name,selection_id,status,back_price_1,back_size_1,back_price_2,back_size_2,back_price_3,back_size_3,lay_price_1,lay_size_1,lay_price_2,lay_size_2,lay_price_3,lay_size_3,last_price_traded,runner_total_matched,market_id,event_name,event_id,competition_name,competition_id,market_type,market_name,market_total_matched,number_of_winners,number_of_runners,number_of_active_runners,book_total_matched,total_available,datetime_extracted,spread_abs,spread_pc
64294,TSG Hoffenheim,11405869,ACTIVE,10.5,10.01,7.6,20.19,5.4,43.85,980.0,8.0,,,,,260.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,969.5,9233.333333
64295,Union Berlin,50347,ACTIVE,8.4,10.87,7.6,17.3,5.4,43.85,990.0,3.99,,,,,210.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64296,Werder Bremen,6555433,ACTIVE,8.4,10.87,7.6,17.3,5.3,89.44,980.0,4.99,,,,,210.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,971.6,11566.666667
64297,Freiburg,44520,ACTIVE,8.4,10.85,7.6,17.3,5.4,43.85,990.0,3.99,,,,,160.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64298,FC Heidenheim,3237590,ACTIVE,8.4,10.87,7.6,17.3,5.4,43.85,990.0,3.99,,,,,210.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64299,Mainz,64374,ACTIVE,8.4,10.87,7.6,17.3,5.4,43.85,990.0,3.99,,,,,210.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64300,St Pauli,44526,ACTIVE,8.4,10.38,7.6,17.3,5.4,43.85,990.0,3.99,,,,,310.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64301,FC Augsburg,10708138,ACTIVE,8.4,10.33,7.6,17.3,5.4,43.85,990.0,3.99,,,,,160.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64302,Holstein Kiel,247830,ACTIVE,8.4,10.38,7.6,17.3,5.4,43.85,990.0,3.99,,,,,310.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286
64303,VfL Bochum,347830,ACTIVE,8.4,10.38,7.6,17.3,5.4,43.85,990.0,3.99,,,,,310.0,0.0,1.230101997,German Bundesliga,605621,German Bundesliga,59,WINNER,Winner,1410.01,1,18,18,3087.48,281020.41,2024-08-20 07:17:40,981.6,11685.714286


In [584]:
league_odds.sort_values('runner_name')['runner_name'].unique()

array(['AC Milan', 'AC Milan ', 'AFC Wimbledon', 'AS Roma', 'Aberdeen',
       'Accrington', 'Alaves', 'Arsenal', 'Aston Villa', 'Atalanta',
       'Athletic Bilbao', 'Atletico Madrid', 'Barcelona', 'Barnsley',
       'Barrow', 'Bayer Leverkusen', 'Bayern Munich', 'Betis',
       'Birmingham', 'Blackburn', 'Blackpool', 'Bologna', 'Bolton',
       'Borussia Dortmund', 'Borussia Monchengladbach', 'Bournemouth',
       'Bradford', 'Brentford', 'Brighton', 'Bristol City',
       'Bristol Rovers', 'Bromley', 'Burnley', 'Burton Albion',
       'Cagliari', 'Cambridge', 'Cardiff', 'Carlisle', 'Cd Leganes',
       'Celta Vigo', 'Celtic', 'Charlton', 'Chelsea', 'Cheltenham',
       'Chesterfield', 'Colchester', 'Como', 'Coventry', 'Crawley Town',
       'Crewe', 'Crystal Palace', 'Derby', 'Doncaster', 'Dundee',
       'Dundee United', 'Eintracht Frankfurt', 'Empoli', 'Espanyol',
       'Everton', 'Exeter', 'FC Augsburg', 'FC Heidenheim', 'Fiorentina',
       'Fleetwood', 'Fleetwood Town', 'Freib

# Initial view of odds changes over time

In [585]:
team = 'Rangers'
market_type = 'WINNER'

In [586]:
league_odds.loc[league_odds['runner_name']==team, 'market_type'].unique()

array(['WINNER'], dtype=object)

In [587]:
include = ['WINNER', 'TOP_4_FINISH_FT'] # , 'TOP_2_FINISH'
price = 'back_price_1'
plot_data = league_odds[(league_odds['runner_name']==team) & (league_odds['market_type'].isin(include))].sort_values('datetime_extracted')
px.line(data_frame=plot_data, x='datetime_extracted', y=price, color='market_type')

In [421]:
(2.88/1.88)/(2.84/1.84)

0.9925082409349717

# Match up teams

In [588]:
bf_team_list = list(set(match_odds['runner_name']))
sl_team_list = list(set(match_outcomes['team_a_name']) | set(match_outcomes['team_b_name']))
bf_team_list.sort()
sl_team_list.sort()

In [589]:
teams_map_existing = pd.read_sql("SELECT * FROM team_names_map", con=sql_engine)

In [590]:
teams_map_additions = pd.DataFrame({'bf_team_names': bf_team_list})
teams_map_additions = teams_map_additions[~teams_map_additions['bf_team_names'].isin(teams_map_existing['bf_team_names'])]
teams_map_additions['sl_team_names'] = None

In [591]:
len(teams_map_additions)

11

In [592]:
lev_max = 10
for i, b in zip(teams_map_additions.index, teams_map_additions['bf_team_names']):
    lev_diffs = [levenshtein_distance(b, s.split(' ')[0]) for s in sl_team_list]
    min_diff = min(lev_diffs)
    if min_diff <= lev_max:
        closest_team = [t for t, d in zip(sl_team_list, lev_diffs) if d == min_diff][0]
        teams_map_additions.at[i, 'sl_team_names'] = closest_team

In [593]:
pd.options.display.max_rows = 100
teams_map_additions

Unnamed: 0,bf_team_names,sl_team_names
8,Atalanta,Atalanta
10,Atletico Madrid,Atlético de Madrid
40,Como,Como
50,Espanyol,Espanyol
68,Juventus,Juventus
72,Lecce,Lecce
75,Leicester,Leicester City
129,Tottenham,Tottenham Hotspur
133,Valladolid,Mallorca
135,Verona,Girona


In [596]:
# manual edits
delete_list = []
teams_map_additions.at[133, 'sl_team_names'] = 'Real Valladolid'
teams_map_additions.at[135, 'sl_team_names'] = 'Hellas Verona'
teams_map_additions = teams_map_additions[~teams_map_additions.index.isin(delete_list)]

In [595]:
[s for s in sl_team_list if s not in list(teams_map_existing['sl_team_names'])]

['Atalanta',
 'Atlético de Madrid',
 'Como',
 'Espanyol',
 'Hellas Verona',
 'Juventus',
 'Lecce',
 'Leicester City',
 'Real Valladolid',
 'Tottenham Hotspur',
 'Villarreal']

In [597]:
teams_map_additions

Unnamed: 0,bf_team_names,sl_team_names
8,Atalanta,Atalanta
10,Atletico Madrid,Atlético de Madrid
40,Como,Como
50,Espanyol,Espanyol
68,Juventus,Juventus
72,Lecce,Lecce
75,Leicester,Leicester City
129,Tottenham,Tottenham Hotspur
133,Valladolid,Real Valladolid
135,Verona,Hellas Verona


In [598]:
teams_map_additions.to_sql(name='team_names_map', con=sql_engine, schema='league_arb', if_exists='append', index=False)

In [599]:
teams_map = pd.read_sql("SELECT * FROM team_names_map", con=sql_engine)

In [600]:
teams_map

Unnamed: 0,bf_team_names,sl_team_names
0,AFC Wimbledon,AFC Wimbledon
1,Aberdeen,Aberdeen
2,Accrington,Accrington Stanley
3,Barnsley,Barnsley
4,Barrow,Barrow
...,...,...
140,Leicester,Leicester City
141,Tottenham,Tottenham Hotspur
142,Valladolid,Real Valladolid
143,Verona,Hellas Verona


In [601]:
competiton_mapping = {
    'Italian Serie A': 'Italian Serie A',
    'Scottish Premiership': 'Scottish Premiership',
    'English Premier League': 'English Premier League',
    'English Championship': 'Sky Bet Championship',
    'English League 1': 'Sky Bet League One',
    'English League 2': 'Sky Bet League Two',
    'Spanish La Liga': 'Spanish La Liga',
    'German Bundesliga': 'German Bundesliga'
}
comps_map = pd.DataFrame({'bf_comp_names': list(competiton_mapping.keys()), 'sl_comp_names': list(competiton_mapping.values())})

In [602]:
comps_map

Unnamed: 0,bf_comp_names,sl_comp_names
0,Italian Serie A,Italian Serie A
1,Scottish Premiership,Scottish Premiership
2,English Premier League,English Premier League
3,English Championship,Sky Bet Championship
4,English League 1,Sky Bet League One
5,English League 2,Sky Bet League Two
6,Spanish La Liga,Spanish La Liga
7,German Bundesliga,German Bundesliga


# Reshape data into useful formats

In [603]:
match_outcomes = match_outcomes.merge(teams_map, how='left', left_on='team_a_name', right_on='sl_team_names').merge(
    teams_map, how='left', left_on='team_b_name', right_on='sl_team_names', suffixes=('_a', '_b'))

In [604]:
match_outcomes = match_outcomes.merge(comps_map, how='left', left_on='competition_name', right_on='sl_comp_names')

In [605]:
match_outcomes['event_name'] = match_outcomes['bf_team_names_a'] + ' v ' + match_outcomes['bf_team_names_b']

#### Get matches with match and league odds before and after

In [653]:
min_time_before_secs = 0
max_time_before_secs = 24*60*60
min_time_after_secs = 4*60*60  # set to after expected finish time plus period for odds to settle
max_time_after_secs = 24*60*60
max_spread_pc_before = 10
max_spread_pc_after = 10

#### Match odds before match

In [654]:
match_odds['time_to_match_secs'] = (pd.to_datetime(match_odds['market_start_time']) - pd.to_datetime(match_odds['datetime_extracted'], utc=True)).dt.total_seconds()

In [655]:
match_odds_cols = ['market_start_time', 'runner_name', 'selection_id', 'event_name', 'competition_name', 'market_type', 'back_price_1', 'lay_price_1', 'datetime_extracted']
match_odds['spread'] = 100*(match_odds['lay_price_1']/match_odds['back_price_1'] - 1)
matches_with_before_after = match_odds[
    match_odds['time_to_match_secs'].between(min_time_before_secs, max_time_before_secs) & (match_odds['spread'] <= max_spread_pc_before)]

matches_with_before_after = matches_with_before_after.sort_values('time_to_match_secs').groupby(
    ['selection_id', 'market_id', 'event_id', 'market_type']).head(1)[match_odds_cols]

In [656]:
matches_with_before_after['market_start_time'] = pd.to_datetime(matches_with_before_after['market_start_time'])
matches_with_before_after = matches_with_before_after.sort_values('market_start_time')

#### League odds before and after match

In [657]:
league_odds['datetime_extracted'] = pd.to_datetime(league_odds['datetime_extracted'], utc=True)
league_odds = league_odds.sort_values('datetime_extracted')

In [658]:
league_cols = ['selection_id', 'event_name', 'competition_name', 'market_type', 'back_price_1', 'lay_price_1', 'datetime_extracted']
league_odds_before = pd.merge_asof(
    league_odds.drop(columns='event_name'), matches_with_before_after[['selection_id', 'market_start_time', 'event_name']],
    by='selection_id', left_on='datetime_extracted', right_on='market_start_time', direction='forward')

league_odds_before['time_to_match_secs'] = (league_odds_before['market_start_time'] - league_odds_before['datetime_extracted']).dt.total_seconds()
league_odds_before['spread'] = 100*(league_odds_before['lay_price_1']/league_odds_before['back_price_1'] - 1)
league_odds_before = league_odds_before[
    league_odds_before['time_to_match_secs'].between(min_time_before_secs, max_time_before_secs) & (league_odds_before['spread'] <= max_spread_pc_before)]
league_odds_before = league_odds_before.sort_values('time_to_match_secs').groupby(
    ['selection_id', 'market_id', 'event_name', 'market_type']).head(1)[league_cols]

In [659]:
league_odds_after = pd.merge_asof(
    league_odds.drop(columns='event_name'), matches_with_before_after[['selection_id', 'market_start_time', 'event_name']],
    by='selection_id', left_on='datetime_extracted', right_on='market_start_time', direction='backward')

league_odds_after['time_after_match_secs'] = (league_odds_after['datetime_extracted'] - league_odds_after['market_start_time']).dt.total_seconds()
league_odds_after['spread'] = 100*(league_odds_after['lay_price_1']/league_odds_after['back_price_1'] - 1)
league_odds_after = league_odds_after[
    league_odds_after['time_after_match_secs'].between(min_time_after_secs, max_time_after_secs) & (league_odds_after['spread'] <= max_spread_pc_after)]
league_odds_after = league_odds_after.sort_values('time_after_match_secs').groupby(
    ['selection_id', 'market_id', 'event_name', 'market_type']).head(1)[league_cols]

In [660]:
league_odds_after_max = pd.merge_asof(
    league_odds.drop(columns='event_name'), matches_with_before_after[['selection_id', 'market_start_time', 'event_name']],
    by='selection_id', left_on='datetime_extracted', right_on='market_start_time', direction='backward')

league_odds_after_max = league_odds_after_max.sort_values('datetime_extracted').groupby(
    ['selection_id', 'market_id', 'event_name', 'market_type']).tail(1)[league_cols]

In [661]:
league_market = 'WINNER'
matches_with_all_odds = matches_with_before_after.merge(
    league_odds_before.loc[
        league_odds_before['market_type']==league_market, ['selection_id', 'event_name', 'market_type', 'back_price_1', 'lay_price_1', 'datetime_extracted']],
    how='left', on=['selection_id', 'event_name'], suffixes=('', '_league')).merge(
    league_odds_after.loc[
        league_odds_after['market_type']==league_market, ['selection_id', 'event_name', 'back_price_1', 'lay_price_1', 'datetime_extracted']],
    how='left', on=['selection_id', 'event_name'], suffixes=('', '_league_after')).merge(
    league_odds_after_max.loc[
        league_odds_after_max['market_type']==league_market, ['selection_id', 'event_name', 'back_price_1', 'lay_price_1', 'datetime_extracted']],
    how='left', on=['selection_id', 'event_name'], suffixes=('', '_league_after_max'))

#### Add results

In [662]:
matches_with_all_odds['match_day'] = matches_with_all_odds['market_start_time'].dt.date
match_outcomes['match_day'] = pd.to_datetime(match_outcomes['match_date']).dt.date

In [663]:
combined_df = matches_with_all_odds.merge(
    match_outcomes[['match_day', 'event_name', 'outcome', 'winner_name', 'winner_short_name', 'result_type', 'team_a_score', 'team_b_score']],
    how='left', on=['match_day', 'event_name'])

In [664]:
combined_df = combined_df[
    combined_df['back_price_1_league'].notnull() & combined_df['back_price_1_league_after'].notnull()]

In [665]:
combined_df = combined_df.merge(
    teams_map.rename(columns={'sl_team_names': 'winner_name', 'bf_team_names': 'bf_winner_name'}), how='left', on='winner_name')

In [666]:
combined_df['back_price_shift_pc'] = 100*(combined_df['back_price_1_league_after']/combined_df['back_price_1_league'] - 1)
combined_df['selection_outcome'] = 'LOSE'
combined_df.loc[(combined_df['runner_name'] == combined_df['bf_winner_name']), 'selection_outcome'] = 'WIN'
combined_df.loc[(combined_df['outcome'] == 'DRAW'), 'selection_outcome'] = 'DRAW'

In [667]:
combined_df.shape

(30, 29)

In [668]:
px.scatter(
    data_frame=combined_df, x='back_price_1', y='back_price_shift_pc', color='selection_outcome', size='back_price_1_league', hover_data=['runner_name'],
    labels={'back_price_1': 'Match Odds', 'back_price_shift_pc': 'League Odds Shift (%)', 'back_price_1_league': 'League Odds (Pre Match)',
            'selection_outcome': 'Match Result', 'runner_name': 'Team'},
    title='League Odds Shift vs Match Odds')

In [669]:
combined_df['goal_deficit'] = abs(combined_df['team_a_score'] - combined_df['team_b_score'])
lose_mask = combined_df['selection_outcome'] == 'LOSE'
combined_df.loc[lose_mask, 'goal_deficit'] = -combined_df.loc[lose_mask, 'goal_deficit']

In [670]:
px.scatter(
    data_frame=combined_df, x='goal_deficit', y='back_price_shift_pc', color='back_price_1_league', size='back_price_1', hover_data=['runner_name'],
    labels={'goal_deficit': 'Winning Margin', 'back_price_1': 'Match Odds', 'back_price_shift_pc': 'League Odds Shift (%)',
            'back_price_1_league': 'League Odds (Pre Match)', 'runner_name': 'Team'},
    title='League Odds Shift vs Winning Margin')

In [671]:
after_col = '_max'
combined_df['lay_price_1_lay_odds'] = combined_df['lay_price_1']/(combined_df['lay_price_1'] - 1)
combined_df['lay_price_1_league_lay_odds'] = combined_df['lay_price_1_league']/(combined_df['lay_price_1_league'] - 1)
combined_df['back_price_1_league_after_lay_odds'] = combined_df['back_price_1_league_after'+after_col]/(combined_df['back_price_1_league_after'+after_col] - 1)

combined_df['£1_match_back_outcome'] = (combined_df['selection_outcome'] == 'WIN')*combined_df['back_price_1']
combined_df['£1_match_lay_outcome'] = (combined_df['selection_outcome'] != 'WIN')*combined_df['lay_price_1_lay_odds']
combined_df['£1_league_back_outcome'] = combined_df['back_price_1_league']/combined_df['lay_price_1_league_after'+after_col]
combined_df['£1_league_lay_outcome'] = combined_df['lay_price_1_league_lay_odds']/combined_df['back_price_1_league_after_lay_odds']

In [672]:
combined_df[combined_df['lay_price_1_league_after'+after_col] / combined_df['back_price_1_league_after'+after_col] < 1.15].groupby('selection_outcome')[
    ['£1_league_back_outcome', '£1_league_lay_outcome']].mean()

Unnamed: 0_level_0,£1_league_back_outcome,£1_league_lay_outcome
selection_outcome,Unnamed: 1_level_1,Unnamed: 2_level_1
DRAW,0.891722,1.00915
LOSE,0.733519,1.01287
WIN,1.034267,0.960442


In [673]:
px.scatter(
    data_frame=combined_df, x='back_price_1', y='£1_league_lay_outcome', color='selection_outcome', size='back_price_1_league', hover_data=['runner_name'],
    labels={'goal_deficit': 'Winning Margin', 'back_price_1': 'Match Odds', 'back_price_shift_pc': 'League Odds Shift (%)',
            'back_price_1_league': 'League Odds (Pre Match)', 'runner_name': 'Team'},
    title='Match Odds vs League Lay Returns')

In [674]:
px.scatter(
    data_frame=combined_df, x='lay_price_1_lay_odds', y='£1_league_back_outcome', color='selection_outcome', size='back_price_1_league', hover_data=['runner_name'],
    labels={'goal_deficit': 'Winning Margin', 'back_price_1': 'Match Odds', 'back_price_shift_pc': 'League Odds Shift (%)',
            'back_price_1_league': 'League Odds (Pre Match)', 'runner_name': 'Team'},
    title='Match Lay Odds vs League Back Returns')

In [689]:
# Test strategies
combined_df['match_back'] = 1
combined_df['match_lay'] = 1
combined_df['league_back'] = 5
combined_df['league_lay'] = 100

combined_df['match_back_winnings'] = combined_df['£1_match_back_outcome']*combined_df['match_back'] - combined_df['match_back']
combined_df['match_lay_winnings'] = combined_df['£1_match_lay_outcome']*combined_df['match_lay'] - combined_df['match_lay']
combined_df['league_back_winnings'] = combined_df['£1_league_back_outcome']*combined_df['league_back'] - combined_df['league_back']
combined_df['league_lay_winnings'] = combined_df['£1_league_lay_outcome']*combined_df['league_lay'] - combined_df['league_lay']

In [693]:
combined_df[['match_back', 'league_lay', 'match_back_winnings', 'league_lay_winnings']].sum(axis=0)

match_back               30.000000
league_lay             3000.000000
match_back_winnings       8.500000
league_lay_winnings     -57.953479
dtype: float64

In [695]:
50/3030

0.0165016501650165

In [691]:
combined_df[['match_lay', 'league_back', 'match_lay_winnings', 'league_back_winnings']].sum(axis=0)

match_lay                30.000000
league_back             150.000000
match_lay_winnings       -3.752226
league_back_winnings    -18.092320
dtype: float64

In [682]:
combined_df

Unnamed: 0,market_start_time,runner_name,selection_id,event_name,competition_name,market_type,back_price_1,lay_price_1,datetime_extracted,market_type_league,back_price_1_league,lay_price_1_league,datetime_extracted_league,back_price_1_league_after,lay_price_1_league_after,datetime_extracted_league_after,back_price_1_league_after_max,lay_price_1_league_after_max,datetime_extracted_league_after_max,match_day,outcome,winner_name,winner_short_name,result_type,team_a_score,team_b_score,bf_winner_name,back_price_shift_pc,selection_outcome,goal_deficit,lay_price_1_lay_odds,lay_price_1_league_lay_odds,back_price_1_league_after_lay_odds,£1_match_back_outcome,£1_match_lay_outcome,£1_league_back_outcome,£1_league_lay_outcome,match_back,match_lay,league_back,league_lay,match_back_winnings,league_back_winnings,match_lay_winnings,league_lay_winnings
0,2024-08-09 19:00:00+00:00,Sheff Utd,48470,Preston v Sheff Utd,English Championship,MATCH_ODDS,2.38,2.4,2024-08-09 16:54:04,WINNER,17.0,18.0,2024-08-09 16:53:51+00:00,15.5,16.0,2024-08-10 07:59:26+00:00,11.0,13.5,2024-08-17 13:57:18+00:00,2024-08-09,WIN,Sheffield United,Sheffield Utd,NORMALRESULT,0,2,Sheff Utd,-8.823529,WIN,2,1.714286,1.058824,1.1,2.38,0.0,1.259259,0.962567,1,0,0,100,1.38,0.0,0.0,-3.743316
1,2024-08-10 11:30:00+00:00,Leeds,48317,Leeds v Portsmouth,English Championship,MATCH_ODDS,1.27,1.28,2024-08-10 11:09:16,WINNER,3.8,3.85,2024-08-10 11:09:12+00:00,4.1,4.2,2024-08-10 15:40:33+00:00,4.8,4.9,2024-08-17 11:26:04+00:00,2024-08-10,DRAW,,,NORMALRESULT,3,3,,7.894737,DRAW,0,4.571429,1.350877,1.263158,0.0,4.571429,0.77551,1.069444,1,0,0,100,-1.0,0.0,0.0,6.944444
2,2024-08-10 11:30:00+00:00,West Brom,1703,QPR v West Brom,English Championship,MATCH_ODDS,2.92,2.94,2024-08-10 11:09:16,WINNER,23.0,25.0,2024-08-09 16:53:51+00:00,17.5,19.0,2024-08-10 19:41:27+00:00,15.0,1000.0,2024-08-17 11:26:04+00:00,2024-08-10,WIN,West Bromwich Albion,West Brom,NORMALRESULT,1,3,West Brom,-23.913043,WIN,2,1.515464,1.041667,1.071429,2.92,0.0,0.023,0.972222,1,0,0,100,1.92,0.0,0.0,-2.777778
3,2024-08-10 14:00:00+00:00,Rangers,52458346,Rangers v Motherwell,Scottish Premiership,MATCH_ODDS,1.2,1.21,2024-08-10 13:40:06,WINNER,4.3,4.6,2024-08-10 13:40:02+00:00,4.1,4.4,2024-08-10 18:11:07+00:00,5.0,5.4,2024-08-20 07:17:40+00:00,2024-08-10,WIN,Rangers,Rangers,NORMALRESULT,2,1,Rangers,-4.651163,WIN,1,5.761905,1.277778,1.25,1.2,0.0,0.796296,1.022222,1,0,0,100,0.2,0.0,0.0,2.222222
4,2024-08-10 14:00:00+00:00,MK Dons,893582,MK Dons v Bradford,English League 2,MATCH_ODDS,2.24,2.26,2024-08-10 13:40:06,WINNER,6.0,6.6,2024-08-10 10:38:54+00:00,7.6,8.0,2024-08-11 09:40:12+00:00,7.0,7.6,2024-08-17 13:57:18+00:00,2024-08-10,WIN,Bradford City,Bradford,NORMALRESULT,1,2,Bradford,26.666667,LOSE,-1,1.793651,1.178571,1.166667,0.0,1.793651,0.789474,1.010204,1,0,0,100,-1.0,0.0,0.0,1.020408
5,2024-08-10 14:00:00+00:00,Doncaster,49205,Doncaster v Accrington,English League 2,MATCH_ODDS,1.58,1.59,2024-08-10 13:40:06,WINNER,8.8,9.0,2024-08-09 16:53:51+00:00,7.4,7.8,2024-08-10 19:11:22+00:00,6.8,7.6,2024-08-17 13:57:18+00:00,2024-08-10,WIN,Doncaster Rovers,Doncaster,NORMALRESULT,4,1,Doncaster,-15.909091,WIN,3,2.694915,1.125,1.172414,1.58,0.0,1.157895,0.959559,1,0,0,100,0.58,0.0,0.0,-4.044118
6,2024-08-10 16:30:00+00:00,Charlton,48349,Wigan v Charlton,English League 1,MATCH_ODDS,2.98,3.0,2024-08-10 16:10:44,WINNER,17.0,18.0,2024-08-10 16:10:40+00:00,15.0,16.0,2024-08-11 12:10:39+00:00,12.0,200.0,2024-08-17 11:26:04+00:00,2024-08-10,WIN,Charlton Athletic,Charlton,NORMALRESULT,0,1,Charlton,-11.764706,WIN,1,1.5,1.058824,1.090909,2.98,0.0,0.085,0.970588,1,0,0,100,1.98,0.0,0.0,-2.941176
7,2024-08-10 16:30:00+00:00,Birmingham,48759,Birmingham v Reading,English League 1,MATCH_ODDS,1.54,1.55,2024-08-10 16:10:44,WINNER,2.94,3.15,2024-08-10 13:40:02+00:00,3.3,3.4,2024-08-11 08:09:54+00:00,3.1,3.25,2024-08-17 13:57:18+00:00,2024-08-10,DRAW,,,NORMALRESULT,1,1,,12.244898,DRAW,0,2.818182,1.465116,1.47619,0.0,2.818182,0.904615,0.992498,1,0,0,100,-1.0,0.0,0.0,-0.750188
8,2024-08-10 16:30:00+00:00,Rotherham,18565,Exeter v Rotherham,English League 1,MATCH_ODDS,2.56,2.58,2024-08-10 16:10:44,WINNER,11.0,11.5,2024-08-10 16:10:40+00:00,14.0,15.0,2024-08-11 14:10:59+00:00,15.0,17.0,2024-08-17 13:57:18+00:00,2024-08-10,WIN,Exeter City,Exeter,NORMALRESULT,1,0,Exeter,27.272727,LOSE,-1,1.632911,1.095238,1.071429,0.0,1.632911,0.647059,1.022222,1,0,0,100,-1.0,0.0,0.0,2.222222
9,2024-08-10 16:30:00+00:00,Bolton,48322,Leyton Orient v Bolton,English League 1,MATCH_ODDS,2.08,2.1,2024-08-10 16:10:44,WINNER,8.0,8.4,2024-08-10 16:10:40+00:00,7.6,8.2,2024-08-11 08:40:01+00:00,7.2,95.0,2024-08-18 12:56:02+00:00,2024-08-10,WIN,Bolton Wanderers,Bolton,NORMALRESULT,1,2,Bolton,-5.0,WIN,1,1.909091,1.135135,1.16129,2.08,0.0,0.084211,0.977477,1,0,0,100,1.08,0.0,0.0,-2.252252


In [370]:
back_min = 1
back_max = 2
interesting_cols = [
    'market_start_time', 'runner_name', 'event_name', 'competition_name', 'winner_name', 'selection_outcome', 'goal_deficit', 'back_price_1', 'lay_price_1',
    'back_price_1_league', 'lay_price_1_league', 'back_price_1_league_after', 'lay_price_1_league_after', 'back_price_1_league_after_max', 'lay_price_1_league_after_max',
    '£1_match_back_outcome', '£1_match_lay_outcome', '£1_league_back_outcome', '£1_league_lay_outcome']
combined_df.loc[combined_df['back_price_1'].between(back_min, back_max), interesting_cols]

Unnamed: 0,market_start_time,runner_name,event_name,competition_name,winner_name,selection_outcome,goal_deficit,back_price_1,lay_price_1,back_price_1_league,lay_price_1_league,back_price_1_league_after,lay_price_1_league_after,back_price_1_league_after_max,lay_price_1_league_after_max,£1_match_back_outcome,£1_match_lay_outcome,£1_league_back_outcome,£1_league_lay_outcome
2,2024-08-10 11:30:00+00:00,Leeds,Leeds v Portsmouth,English Championship,,DRAW,0,1.27,1.28,3.8,3.85,3.9,4.0,3.95,4.0,0.0,4.571429,0.95,1.008883
6,2024-08-10 14:00:00+00:00,Doncaster,Doncaster v Accrington,English League 2,Doncaster Rovers,WIN,3,1.58,1.59,8.8,9.0,7.8,8.6,7.2,7.6,1.58,0.0,1.157895,0.96875
8,2024-08-10 14:00:00+00:00,Rangers,Rangers v Motherwell,Scottish Premiership,Rangers,WIN,1,1.2,1.21,4.3,4.6,4.2,4.3,4.6,4.8,1.2,0.0,0.895833,1.0
11,2024-08-10 16:30:00+00:00,Birmingham,Birmingham v Reading,English League 1,,DRAW,0,1.54,1.55,2.94,3.15,3.3,3.4,2.88,3.05,0.0,2.818182,0.963934,0.956395


In [368]:
combined_df[interesting_cols]

Unnamed: 0,market_start_time,runner_name,event_name,competition_name,winner_name,selection_outcome,goal_deficit,back_price_1,lay_price_1,back_price_1_league,lay_price_1_league,back_price_1_league_after,lay_price_1_league_after,£1_match_back_outcome,£1_match_lay_outcome,£1_league_back_outcome,£1_league_lay_outcome
0,2024-08-09 19:00:00+00:00,Sheff Utd,Preston v Sheff Utd,English Championship,Sheffield United,WIN,2,2.38,2.4,17.0,18.0,15.5,16.0,2.38,0.0,1.096774,0.977376
1,2024-08-10 11:30:00+00:00,Norwich,Oxford Utd v Norwich,English Championship,Oxford United,LOSE,-2,2.42,2.44,19.5,21.0,21.0,970.0,0.0,1.694444,0.325,1.017188
2,2024-08-10 11:30:00+00:00,Leeds,Leeds v Portsmouth,English Championship,,DRAW,0,1.27,1.28,3.8,3.85,3.9,4.0,0.0,4.571429,0.95,1.008883
3,2024-08-10 11:30:00+00:00,West Brom,QPR v West Brom,English Championship,West Bromwich Albion,WIN,2,2.92,2.94,23.0,25.0,18.0,23.0,2.92,0.0,0.884615,0.989583
4,2024-08-10 11:30:00+00:00,Coventry,Stoke v Coventry,English Championship,Stoke City,LOSE,-1,2.94,2.96,14.0,14.5,17.0,23.0,0.0,1.510204,0.736842,1.014403
5,2024-08-10 14:00:00+00:00,MK Dons,MK Dons v Bradford,English League 2,Bradford City,LOSE,-1,2.24,2.26,6.0,6.6,7.2,8.0,0.0,1.793651,0.810811,1.014881
6,2024-08-10 14:00:00+00:00,Doncaster,Doncaster v Accrington,English League 2,Doncaster Rovers,WIN,3,1.58,1.59,8.8,9.0,7.8,8.6,1.58,0.0,1.157895,0.96875
7,2024-08-10 14:00:00+00:00,Carlisle,Gillingham v Carlisle,English League 2,Gillingham,LOSE,-3,4.0,4.1,16.5,17.5,19.0,22.0,0.0,1.322581,0.0165,1.007576
8,2024-08-10 14:00:00+00:00,Rangers,Rangers v Motherwell,Scottish Premiership,Rangers,WIN,1,1.2,1.21,4.3,4.6,4.2,4.3,1.2,0.0,0.895833,1.0
9,2024-08-10 16:30:00+00:00,Rotherham,Exeter v Rotherham,English League 1,Exeter City,LOSE,-1,2.56,2.58,11.0,11.5,11.5,15.0,0.0,1.632911,0.628571,1.017007


# Lay odds change outcome example

In [343]:
start_lay = 15
start_lay_as_back = start_lay/(start_lay - 1)
end_back = 7.5
end_back_as_lay = end_back/(end_back - 1)
lay = 1
liab = start_lay - 1
need_to_pay = start_lay/end_back
lay_change = start_lay_as_back/end_back_as_lay

In [344]:
need_to_pay

2.0

In [345]:
lay_change

0.9285714285714286

In [346]:
lay_change*liab

13.0