# Scope Odds Data & Merge Game, Image, and Odds Data

In [1]:
import pathlib
import sys
import pickle

import numpy as np
import pandas as pd

# Load the "autoreload" extension so edited modules can be re-imported
# without restarting the kernel
%load_ext autoreload
# Mode 1: before running a cell, reload only the modules registered with "%aimport"
%autoreload 1
# Add the project's 'src' directory (under the parent of the current working
# directory) to the import path.
# sys.path entries must be plain strings: the import system ignores any other
# type, so a pathlib.Path appended as-is would never be searched.
src_dir = pathlib.Path.cwd().resolve().parent / 'src'
if str(src_dir) not in sys.path:  # keep the cell idempotent when re-run
    sys.path.append(str(src_dir))
# import my class code from the source
# %aimport src-dir.filename

# Widen the pandas display so wide tables are shown in full
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Data folders, resolved from the parent of the current working directory
PROJECT_DIR = pathlib.Path.cwd().resolve().parent
DATA_DIR = PROJECT_DIR.joinpath('data')
INTERIM_DATA_DIR = DATA_DIR.joinpath('02-interim')
REF_DATA_DIR = DATA_DIR.joinpath('reference')

## Scope Odds Data

In [2]:
# Season labels of the form 'YYYY-YYYY', one per starting year 2009..2017
seasons = ['{}-{}'.format(start, start + 1) for start in range(2009, 2018)]
seasons

['2009-2010',
 '2010-2011',
 '2011-2012',
 '2012-2013',
 '2013-2014',
 '2014-2015',
 '2015-2016',
 '2016-2017',
 '2017-2018']

In [3]:
# Filter values: `country` and `indb_league` are compared against the
# indatabet 'country' / 'league' columns; `league` names this project's
# folders and files
country = 'england'
indb_league = 'premier-league'
league = 'english-premier-league'

# File names derived from the league name
std_names_fp = REF_DATA_DIR / '{}_std_name_dict.pkl'.format(league)
indatabet_fn = 'indatabet-free-odds-{}.csv'.format(league)
football_data_fn = 'merge-1-{}.csv'.format(league)
save_fn = 'merge-2-{}.csv'.format(league)

## Indatabet Odds Data

In [4]:
# Column dtypes for reading the indatabet odds CSV.
# Text columns use the builtin `object`: the `np.object` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where accessing it raises AttributeError.
# NOTE(review): full-time goals are read as object while half-time goals are
# float64 - presumably the raw full-time columns hold non-numeric entries;
# confirm against the source file.
dtypes = {'id_fifa': object, 'country': object, 'league': object,
          'season': object, 'h': object, 'a': object,
          'h_htgoals': np.float64, 'a_htgoals': np.float64,
          'h_ftgoals': object, 'a_ftgoals': object, 'et_pen_awd': object,
          'odds_hwin_pinn': np.float64, 'odds_draw_pinn': np.float64, 'odds_awin_pinn': np.float64,
          'odds_hwin_bet365': np.float64, 'odds_draw_bet365': np.float64, 'odds_awin_bet365': np.float64,
          'odds_ftgoalso2.5_pinn': np.float64, 'odds_ftgoalsu2.5_pinn': np.float64,
          'odds_ftgoalso2.5_bet365': np.float64, 'odds_ftgoalsu2.5_bet365': np.float64}
# Read the full indatabet odds table; the 'date' column is parsed day-first
odds_csv_fp = INTERIM_DATA_DIR / 'indatabet-free-odds' / 'indatabet-free-odds.csv'
odds_df = pd.read_csv(
    odds_csv_fp,
    index_col=None,
    dtype=dtypes,
    parse_dates=['date'],
    dayfirst=True,
)
odds_df.head()

Unnamed: 0,date,id_fifa,country,league,season,h,a,h_htgoals,a_htgoals,h_ftgoals,a_ftgoals,et_pen_awd,odds_hwin_pinn,odds_draw_pinn,odds_awin_pinn,odds_hwin_bet365,odds_draw_bet365,odds_awin_bet365,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365
0,2006-08-16,eur,europe,euro,,belgium,kazakhstan,0.0,0.0,0.0,0.0,,,,,1.1,7.0,15.0,,,,
1,2006-09-02,eur,europe,euro,,spain,liechtenstein,2.0,0.0,4.0,0.0,,,,,1.01,10.0,51.0,,,,
2,2006-09-02,eur,europe,euro,,luxembourg,netherlands,0.0,1.0,0.0,1.0,,,,,51.0,10.0,1.01,,,,
3,2006-09-02,eur,europe,euro,,moldova,greece,0.0,1.0,0.0,1.0,,,,,5.0,3.6,1.57,,,,
4,2006-09-02,eur,europe,euro,,serbia,azerbaijan,0.0,0.0,1.0,0.0,,,,,1.1,7.0,15.0,,,,


In [5]:
# Show column dtypes and non-null counts for the full odds table
odds_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 228764 entries, 0 to 228763
Data columns (total 22 columns):
date                       228764 non-null datetime64[ns]
id_fifa                    228764 non-null object
country                    228764 non-null object
league                     228764 non-null object
season                     174088 non-null object
h                          228764 non-null object
a                          228764 non-null object
h_htgoals                  206228 non-null float64
a_htgoals                  206228 non-null float64
h_ftgoals                  228729 non-null object
a_ftgoals                  228688 non-null object
et_pen_awd                 216 non-null object
odds_hwin_pinn             198094 non-null float64
odds_draw_pinn             198094 non-null float64
odds_awin_pinn             198093 non-null float64
odds_hwin_bet365           221965 non-null float64
odds_draw_bet365           221965 non-null float64
odds_awin_bet365           

In [6]:
### Keep only the target league and the seasons in scope


In [7]:
# Keep the rows for the chosen country, league and seasons, ordered by match date
in_scope = (
    (odds_df['country'] == country)
    & (odds_df['league'] == indb_league)
    & odds_df['season'].isin(seasons)
)
prem_df = odds_df[in_scope].copy(deep=True).sort_values(by='date', ascending=True)
prem_df.head()

Unnamed: 0,date,id_fifa,country,league,season,h,a,h_htgoals,a_htgoals,h_ftgoals,a_ftgoals,et_pen_awd,odds_hwin_pinn,odds_draw_pinn,odds_awin_pinn,odds_hwin_bet365,odds_draw_bet365,odds_awin_bet365,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365
22050,2009-08-15,eng-pl,england,premier-league,2009-2010,everton,arsenal,0.0,3.0,1.0,6.0,,3.52,3.29,2.29,3.2,3.2,2.38,,,,
22051,2009-08-15,eng-pl,england,premier-league,2009-2010,aston-villa,wigan,0.0,1.0,0.0,2.0,,1.81,3.71,5.35,1.67,3.75,5.5,,,,
22052,2009-08-15,eng-pl,england,premier-league,2009-2010,blackburn,manchester-city,0.0,1.0,0.0,2.0,,3.18,3.45,2.38,3.5,3.25,2.2,,,,
22053,2009-08-15,eng-pl,england,premier-league,2009-2010,bolton,sunderland,0.0,1.0,0.0,1.0,,2.25,3.33,3.49,2.25,3.25,2.9,,,,
22054,2009-08-15,eng-pl,england,premier-league,2009-2010,portsmouth,fulham,0.0,1.0,0.0,1.0,,2.81,3.18,2.65,2.75,3.2,2.63,,,,


In [8]:
# Load the team-name standardization dictionary.
# Note: unpickling executes code from the file, so only load trusted pickles.
with std_names_fp.open('rb') as pkl_file:
    std_names_d = pickle.load(pkl_file)

In [9]:
# Normalize home ('h') and away ('a') team names: trim, lower-case and
# hyphenate, then swap in the standard name wherever the dictionary has one.
# Names that are not dictionary keys keep their normalized form.
for side in ('h', 'a'):
    cleaned = prem_df[side].str.strip().str.lower().str.replace(' ', '-')
    prem_df[side] = cleaned
    has_std_name = cleaned.isin(std_names_d.keys())
    prem_df.loc[has_std_name, side] = cleaned.map(std_names_d)

In [10]:
# Index the scoped odds by (season, date)
prem_df = prem_df.set_index(['season', 'date'])
prem_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,id_fifa,country,league,h,a,h_htgoals,a_htgoals,h_ftgoals,a_ftgoals,et_pen_awd,odds_hwin_pinn,odds_draw_pinn,odds_awin_pinn,odds_hwin_bet365,odds_draw_bet365,odds_awin_bet365,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365
season,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-2010,2009-08-15,eng-pl,england,premier-league,everton,arsenal,0.0,3.0,1.0,6.0,,3.52,3.29,2.29,3.2,3.2,2.38,,,,
2009-2010,2009-08-15,eng-pl,england,premier-league,aston-villa,wigan-athletic,0.0,1.0,0.0,2.0,,1.81,3.71,5.35,1.67,3.75,5.5,,,,
2009-2010,2009-08-15,eng-pl,england,premier-league,blackburn-rovers,manchester-city,0.0,1.0,0.0,2.0,,3.18,3.45,2.38,3.5,3.25,2.2,,,,
2009-2010,2009-08-15,eng-pl,england,premier-league,bolton-wanderers,sunderland,0.0,1.0,0.0,1.0,,2.25,3.33,3.49,2.25,3.25,2.9,,,,
2009-2010,2009-08-15,eng-pl,england,premier-league,portsmouth,fulham,0.0,1.0,0.0,1.0,,2.81,3.18,2.65,2.75,3.2,2.63,,,,


In [11]:
# Show column dtypes and non-null counts after filtering to the league
prem_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 3420 entries, (2009-2010, 2009-08-15 00:00:00) to (2017-2018, 2018-05-13 00:00:00)
Data columns (total 20 columns):
id_fifa                    3420 non-null object
country                    3420 non-null object
league                     3420 non-null object
h                          3420 non-null object
a                          3420 non-null object
h_htgoals                  3420 non-null float64
a_htgoals                  3420 non-null float64
h_ftgoals                  3420 non-null object
a_ftgoals                  3420 non-null object
et_pen_awd                 0 non-null object
odds_hwin_pinn             3415 non-null float64
odds_draw_pinn             3415 non-null float64
odds_awin_pinn             3415 non-null float64
odds_hwin_bet365           3419 non-null float64
odds_draw_bet365           3419 non-null float64
odds_awin_bet365           3419 non-null float64
odds_ftgoalso2.5_pinn      2770 non-null float64
odds_ftgoals

In [12]:
# Remove the extra-time/penalty flag and the identifier columns
# (FIFA id, country, league)
prem_df = prem_df.drop(columns=['et_pen_awd', 'id_fifa', 'country', 'league'])

In [13]:
# Number of rows per home team; useful for spotting names that were not standardized
prem_df['h'].value_counts()

arsenal                     171
tottenham-hotspur           171
everton                     171
stoke-city                  171
chelsea                     171
manchester-city             171
liverpool                   171
manchester-united           171
west-ham-united             152
sunderland                  152
west-bromwich-albion        152
aston-villa                 133
newcastle-united            133
swansea-city                133
southampton                 114
fulham                       95
crystal-palace               95
wigan-athletic               76
hull-city                    76
norwich-city                 76
burnley                      76
leicester-city               76
wolverhampton-wanderers      57
bournemouth                  57
queens-park-rangers          57
bolton-wanderers             57
watford                      57
blackburn-rovers             57
birmingham-city              38
cardiff-city                 19
middlesbrough                19
brighton

In [14]:
# Save the scoped odds table.
# `index` is a boolean flag (write the row labels or not), not a list of
# levels; True writes both MultiIndex levels (season, date) as the leading
# columns, which can be read back with index_col=[0, 1].
prem_df.to_csv(INTERIM_DATA_DIR / 'football-data' / league / indatabet_fn,
               index=True)

In [15]:
# Reload the saved odds table, using the first two CSV columns as the index
scoped_odds_fp = INTERIM_DATA_DIR / 'football-data' / league / indatabet_fn
odds_df2 = pd.read_csv(scoped_odds_fp, index_col=[0, 1])
odds_df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,h,a,h_htgoals,a_htgoals,h_ftgoals,a_ftgoals,odds_hwin_pinn,odds_draw_pinn,odds_awin_pinn,odds_hwin_bet365,odds_draw_bet365,odds_awin_bet365,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365
season,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2009-2010,2009-08-15,everton,arsenal,0.0,3.0,1.0,6.0,3.52,3.29,2.29,3.2,3.2,2.38,,,,
2009-2010,2009-08-15,aston-villa,wigan-athletic,0.0,1.0,0.0,2.0,1.81,3.71,5.35,1.67,3.75,5.5,,,,
2009-2010,2009-08-15,blackburn-rovers,manchester-city,0.0,1.0,0.0,2.0,3.18,3.45,2.38,3.5,3.25,2.2,,,,
2009-2010,2009-08-15,bolton-wanderers,sunderland,0.0,1.0,0.0,1.0,2.25,3.33,3.49,2.25,3.25,2.9,,,,
2009-2010,2009-08-15,portsmouth,fulham,0.0,1.0,0.0,1.0,2.81,3.18,2.65,2.75,3.2,2.63,,,,


In [16]:
# Number of rows per home team in the reloaded odds table
odds_df2['h'].value_counts()

arsenal                     171
tottenham-hotspur           171
everton                     171
stoke-city                  171
chelsea                     171
manchester-city             171
liverpool                   171
manchester-united           171
west-ham-united             152
sunderland                  152
west-bromwich-albion        152
aston-villa                 133
newcastle-united            133
swansea-city                133
southampton                 114
fulham                       95
crystal-palace               95
wigan-athletic               76
hull-city                    76
norwich-city                 76
burnley                      76
leicester-city               76
wolverhampton-wanderers      57
bournemouth                  57
queens-park-rangers          57
bolton-wanderers             57
watford                      57
blackburn-rovers             57
birmingham-city              38
cardiff-city                 19
middlesbrough                19
brighton

## Merge Game data and Odds Data

In [17]:
# Load the 'merge-1' game data file, using the first two CSV columns as the index
football_csv_fp = INTERIM_DATA_DIR / 'football-data' / league / football_data_fn
football_df = pd.read_csv(football_csv_fp, index_col=[0, 1])
football_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,a_corners,a_fouls,a_rcards,a_shots,a_sot,a_ycards,a,odds_awin_bet365,odds_draw_bet365,odds_hwin_bet365,odds_awin_BS,odds_draw_BS,odds_hwin_BS,odds_awin_BW,odds_draw_BW,odds_hwin_BW,n_Bb1X2,n_BbAsian,BbAsian_handicap,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,odds_awin_bbmean,odds_asianaway_bbmean,odds_asianaway_bbmean.1,odds_draw_bbmean,odds_hwin_bbmean,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_awin_bbmax,odds_asianaway_bbmax,odds_asianhome_bbmax,odds_draw_bbmax,odds_hwin_bbmax,n_BbOU,a_ftgoals,h_ftgoals,odds_awin_GB,odds_draw_GB,odds_hwin_GB,h_corners,h_fouls,h_rcards,h_shots,h_sot,a_htgoals,h_htgoals,h_ycards,h,odds_awin_IW,odds_draw_IW,odds_hwin_IW,odds_awin_LB,odds_draw_LB,odds_hwin_LB,odds_awin_pinn,clodds_away_pinn,clodds_draw_pinn,clodds_hwin_pinn,odds_draw_pinn,odds_hwin_pinn,odds_awin_SB,odds_draw_SB,odds_hwin_SB,odds_awin_SJ,odds_draw_SJ,odds_hwin_SJ,odds_awin_VC,odds_draw_VC,odds_hwin_VC,odds_awin_WH,odds_draw_WH,odds_hwin_WH,heatmap_path,shotmap_path
season,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1
2009-2010,2009-08-15,6,14,0,14,7,2,wigan-athletic,5.5,3.6,1.67,5.5,3.5,1.62,5.0,3.4,1.65,37,21.0,0.0,1.81,1.92,5.33,3.99,1.22,3.51,1.66,1.91,2.04,6.05,4.4,1.28,3.77,1.72,32,2,0,5.25,3.5,1.67,4,15,0,11,5,1,0,2,aston-villa,4.9,3.5,1.7,4.5,3.3,1.67,,,,,,,5.0,3.4,1.65,5.5,3.6,1.67,5.0,3.4,1.7,5.5,3.4,1.7,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...
2009-2010,2009-08-15,4,9,0,8,5,1,manchester-city,2.1,3.25,3.6,2.2,3.2,3.3,2.0,3.15,3.4,38,24.0,0.0,1.71,2.04,2.12,1.54,2.38,3.24,3.37,1.78,2.15,2.25,1.6,2.58,3.35,3.78,33,2,0,2.1,3.25,3.4,5,12,0,17,9,1,0,2,blackburn-rovers,2.2,3.2,3.2,2.1,3.2,3.0,,,,,,,2.1,3.25,3.15,2.2,3.25,3.3,2.2,3.2,3.25,2.15,3.2,3.5,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...
2009-2010,2009-08-15,7,10,0,20,13,1,sunderland,3.25,3.25,2.25,3.3,3.2,2.2,3.15,3.15,2.15,38,23.0,0.0,1.66,2.12,3.15,2.23,1.61,3.2,2.24,1.7,2.25,3.4,2.33,1.68,3.31,2.37,33,1,0,3.1,3.25,2.25,4,16,0,11,3,1,0,2,bolton-wanderers,3.2,3.2,2.2,3.0,3.2,2.1,,,,,,,2.9,3.2,2.25,3.1,3.25,2.3,3.1,3.2,2.25,3.2,3.2,2.3,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...
2009-2010,2009-08-15,4,15,0,7,3,2,hull-city,21.0,6.5,1.17,19.0,5.5,1.2,13.0,6.0,1.17,38,22.0,0.0,2.26,1.58,16.39,12.96,1.02,6.26,1.17,2.44,1.67,23.14,17.05,1.03,7.71,1.2,34,1,2,15.0,6.0,1.2,12,13,0,26,12,1,1,1,chelsea,14.0,5.5,1.2,12.0,5.5,1.17,,,,,,,15.0,5.75,1.17,17.0,6.5,1.18,17.0,6.0,1.17,21.0,6.5,1.17,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...
2009-2010,2009-08-15,9,13,0,15,9,0,arsenal,2.3,3.25,3.2,2.2,3.2,3.25,2.25,3.15,2.95,38,24.0,0.0,1.7,2.06,2.28,1.63,2.2,3.21,3.07,1.77,2.2,2.42,1.73,2.27,3.34,3.34,34,6,1,2.3,3.25,3.0,4,11,0,8,5,3,0,0,everton,2.3,3.1,3.1,2.25,3.0,2.88,,,,,,,2.2,3.2,3.0,2.25,3.25,3.25,2.3,3.3,2.9,2.3,3.2,3.2,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...


In [18]:
# Turn the (season, date) index back into columns and parse the match date
football_df = football_df.reset_index()
football_df['date'] = pd.to_datetime(football_df['date'])
# Keep this frame's own date under a separate name
football_df['date_fdcu'] = football_df['date']
football_df = football_df.sort_values(by='date')

# Cast the count-like columns (match stats, goals, bookmaker counts) to float64
num_cols = ['h_corners', 'a_corners', 'h_fouls', 'a_fouls', 'h_shots', 'a_shots',
            'a_sot', 'h_sot', 'h_rcards', 'a_rcards', 'h_ycards', 'a_ycards',
            'h_htgoals', 'a_htgoals', 'h_ftgoals', 'a_ftgoals',
            'n_Bb1X2', 'n_BbOU']
football_df = football_df.astype(dict.fromkeys(num_cols, np.float64))
football_df.head()

Unnamed: 0,season,date,a_corners,a_fouls,a_rcards,a_shots,a_sot,a_ycards,a,odds_awin_bet365,odds_draw_bet365,odds_hwin_bet365,odds_awin_BS,odds_draw_BS,odds_hwin_BS,odds_awin_BW,odds_draw_BW,odds_hwin_BW,n_Bb1X2,n_BbAsian,BbAsian_handicap,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,odds_awin_bbmean,odds_asianaway_bbmean,odds_asianaway_bbmean.1,odds_draw_bbmean,odds_hwin_bbmean,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_awin_bbmax,odds_asianaway_bbmax,odds_asianhome_bbmax,odds_draw_bbmax,odds_hwin_bbmax,n_BbOU,a_ftgoals,h_ftgoals,odds_awin_GB,odds_draw_GB,odds_hwin_GB,h_corners,h_fouls,h_rcards,h_shots,h_sot,a_htgoals,h_htgoals,h_ycards,h,odds_awin_IW,odds_draw_IW,odds_hwin_IW,odds_awin_LB,odds_draw_LB,odds_hwin_LB,odds_awin_pinn,clodds_away_pinn,clodds_draw_pinn,clodds_hwin_pinn,odds_draw_pinn,odds_hwin_pinn,odds_awin_SB,odds_draw_SB,odds_hwin_SB,odds_awin_SJ,odds_draw_SJ,odds_hwin_SJ,odds_awin_VC,odds_draw_VC,odds_hwin_VC,odds_awin_WH,odds_draw_WH,odds_hwin_WH,heatmap_path,shotmap_path,date_fdcu
0,2009-2010,2009-08-15,6.0,14.0,0.0,14.0,7.0,2.0,wigan-athletic,5.5,3.6,1.67,5.5,3.5,1.62,5.0,3.4,1.65,37.0,21.0,0.0,1.81,1.92,5.33,3.99,1.22,3.51,1.66,1.91,2.04,6.05,4.4,1.28,3.77,1.72,32.0,2.0,0.0,5.25,3.5,1.67,4.0,15.0,0.0,11.0,5.0,1.0,0.0,2.0,aston-villa,4.9,3.5,1.7,4.5,3.3,1.67,,,,,,,5.0,3.4,1.65,5.5,3.6,1.67,5.0,3.4,1.7,5.5,3.4,1.7,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15
1,2009-2010,2009-08-15,4.0,9.0,0.0,8.0,5.0,1.0,manchester-city,2.1,3.25,3.6,2.2,3.2,3.3,2.0,3.15,3.4,38.0,24.0,0.0,1.71,2.04,2.12,1.54,2.38,3.24,3.37,1.78,2.15,2.25,1.6,2.58,3.35,3.78,33.0,2.0,0.0,2.1,3.25,3.4,5.0,12.0,0.0,17.0,9.0,1.0,0.0,2.0,blackburn-rovers,2.2,3.2,3.2,2.1,3.2,3.0,,,,,,,2.1,3.25,3.15,2.2,3.25,3.3,2.2,3.2,3.25,2.15,3.2,3.5,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15
2,2009-2010,2009-08-15,7.0,10.0,0.0,20.0,13.0,1.0,sunderland,3.25,3.25,2.25,3.3,3.2,2.2,3.15,3.15,2.15,38.0,23.0,0.0,1.66,2.12,3.15,2.23,1.61,3.2,2.24,1.7,2.25,3.4,2.33,1.68,3.31,2.37,33.0,1.0,0.0,3.1,3.25,2.25,4.0,16.0,0.0,11.0,3.0,1.0,0.0,2.0,bolton-wanderers,3.2,3.2,2.2,3.0,3.2,2.1,,,,,,,2.9,3.2,2.25,3.1,3.25,2.3,3.1,3.2,2.25,3.2,3.2,2.3,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15
3,2009-2010,2009-08-15,4.0,15.0,0.0,7.0,3.0,2.0,hull-city,21.0,6.5,1.17,19.0,5.5,1.2,13.0,6.0,1.17,38.0,22.0,0.0,2.26,1.58,16.39,12.96,1.02,6.26,1.17,2.44,1.67,23.14,17.05,1.03,7.71,1.2,34.0,1.0,2.0,15.0,6.0,1.2,12.0,13.0,0.0,26.0,12.0,1.0,1.0,1.0,chelsea,14.0,5.5,1.2,12.0,5.5,1.17,,,,,,,15.0,5.75,1.17,17.0,6.5,1.18,17.0,6.0,1.17,21.0,6.5,1.17,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15
4,2009-2010,2009-08-15,9.0,13.0,0.0,15.0,9.0,0.0,arsenal,2.3,3.25,3.2,2.2,3.2,3.25,2.25,3.15,2.95,38.0,24.0,0.0,1.7,2.06,2.28,1.63,2.2,3.21,3.07,1.77,2.2,2.42,1.73,2.27,3.34,3.34,34.0,6.0,1.0,2.3,3.25,3.0,4.0,11.0,0.0,8.0,5.0,3.0,0.0,0.0,everton,2.3,3.1,3.1,2.25,3.0,2.88,,,,,,,2.2,3.2,3.0,2.25,3.25,3.25,2.3,3.3,2.9,2.3,3.2,3.2,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15


In [19]:
# Show column dtypes and non-null counts of the prepared game data
football_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3420 entries, 0 to 3419
Data columns (total 77 columns):
season                     3420 non-null object
date                       3420 non-null datetime64[ns]
a_corners                  3420 non-null float64
a_fouls                    3420 non-null float64
a_rcards                   3420 non-null float64
a_shots                    3420 non-null float64
a_sot                      3420 non-null float64
a_ycards                   3420 non-null float64
a                          3420 non-null object
odds_awin_bet365           3420 non-null float64
odds_draw_bet365           3420 non-null float64
odds_hwin_bet365           3420 non-null float64
odds_awin_BS               1520 non-null float64
odds_draw_BS               1520 non-null float64
odds_hwin_BS               1520 non-null float64
odds_awin_BW               3419 non-null float64
odds_draw_BW               3419 non-null float64
odds_hwin_BW               3419 non-null float64
n_Bb1X

In [20]:
# Turn the (season, date) index back into columns and parse the date
odds_df2 = odds_df2.reset_index()
odds_df2['date'] = pd.to_datetime(odds_df2['date'])
# Keep this frame's own date under a separate name
odds_df2['date_ic'] = odds_df2['date']
odds_df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3420 entries, 0 to 3419
Data columns (total 19 columns):
season                     3420 non-null object
date                       3420 non-null datetime64[ns]
h                          3420 non-null object
a                          3420 non-null object
h_htgoals                  3420 non-null float64
a_htgoals                  3420 non-null float64
h_ftgoals                  3420 non-null float64
a_ftgoals                  3420 non-null float64
odds_hwin_pinn             3415 non-null float64
odds_draw_pinn             3415 non-null float64
odds_awin_pinn             3415 non-null float64
odds_hwin_bet365           3419 non-null float64
odds_draw_bet365           3419 non-null float64
odds_awin_bet365           3419 non-null float64
odds_ftgoalso2.5_pinn      2770 non-null float64
odds_ftgoalsu2.5_pinn      2770 non-null float64
odds_ftgoalso2.5_bet365    3017 non-null float64
odds_ftgoalsu2.5_bet365    3017 non-null float64
date_ic

In [21]:
## Merge

In [22]:
# Attach the odds_df2 columns to every football_df row with an as-of join on
# `date`. Two rows only pair up when home team, away team and both full-time
# scores are equal AND their dates are at most 2 days apart in either
# direction (direction='nearest').
# merge_asof is a left join: every football_df row is kept, and rows with no
# partner inside the tolerance get NaN/NaT in the odds_df2 columns.
# merge_asof requires both frames to be sorted on the `on` key, so sort
# explicitly here (stable sort, so same-day rows keep their existing order)
# instead of relying on the state left behind by earlier cells.
# NOTE(review): overlapping columns coming from football_df receive '_ic' and
# those coming from odds_df2 receive '_fdcu', yet `date_ic` is created on
# odds_df2 and `date_fdcu` has to come from football_df - the suffixes look
# swapped. Left as-is because the saved column names depend on them; confirm
# before renaming.
merged = pd.merge_asof(football_df.sort_values('date', kind='mergesort'),
                       odds_df2.sort_values('date', kind='mergesort'),
                       on='date',
                       by=['h', 'a', 'h_ftgoals', 'a_ftgoals'],
                       suffixes=('_ic', '_fdcu'),
                       tolerance=pd.Timedelta(days=2),
                       direction='nearest'
                       )
# Gap between the two sources' dates for each row (NaT when nothing matched)
merged['dates_diff'] = merged['date_ic'] - merged['date_fdcu']
# TODO: write the merge issues (rows with a non-zero gap) to a yaml file;
# nothing in this cell does that yet.

# Rebind instead of sorting in place so re-running the cell is predictable;
# the stable sort keeps the order of matches played on the same day.
merged = merged.sort_values(by='date', ascending=True, kind='mergesort')

In [23]:
# Preview the first five merged rows
merged.head(n=5)

Unnamed: 0,season_ic,date,a_corners,a_fouls,a_rcards,a_shots,a_sot,a_ycards,a,odds_awin_bet365_ic,odds_draw_bet365_ic,odds_hwin_bet365_ic,odds_awin_BS,odds_draw_BS,odds_hwin_BS,odds_awin_BW,odds_draw_BW,odds_hwin_BW,n_Bb1X2,n_BbAsian,BbAsian_handicap,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,odds_awin_bbmean,odds_asianaway_bbmean,odds_asianaway_bbmean.1,odds_draw_bbmean,odds_hwin_bbmean,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_awin_bbmax,odds_asianaway_bbmax,odds_asianhome_bbmax,odds_draw_bbmax,odds_hwin_bbmax,n_BbOU,a_ftgoals,h_ftgoals,odds_awin_GB,odds_draw_GB,odds_hwin_GB,h_corners,h_fouls,h_rcards,h_shots,h_sot,a_htgoals_ic,h_htgoals_ic,h_ycards,h,odds_awin_IW,odds_draw_IW,odds_hwin_IW,odds_awin_LB,odds_draw_LB,odds_hwin_LB,odds_awin_pinn_ic,clodds_away_pinn,clodds_draw_pinn,clodds_hwin_pinn,odds_draw_pinn_ic,odds_hwin_pinn_ic,odds_awin_SB,odds_draw_SB,odds_hwin_SB,odds_awin_SJ,odds_draw_SJ,odds_hwin_SJ,odds_awin_VC,odds_draw_VC,odds_hwin_VC,odds_awin_WH,odds_draw_WH,odds_hwin_WH,heatmap_path,shotmap_path,date_fdcu,season_fdcu,h_htgoals_fdcu,a_htgoals_fdcu,odds_hwin_pinn_fdcu,odds_draw_pinn_fdcu,odds_awin_pinn_fdcu,odds_hwin_bet365_fdcu,odds_draw_bet365_fdcu,odds_awin_bet365_fdcu,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365,date_ic,dates_diff
0,2009-2010,2009-08-15,6.0,14.0,0.0,14.0,7.0,2.0,wigan-athletic,5.5,3.6,1.67,5.5,3.5,1.62,5.0,3.4,1.65,37.0,21.0,0.0,1.81,1.92,5.33,3.99,1.22,3.51,1.66,1.91,2.04,6.05,4.4,1.28,3.77,1.72,32.0,2.0,0.0,5.25,3.5,1.67,4.0,15.0,0.0,11.0,5.0,1.0,0.0,2.0,aston-villa,4.9,3.5,1.7,4.5,3.3,1.67,,,,,,,5.0,3.4,1.65,5.5,3.6,1.67,5.0,3.4,1.7,5.5,3.4,1.7,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,1.81,3.71,5.35,1.67,3.75,5.5,,,,,2009-08-15,0 days
1,2009-2010,2009-08-15,4.0,9.0,0.0,8.0,5.0,1.0,manchester-city,2.1,3.25,3.6,2.2,3.2,3.3,2.0,3.15,3.4,38.0,24.0,0.0,1.71,2.04,2.12,1.54,2.38,3.24,3.37,1.78,2.15,2.25,1.6,2.58,3.35,3.78,33.0,2.0,0.0,2.1,3.25,3.4,5.0,12.0,0.0,17.0,9.0,1.0,0.0,2.0,blackburn-rovers,2.2,3.2,3.2,2.1,3.2,3.0,,,,,,,2.1,3.25,3.15,2.2,3.25,3.3,2.2,3.2,3.25,2.15,3.2,3.5,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,3.18,3.45,2.38,3.5,3.25,2.2,,,,,2009-08-15,0 days
2,2009-2010,2009-08-15,7.0,10.0,0.0,20.0,13.0,1.0,sunderland,3.25,3.25,2.25,3.3,3.2,2.2,3.15,3.15,2.15,38.0,23.0,0.0,1.66,2.12,3.15,2.23,1.61,3.2,2.24,1.7,2.25,3.4,2.33,1.68,3.31,2.37,33.0,1.0,0.0,3.1,3.25,2.25,4.0,16.0,0.0,11.0,3.0,1.0,0.0,2.0,bolton-wanderers,3.2,3.2,2.2,3.0,3.2,2.1,,,,,,,2.9,3.2,2.25,3.1,3.25,2.3,3.1,3.2,2.25,3.2,3.2,2.3,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,2.25,3.33,3.49,2.25,3.25,2.9,,,,,2009-08-15,0 days
3,2009-2010,2009-08-15,4.0,15.0,0.0,7.0,3.0,2.0,hull-city,21.0,6.5,1.17,19.0,5.5,1.2,13.0,6.0,1.17,38.0,22.0,0.0,2.26,1.58,16.39,12.96,1.02,6.26,1.17,2.44,1.67,23.14,17.05,1.03,7.71,1.2,34.0,1.0,2.0,15.0,6.0,1.2,12.0,13.0,0.0,26.0,12.0,1.0,1.0,1.0,chelsea,14.0,5.5,1.2,12.0,5.5,1.17,,,,,,,15.0,5.75,1.17,17.0,6.5,1.18,17.0,6.0,1.17,21.0,6.5,1.17,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,1.0,1.0,1.21,8.4,21.0,1.2,6.5,19.0,,,,,2009-08-15,0 days
4,2009-2010,2009-08-15,9.0,13.0,0.0,15.0,9.0,0.0,arsenal,2.3,3.25,3.2,2.2,3.2,3.25,2.25,3.15,2.95,38.0,24.0,0.0,1.7,2.06,2.28,1.63,2.2,3.21,3.07,1.77,2.2,2.42,1.73,2.27,3.34,3.34,34.0,6.0,1.0,2.3,3.25,3.0,4.0,11.0,0.0,8.0,5.0,3.0,0.0,0.0,everton,2.3,3.1,3.1,2.25,3.0,2.88,,,,,,,2.2,3.2,3.0,2.25,3.25,3.25,2.3,3.3,2.9,2.3,3.2,3.2,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,3.0,3.52,3.29,2.29,3.2,3.2,2.38,,,,,2009-08-15,0 days


In [24]:
# Rows where the two sources disagree on the match date.
# Compare against a Timedelta directly instead of casting to int: the cast
# depends on the platform's integer width and on the nanosecond integer view
# of the column. The absolute value is used because direction='nearest' in
# the merge allows the gap to be negative as well as positive; unmatched rows
# (NaT) compare False and are left out.
diffs = merged[merged['dates_diff'].abs() > pd.Timedelta(0)]
diffs.iloc[0:5]

Unnamed: 0,season_ic,date,a_corners,a_fouls,a_rcards,a_shots,a_sot,a_ycards,a,odds_awin_bet365_ic,odds_draw_bet365_ic,odds_hwin_bet365_ic,odds_awin_BS,odds_draw_BS,odds_hwin_BS,odds_awin_BW,odds_draw_BW,odds_hwin_BW,n_Bb1X2,n_BbAsian,BbAsian_handicap,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,odds_awin_bbmean,odds_asianaway_bbmean,odds_asianaway_bbmean.1,odds_draw_bbmean,odds_hwin_bbmean,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_awin_bbmax,odds_asianaway_bbmax,odds_asianhome_bbmax,odds_draw_bbmax,odds_hwin_bbmax,n_BbOU,a_ftgoals,h_ftgoals,odds_awin_GB,odds_draw_GB,odds_hwin_GB,h_corners,h_fouls,h_rcards,h_shots,h_sot,a_htgoals_ic,h_htgoals_ic,h_ycards,h,odds_awin_IW,odds_draw_IW,odds_hwin_IW,odds_awin_LB,odds_draw_LB,odds_hwin_LB,odds_awin_pinn_ic,clodds_away_pinn,clodds_draw_pinn,clodds_hwin_pinn,odds_draw_pinn_ic,odds_hwin_pinn_ic,odds_awin_SB,odds_draw_SB,odds_hwin_SB,odds_awin_SJ,odds_draw_SJ,odds_hwin_SJ,odds_awin_VC,odds_draw_VC,odds_hwin_VC,odds_awin_WH,odds_draw_WH,odds_hwin_WH,heatmap_path,shotmap_path,date_fdcu,season_fdcu,h_htgoals_fdcu,a_htgoals_fdcu,odds_hwin_pinn_fdcu,odds_draw_pinn_fdcu,odds_awin_pinn_fdcu,odds_hwin_bet365_fdcu,odds_draw_bet365_fdcu,odds_awin_bet365_fdcu,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365,date_ic,dates_diff
118,2009-2010,2009-11-21,4.0,6.0,0.0,11.0,5.0,2.0,everton,9.0,4.5,1.4,8.5,4.33,1.4,9.0,4.6,1.38,37.0,22.0,0.0,1.94,1.8,8.56,6.4,1.1,4.37,1.39,2.02,1.91,10.29,7.1,1.11,4.66,1.45,32.0,0.0,3.0,8.0,4.2,1.4,13.0,4.0,0.0,18.0,11.0,0.0,1.0,2.0,manchester-united,8.5,4.5,1.35,8.0,4.5,1.4,,,,,,,8.0,4.2,1.35,9.0,4.5,1.36,9.0,4.4,1.4,9.0,4.2,1.4,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-11-21,2009-2010,1.0,0.0,1.42,4.61,9.75,1.33,4.33,9.0,,,,,2009-11-22,1 days
253,2009-2010,2010-02-17,9.0,9.0,0.0,8.0,0.0,1.0,bolton-wanderers,3.2,3.25,2.3,3.1,3.2,2.3,3.2,3.2,2.2,38.0,27.0,0.0,1.73,2.05,3.16,2.26,1.61,3.23,2.26,1.81,2.16,3.43,2.42,1.66,3.4,2.35,37.0,0.0,0.0,3.1,3.1,2.3,4.0,6.0,0.0,10.0,1.0,0.0,0.0,0.0,wigan-athletic,3.2,3.2,2.2,3.3,3.3,2.2,,,,,,,3.0,3.2,2.2,3.25,3.25,2.25,3.4,3.25,2.25,3.2,3.2,2.3,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2010-02-17,2009-2010,0.0,0.0,2.29,3.4,3.39,2.25,3.25,3.4,,,,,2010-02-18,1 days
276,2009-2010,2010-03-07,1.0,21.0,0.0,7.0,4.0,1.0,hull-city,10.0,4.5,1.36,9.0,4.5,1.36,8.5,4.5,1.35,39.0,27.0,-1.25,1.98,1.77,9.04,2.03,1.85,4.45,1.37,2.11,1.86,10.92,2.06,1.9,4.94,1.4,36.0,1.0,5.0,9.0,4.5,1.35,4.0,12.0,0.0,18.0,10.0,1.0,2.0,0.0,everton,7.5,4.3,1.4,8.0,4.5,1.4,,,,,,,8.0,4.2,1.35,9.0,4.5,1.36,10.0,4.75,1.36,9.0,4.5,1.36,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2010-03-07,2009-2010,2.0,1.0,1.37,4.96,10.57,1.36,4.33,7.5,,,,,2010-03-08,1 days
369,2009-2010,2010-05-05,7.0,14.0,0.0,10.0,6.0,3.0,tottenham-hotspur,3.8,3.4,2.0,3.8,3.5,1.95,3.7,3.5,1.9,34.0,19.0,0.0,2.03,1.74,3.7,2.62,1.43,3.42,2.0,2.15,1.8,3.85,2.75,1.5,3.6,2.09,31.0,1.0,0.0,3.5,3.4,2.0,9.0,12.0,0.0,10.0,5.0,0.0,0.0,0.0,manchester-city,3.7,3.3,1.9,3.6,3.5,2.0,,,,,,,3.6,3.4,1.9,3.8,3.4,2.0,3.75,3.5,2.05,3.75,3.4,2.0,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2010-05-05,2009-2010,0.0,0.0,2.06,3.61,3.97,1.91,3.4,3.8,,,,,2010-05-06,1 days
386,2010-2011,2010-08-14,7.0,15.0,0.0,12.0,2.0,2.0,west-ham-united,4.0,3.3,2.0,4.33,3.4,1.8,4.1,3.4,1.85,39.0,20.0,0.0,1.75,2.01,4.03,2.78,1.4,3.3,1.96,1.82,2.11,4.33,3.0,1.45,3.4,2.09,35.0,0.0,3.0,3.75,3.25,2.0,16.0,15.0,0.0,23.0,11.0,0.0,2.0,1.0,aston-villa,4.0,3.4,1.8,4.33,3.4,1.83,,,,,,,3.75,3.25,1.9,3.75,3.4,2.0,4.2,3.25,2.0,4.33,3.25,1.91,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2010-08-14,2010-2011,2.0,0.0,1.74,3.88,5.39,1.62,3.6,5.0,2.0,1.91,1.95,1.85,2010-08-15,1 days


In [25]:
# Tally how many rows fall on each distinct date gap
merged.loc[:, 'dates_diff'].value_counts()

0 days    3393
1 days      27
Name: dates_diff, dtype: int64

In [26]:
# Print the dtype and non-null count of every column in the merged frame
merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3420 entries, 0 to 3419
Data columns (total 92 columns):
season_ic                  3420 non-null object
date                       3420 non-null datetime64[ns]
a_corners                  3420 non-null float64
a_fouls                    3420 non-null float64
a_rcards                   3420 non-null float64
a_shots                    3420 non-null float64
a_sot                      3420 non-null float64
a_ycards                   3420 non-null float64
a                          3420 non-null object
odds_awin_bet365_ic        3420 non-null float64
odds_draw_bet365_ic        3420 non-null float64
odds_hwin_bet365_ic        3420 non-null float64
odds_awin_BS               1520 non-null float64
odds_draw_BS               1520 non-null float64
odds_hwin_BS               1520 non-null float64
odds_awin_BW               3419 non-null float64
odds_draw_BW               3419 non-null float64
odds_hwin_BW               3419 non-null float64
n_Bb1X

### Save and Re-open

In [27]:
# Write the merged frame, without its row index, to the league's interim folder
merged.to_csv(
    path_or_buf=INTERIM_DATA_DIR.joinpath('football-data', league, save_fn),
    index=False,
)

In [28]:
# Re-open the file that was just written to check that it round-trips.
df = pd.read_csv(INTERIM_DATA_DIR / 'football-data' / league / save_fn,
                 parse_dates=['date', 'date_fdcu', 'date_ic'], index_col=None)
# The CSV stores the date gap as text (e.g. "0 days 00:00:00.000000000") and
# read_csv's parse_dates only handles datetimes, so restore the timedelta
# dtype explicitly; missing values become NaT.
df['dates_diff'] = pd.to_timedelta(df['dates_diff'])
df.head()

Unnamed: 0,season_ic,date,a_corners,a_fouls,a_rcards,a_shots,a_sot,a_ycards,a,odds_awin_bet365_ic,odds_draw_bet365_ic,odds_hwin_bet365_ic,odds_awin_BS,odds_draw_BS,odds_hwin_BS,odds_awin_BW,odds_draw_BW,odds_hwin_BW,n_Bb1X2,n_BbAsian,BbAsian_handicap,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,odds_awin_bbmean,odds_asianaway_bbmean,odds_asianaway_bbmean.1,odds_draw_bbmean,odds_hwin_bbmean,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_awin_bbmax,odds_asianaway_bbmax,odds_asianhome_bbmax,odds_draw_bbmax,odds_hwin_bbmax,n_BbOU,a_ftgoals,h_ftgoals,odds_awin_GB,odds_draw_GB,odds_hwin_GB,h_corners,h_fouls,h_rcards,h_shots,h_sot,a_htgoals_ic,h_htgoals_ic,h_ycards,h,odds_awin_IW,odds_draw_IW,odds_hwin_IW,odds_awin_LB,odds_draw_LB,odds_hwin_LB,odds_awin_pinn_ic,clodds_away_pinn,clodds_draw_pinn,clodds_hwin_pinn,odds_draw_pinn_ic,odds_hwin_pinn_ic,odds_awin_SB,odds_draw_SB,odds_hwin_SB,odds_awin_SJ,odds_draw_SJ,odds_hwin_SJ,odds_awin_VC,odds_draw_VC,odds_hwin_VC,odds_awin_WH,odds_draw_WH,odds_hwin_WH,heatmap_path,shotmap_path,date_fdcu,season_fdcu,h_htgoals_fdcu,a_htgoals_fdcu,odds_hwin_pinn_fdcu,odds_draw_pinn_fdcu,odds_awin_pinn_fdcu,odds_hwin_bet365_fdcu,odds_draw_bet365_fdcu,odds_awin_bet365_fdcu,odds_ftgoalso2.5_pinn,odds_ftgoalsu2.5_pinn,odds_ftgoalso2.5_bet365,odds_ftgoalsu2.5_bet365,date_ic,dates_diff
0,2009-2010,2009-08-15,6.0,14.0,0.0,14.0,7.0,2.0,wigan-athletic,5.5,3.6,1.67,5.5,3.5,1.62,5.0,3.4,1.65,37.0,21.0,0.0,1.81,1.92,5.33,3.99,1.22,3.51,1.66,1.91,2.04,6.05,4.4,1.28,3.77,1.72,32.0,2.0,0.0,5.25,3.5,1.67,4.0,15.0,0.0,11.0,5.0,1.0,0.0,2.0,aston-villa,4.9,3.5,1.7,4.5,3.3,1.67,,,,,,,5.0,3.4,1.65,5.5,3.6,1.67,5.0,3.4,1.7,5.5,3.4,1.7,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,1.81,3.71,5.35,1.67,3.75,5.5,,,,,2009-08-15,0 days 00:00:00.000000000
1,2009-2010,2009-08-15,4.0,9.0,0.0,8.0,5.0,1.0,manchester-city,2.1,3.25,3.6,2.2,3.2,3.3,2.0,3.15,3.4,38.0,24.0,0.0,1.71,2.04,2.12,1.54,2.38,3.24,3.37,1.78,2.15,2.25,1.6,2.58,3.35,3.78,33.0,2.0,0.0,2.1,3.25,3.4,5.0,12.0,0.0,17.0,9.0,1.0,0.0,2.0,blackburn-rovers,2.2,3.2,3.2,2.1,3.2,3.0,,,,,,,2.1,3.25,3.15,2.2,3.25,3.3,2.2,3.2,3.25,2.15,3.2,3.5,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,3.18,3.45,2.38,3.5,3.25,2.2,,,,,2009-08-15,0 days 00:00:00.000000000
2,2009-2010,2009-08-15,7.0,10.0,0.0,20.0,13.0,1.0,sunderland,3.25,3.25,2.25,3.3,3.2,2.2,3.15,3.15,2.15,38.0,23.0,0.0,1.66,2.12,3.15,2.23,1.61,3.2,2.24,1.7,2.25,3.4,2.33,1.68,3.31,2.37,33.0,1.0,0.0,3.1,3.25,2.25,4.0,16.0,0.0,11.0,3.0,1.0,0.0,2.0,bolton-wanderers,3.2,3.2,2.2,3.0,3.2,2.1,,,,,,,2.9,3.2,2.25,3.1,3.25,2.3,3.1,3.2,2.25,3.2,3.2,2.3,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,1.0,2.25,3.33,3.49,2.25,3.25,2.9,,,,,2009-08-15,0 days 00:00:00.000000000
3,2009-2010,2009-08-15,4.0,15.0,0.0,7.0,3.0,2.0,hull-city,21.0,6.5,1.17,19.0,5.5,1.2,13.0,6.0,1.17,38.0,22.0,0.0,2.26,1.58,16.39,12.96,1.02,6.26,1.17,2.44,1.67,23.14,17.05,1.03,7.71,1.2,34.0,1.0,2.0,15.0,6.0,1.2,12.0,13.0,0.0,26.0,12.0,1.0,1.0,1.0,chelsea,14.0,5.5,1.2,12.0,5.5,1.17,,,,,,,15.0,5.75,1.17,17.0,6.5,1.18,17.0,6.0,1.17,21.0,6.5,1.17,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,1.0,1.0,1.21,8.4,21.0,1.2,6.5,19.0,,,,,2009-08-15,0 days 00:00:00.000000000
4,2009-2010,2009-08-15,9.0,13.0,0.0,15.0,9.0,0.0,arsenal,2.3,3.25,3.2,2.2,3.2,3.25,2.25,3.15,2.95,38.0,24.0,0.0,1.7,2.06,2.28,1.63,2.2,3.21,3.07,1.77,2.2,2.42,1.73,2.27,3.34,3.34,34.0,6.0,1.0,2.3,3.25,3.0,4.0,11.0,0.0,8.0,5.0,3.0,0.0,0.0,everton,2.3,3.1,3.1,2.25,3.0,2.88,,,,,,,2.2,3.2,3.0,2.25,3.25,3.25,2.3,3.3,2.9,2.3,3.2,3.2,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,2009-08-15,2009-2010,0.0,3.0,3.52,3.29,2.29,3.2,3.2,2.38,,,,,2009-08-15,0 days 00:00:00.000000000


In [29]:
# Print the dtype and non-null count of every column in the re-loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3420 entries, 0 to 3419
Data columns (total 92 columns):
season_ic                  3420 non-null object
date                       3420 non-null datetime64[ns]
a_corners                  3420 non-null float64
a_fouls                    3420 non-null float64
a_rcards                   3420 non-null float64
a_shots                    3420 non-null float64
a_sot                      3420 non-null float64
a_ycards                   3420 non-null float64
a                          3420 non-null object
odds_awin_bet365_ic        3420 non-null float64
odds_draw_bet365_ic        3420 non-null float64
odds_hwin_bet365_ic        3420 non-null float64
odds_awin_BS               1520 non-null float64
odds_draw_BS               1520 non-null float64
odds_hwin_BS               1520 non-null float64
odds_awin_BW               3419 non-null float64
odds_draw_BW               3419 non-null float64
odds_hwin_BW               3419 non-null float64
n_Bb1X