# Shift Data to Historical Records

In [1]:
import pathlib
import sys

import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.mlab as mlab

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the "autoreload" extension
%load_ext autoreload
# Mode 1: only modules registered with "%aimport" are reloaded before each cell runs
# (NOTE(review): no "%aimport" line is visible in this cell - confirm one is still needed).
%autoreload 1
# Put the project root (the parent of the current working directory) on sys.path
# so that project packages can be imported from inside the notebook directory.
PROJECT_DIR = pathlib.Path.cwd().resolve().parent
sys.path.append(str(PROJECT_DIR))

# The reshaped frames are very wide (one column per feature per lag), so raise
# the display limits to keep pandas from truncating them.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Pipeline stage directories: this notebook reads from '03-scoped' and writes to '04-processed'.
DATA_DIR = PROJECT_DIR / 'data'
SCOPED_DATA_DIR = DATA_DIR / '03-scoped'
PROCESSED_DATA_DIR = DATA_DIR / '04-processed'

In [2]:
# League to process and the seasons to keep from its scoped file.
league = 'english-premier-league'
seasons = ['2009-2010', '2010-2011', '2011-2012']

# Scoped input lives at <SCOPED_DATA_DIR>/<league>/<league>-scoped-1.csv
scoped_filename = league + '-scoped-1.csv'
load_fp = SCOPED_DATA_DIR / league / scoped_filename


In [3]:
# Load the scoped match data and keep only the seasons of interest.
# The explicit .copy() detaches the filtered rows from the frame read from
# disk, so adding the 'result' column below writes to a real frame and does
# not trigger SettingWithCopyWarning.
df_orig = pd.read_csv(load_fp, parse_dates=['date'], index_col=None)
df_orig = df_orig[df_orig['season'].isin(seasons)].copy()

# Calculate Results column from full-time goals. Rows where no condition
# holds (e.g. missing goals, where every comparison is False) are labelled
# 'not-played'.
conditions = [df_orig['h_ftgoals'] > df_orig['a_ftgoals'],
              df_orig['h_ftgoals'] == df_orig['a_ftgoals'],
              df_orig['h_ftgoals'] < df_orig['a_ftgoals']]
choices = ['hwin', 'draw', 'awin']
df_orig['result'] = np.select(conditions, choices, default='not-played')
df_orig.head()

Unnamed: 0,season,date,h,a,h_ftgoals,a_ftgoals,h_htgoals,a_htgoals,h_corners,h_fouls,h_ycards,h_rcards,h_shots,h_sot,a_corners,a_fouls,a_ycards,a_rcards,a_shots,a_sot,odds_hwin_bbmean,odds_draw_bbmean,odds_awin_bbmean,odds_hwin_bbmax,odds_draw_bbmax,odds_awin_bbmax,odds_hwin_WH,odds_draw_WH,odds_awin_WH,odds_ftgoalsu2.5_bbmax,odds_ftgoalso2.5_bbmax,odds_ftgoalsu2.5_bbmean,odds_ftgoalso2.5_bbmean,heatmap_path,shotmap_path,result
0,2009-2010,2009-08-15,aston-villa,wigan-athletic,0.0,2.0,0.0,1.0,4.0,15.0,2.0,0.0,11.0,5.0,6.0,14.0,2.0,0.0,14.0,7.0,1.66,3.51,5.33,1.72,3.77,6.05,1.7,3.4,5.5,1.91,2.04,1.81,1.92,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,awin
1,2009-2010,2009-08-15,blackburn-rovers,manchester-city,0.0,2.0,0.0,1.0,5.0,12.0,2.0,0.0,17.0,9.0,4.0,9.0,1.0,0.0,8.0,5.0,3.37,3.24,2.12,3.78,3.35,2.25,3.5,3.2,2.15,1.78,2.15,1.71,2.04,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,awin
2,2009-2010,2009-08-15,bolton-wanderers,sunderland,0.0,1.0,0.0,1.0,4.0,16.0,2.0,0.0,11.0,3.0,7.0,10.0,1.0,0.0,20.0,13.0,2.24,3.2,3.15,2.37,3.31,3.4,2.3,3.2,3.2,1.7,2.25,1.66,2.12,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,awin
3,2009-2010,2009-08-15,chelsea,hull-city,2.0,1.0,1.0,1.0,12.0,13.0,1.0,0.0,26.0,12.0,4.0,15.0,2.0,0.0,7.0,3.0,1.17,6.26,16.39,1.2,7.71,23.14,1.17,6.5,21.0,2.44,1.67,2.26,1.58,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,hwin
4,2009-2010,2009-08-15,everton,arsenal,1.0,6.0,0.0,3.0,4.0,11.0,0.0,0.0,8.0,5.0,9.0,13.0,0.0,0.0,15.0,9.0,3.07,3.21,2.28,3.34,3.34,2.42,3.2,3.2,2.3,1.77,2.2,1.7,2.06,/data/02-interim/football-data/english-premier...,/data/02-interim/football-data/english-premier...,awin


In [29]:
# Season to reshape into per-team historical records.
# Original author's note: 2012-2013 is the first season for which Pinnacle
# closing odds are available; the season selected here uses the '*_bbmean'
# odds columns instead.
get_season = ['2009-2010']

# Pare the frame down to the teams, date, goals, shots and mean odds.
keeper_cols = ['h', 'a', 'date', 'h_ftgoals', 'a_ftgoals', 'h_shots', 'a_shots',
               'odds_hwin_bbmean', 'odds_draw_bbmean', 'odds_awin_bbmean']

season_df = (
    df_orig.loc[df_orig['season'].isin(get_season), keeper_cols]
    # One stable sort: matches played on the same date keep their file order,
    # so re-running the cell always yields the same row order.
    .sort_values(by='date', kind='mergesort')
    .rename(columns={'odds_hwin_bbmean': 'odds_hwin',
                     'odds_draw_bbmean': 'odds_draw',
                     'odds_awin_bbmean': 'odds_awin'})
    .reset_index(drop=True)
)

# Label each match from the full-time score. The explicit default keeps this
# consistent with the labelling of df_orig and avoids np.select's implicit
# default of 0 being mixed into a column of strings.
conditions = [season_df['h_ftgoals'] > season_df['a_ftgoals'],
              season_df['h_ftgoals'] == season_df['a_ftgoals'],
              season_df['h_ftgoals'] < season_df['a_ftgoals']]
choices = ['hwin', 'draw', 'awin']
season_df['result'] = np.select(conditions, choices, default='not-played')
season_df.head(50)

Unnamed: 0,h,a,date,h_ftgoals,a_ftgoals,h_shots,a_shots,odds_hwin,odds_draw,odds_awin,result
0,aston-villa,wigan-athletic,2009-08-15,0.0,2.0,11.0,14.0,1.66,3.51,5.33,awin
1,blackburn-rovers,manchester-city,2009-08-15,0.0,2.0,17.0,8.0,3.37,3.24,2.12,awin
2,bolton-wanderers,sunderland,2009-08-15,0.0,1.0,11.0,20.0,2.24,3.2,3.15,awin
3,chelsea,hull-city,2009-08-15,2.0,1.0,26.0,7.0,1.17,6.26,16.39,hwin
4,everton,arsenal,2009-08-15,1.0,6.0,8.0,15.0,3.07,3.21,2.28,awin
5,portsmouth,fulham,2009-08-15,0.0,1.0,16.0,9.0,2.53,3.18,2.73,awin
6,stoke-city,burnley,2009-08-15,2.0,0.0,12.0,9.0,1.91,3.29,4.02,hwin
7,wolverhampton-wanderers,west-ham-united,2009-08-15,0.0,2.0,19.0,16.0,2.57,3.19,2.68,awin
8,manchester-united,birmingham-city,2009-08-16,1.0,0.0,26.0,6.0,1.19,5.91,14.99,hwin
9,tottenham-hotspur,liverpool,2009-08-16,2.0,1.0,17.0,6.0,3.13,3.22,2.23,hwin


In [5]:
# Print the column dtypes and non-null counts of the pared-down season frame.
season_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 380 entries, 0 to 379
Data columns (total 11 columns):
h            380 non-null object
a            380 non-null object
date         380 non-null datetime64[ns]
h_ftgoals    380 non-null float64
a_ftgoals    380 non-null float64
h_shots      380 non-null float64
a_shots      380 non-null float64
odds_hwin    380 non-null float64
odds_draw    380 non-null float64
odds_awin    380 non-null float64
result       380 non-null object
dtypes: datetime64[ns](1), float64(7), object(3)
memory usage: 32.7+ KB


### Pare down Data

### Shape Season to historical data

In [6]:
def get_historical_records(group, home_or_away):
    """Expand every feature column of one team's matches into lagged columns.

    A feature column is any column whose name contains an underscore. For a
    group with ``n`` rows, each feature ``feat`` is replaced by ``n`` columns
    ``feat_0, feat_-1, ..., feat_-(n-1)`` where ``feat_-k`` is ``feat``
    shifted down by ``k`` rows, i.e. the value from ``k`` rows earlier in the
    group (NaN where no such row exists). Columns without an underscore are
    passed through unchanged and come first in the result.

    NOTE(review): ``feat_0`` is the row's own value (shift by 0), so it holds
    the statistics of the match itself rather than history - confirm that
    downstream consumers exclude it when predicting that match.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single team, already in chronological order. It is not
        modified; pandas does not support mutating the group inside
        ``groupby.apply``.
    home_or_away : str
        Name of the grouping column. Unused here; accepted because
        ``get_records`` forwards it through ``groupby.apply``.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index as ``group``.
    """
    n_rows = len(group)
    features = [col for col in group.columns if '_' in col]

    # Start from the pass-through columns, then append one lagged Series per
    # (feature, lag) pair; a single concat avoids repeated column inserts.
    pieces = [group.drop(columns=features)]
    for feat in features:
        for lag in range(n_rows):
            # str(-0) == '0', giving the suffixes 0, -1, -2, ...
            pieces.append(group[feat].shift(lag).rename(feat + '_' + str(-lag)))
    return pd.concat(pieces, axis=1)


def get_records(df, home_or_away, loc_record):
    """Build lagged feature records for each team on one side of the fixture.

    Parameters
    ----------
    df : pandas.DataFrame
        Match-level frame, one row per match in chronological order.
    home_or_away : str
        Team column to group by ('h' or 'a' as used by
        ``form_historical_records``).
    loc_record : str
        Substring selecting the feature columns to expand, e.g. 'h_' or 'a_'.
        NOTE(review): this is a substring match, not a prefix match, so any
        column containing ``loc_record`` anywhere in its name is selected.

    Returns
    -------
    pandas.DataFrame
        The team column followed by the lagged feature columns produced by
        ``get_historical_records``, indexed like ``df``.
    """
    feature_cols = [col for col in df.columns if loc_record in col]
    cut_df = df[[home_or_away] + feature_cols]
    # group_keys=False keeps the original row index on the result. Without it
    # pandas >= 2.0 prepends the team as an extra index level, which breaks
    # the column-wise concat and positional assignments done by the caller.
    grouped = cut_df.groupby(by=home_or_away, sort=True, group_keys=False)
    records = grouped.apply(get_historical_records, home_or_away)
    return records
    
def form_historical_records(df):
    """Reshape a season of matches into one wide row of history per match.

    For each team column ('h', 'a') and each feature family ('h_', 'a_'),
    ``get_records`` produces lagged columns; they are prefixed with the team
    column so the final names read ``<team side>_<feature>_<lag>``. For
    example ``h_a_shots_-2`` is ``a_shots`` from two rows earlier among the
    matches in which this row's home team was the home team.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'h', 'a', 'date', 'odds_hwin', 'odds_draw', 'odds_awin'
        and 'result', plus the 'h_*' / 'a_*' feature columns, with rows in
        chronological order.

    Returns
    -------
    pandas.DataFrame
        Team columns and lagged features, followed by 'date', the three odds
        columns and 'result' copied positionally from ``df``.
    """
    passthrough_cols = ['date', 'odds_hwin', 'odds_draw', 'odds_awin', 'result']
    # Read the pass-through values up front so a missing column fails fast,
    # before any of the reshaping work is done.
    passthrough = {col: df[col].values for col in passthrough_cols}

    record_dfs = []
    for home_or_away in ['h', 'a']:
        for loc_record in ['h_', 'a_']:
            record_df = get_records(df, home_or_away, loc_record)
            # Prefix every feature column with the team side; the team column
            # itself keeps its name.
            new_cols = {col: home_or_away + '_' + col
                        for col in record_df.columns if col != home_or_away}
            record_dfs.append(record_df.rename(columns=new_cols))

    full_records = pd.concat(record_dfs, axis=1, sort=True)
    # Drop any duplicate columns - 'h' and 'a' each come back once per
    # feature family. .copy() so the assignments below hit a real frame.
    full_records = full_records.loc[:, ~full_records.columns.duplicated()].copy()

    # Positional assignment: relies on full_records having the same row order
    # as df.
    for col in passthrough_cols:
        full_records[col] = passthrough[col]

    return full_records

In [7]:
# Reshape an independent deep copy so season_df itself stays as displayed above.
df = form_historical_records(season_df.copy(deep=True))
df.head()

Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_
-3,a_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,date,odds_hwin,odds_draw,odds_awin,result
0,aston-villa,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,14.0,,,,,,,,,,,,,,,,,,,wigan-athletic,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,14.0,,,,,,,,,,,,,,,,,,,2009-08-15,1.66,3.51,5.33,awin
1,blackburn-rovers,0.0,,,,,,,,,,,,,,,,,,,17.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,manchester-city,0.0,,,,,,,,,,,,,,,,,,,17.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,2009-08-15,3.37,3.24,2.12,awin
2,bolton-wanderers,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,20.0,,,,,,,,,,,,,,,,,,,sunderland,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,20.0,,,,,,,,,,,,,,,,,,,2009-08-15,2.24,3.2,3.15,awin
3,chelsea,2.0,,,,,,,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,hull-city,2.0,,,,,,,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,2009-08-15,1.17,6.26,16.39,hwin
4,everton,1.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,,,,,15.0,,,,,,,,,,,,,,,,,,,arsenal,1.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,,,,,15.0,,,,,,,,,,,,,,,,,,,2009-08-15,3.07,3.21,2.28,awin


In [8]:
# Show the last rows of the reshaped frame.
df.tail()

Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_
-3,a_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,date,odds_hwin,odds_draw,odds_awin,result
375,wolverhampton-wanderers,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0,1.0,0.0,2.0,1.0,0.0,14.0,14.0,10.0,9.0,12.0,11.0,8.0,6.0,9.0,11.0,12.0,12.0,8.0,8.0,8.0,11.0,12.0,24.0,19.0,1.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,2.0,3.0,0.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,2.0,11.0,12.0,9.0,22.0,15.0,7.0,10.0,6.0,20.0,12.0,10.0,17.0,13.0,11.0,12.0,12.0,3.0,5.0,16.0,sunderland,2.0,0.0,1.0,3.0,1.0,2.0,1.0,2.0,7.0,2.0,4.0,1.0,1.0,2.0,2.0,2.0,3.0,1.0,0.0,14.0,14.0,10.0,25.0,11.0,16.0,12.0,11.0,23.0,21.0,11.0,16.0,16.0,10.0,19.0,17.0,10.0,13.0,11.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,2.0,3.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,1.0,11.0,10.0,9.0,7.0,10.0,6.0,7.0,5.0,7.0,13.0,7.0,8.0,9.0,14.0,14.0,4.0,8.0,9.0,20.0,2010-05-09,2.3,3.25,3.08,hwin
376,west-ham-united,1.0,3.0,1.0,0.0,1.0,1.0,3.0,2.0,0.0,2.0,1.0,0.0,5.0,1.0,2.0,2.0,2.0,2.0,1.0,12.0,9.0,10.0,17.0,17.0,18.0,20.0,13.0,7.0,13.0,6.0,11.0,9.0,16.0,14.0,15.0,20.0,8.0,17.0,1.0,2.0,0.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,1.0,4.0,3.0,2.0,1.0,2.0,2.0,3.0,2.0,17.0,12.0,9.0,12.0,5.0,15.0,7.0,9.0,8.0,11.0,20.0,18.0,16.0,9.0,13.0,15.0,10.0,19.0,17.0,manchester-city,1.0,0.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,0.0,3.0,3.0,2.0,0.0,1.0,1.0,4.0,0.0,0.0,12.0,9.0,14.0,10.0,12.0,23.0,8.0,8.0,13.0,11.0,19.0,12.0,10.0,7.0,13.0,12.0,21.0,11.0,17.0,1.0,0.0,6.0,2.0,1.0,4.0,1.0,1.0,0.0,3.0,0.0,3.0,2.0,0.0,1.0,1.0,3.0,1.0,2.0,17.0,3.0,20.0,14.0,13.0,8.0,9.0,17.0,8.0,12.0,9.0,14.0,9.0,14.0,5.0,9.0,10.0,12.0,8.0,2010-05-09,4.1,3.51,1.85,draw
377,manchester-united,4.0,3.0,1.0,2.0,3.0,3.0,5.0,4.0,3.0,5.0,3.0,0.0,3.0,2.0,2.0,2.0,4.0,2.0,1.0,18.0,15.0,9.0,10.0,33.0,12.0,21.0,25.0,24.0,23.0,12.0,16.0,18.0,20.0,13.0,17.0,21.0,10.0,26.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,3.0,1.0,0.0,4.0,9.0,11.0,4.0,8.0,10.0,7.0,8.0,11.0,11.0,4.0,8.0,11.0,2.0,15.0,4.0,10.0,9.0,6.0,stoke-city,4.0,0.0,7.0,0.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,2.0,0.0,2.0,0.0,1.0,1.0,0.0,4.0,18.0,12.0,29.0,10.0,17.0,10.0,15.0,16.0,6.0,12.0,17.0,12.0,11.0,16.0,22.0,24.0,11.0,8.0,18.0,0.0,1.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,4.0,5.0,3.0,9.0,12.0,8.0,14.0,10.0,4.0,7.0,13.0,5.0,7.0,7.0,8.0,8.0,10.0,13.0,6.0,2010-05-09,1.12,7.98,20.73,hwin
378,aston-villa,0.0,1.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,1.0,3.0,1.0,5.0,2.0,1.0,2.0,2.0,0.0,16.0,10.0,10.0,11.0,11.0,15.0,10.0,8.0,17.0,11.0,17.0,13.0,12.0,19.0,7.0,12.0,7.0,9.0,11.0,1.0,0.0,2.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,9.0,16.0,13.0,10.0,7.0,13.0,12.0,12.0,8.0,12.0,13.0,5.0,23.0,13.0,21.0,9.0,17.0,6.0,14.0,blackburn-rovers,0.0,1.0,0.0,0.0,3.0,2.0,3.0,0.0,4.0,1.0,2.0,0.0,3.0,0.0,2.0,5.0,6.0,3.0,2.0,16.0,14.0,6.0,7.0,14.0,12.0,15.0,7.0,12.0,14.0,13.0,9.0,8.0,15.0,20.0,26.0,24.0,18.0,8.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,1.0,9.0,12.0,16.0,12.0,10.0,12.0,9.0,8.0,8.0,8.0,8.0,14.0,7.0,15.0,2.0,5.0,7.0,7.0,18.0,2010-05-09,1.55,3.85,6.18,awin
379,everton,1.0,2.0,2.0,2.0,5.0,3.0,2.0,2.0,2.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,3.0,2.0,1.0,21.0,15.0,10.0,18.0,18.0,11.0,7.0,11.0,13.0,24.0,19.0,15.0,12.0,9.0,13.0,24.0,18.0,26.0,8.0,0.0,1.0,2.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,2.0,1.0,1.0,1.0,0.0,1.0,6.0,10.0,8.0,10.0,6.0,7.0,11.0,14.0,5.0,8.0,12.0,2.0,19.0,9.0,13.0,12.0,8.0,7.0,12.0,15.0,portsmouth,1.0,2.0,0.0,2.0,4.0,1.0,5.0,1.0,2.0,2.0,2.0,1.0,1.0,3.0,0.0,0.0,2.0,4.0,1.0,21.0,14.0,24.0,16.0,23.0,14.0,21.0,14.0,13.0,13.0,21.0,16.0,9.0,10.0,7.0,11.0,7.0,19.0,9.0,0.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,10.0,13.0,6.0,8.0,7.0,13.0,7.0,12.0,10.0,11.0,7.0,9.0,14.0,9.0,8.0,12.0,17.0,9.0,9.0,2010-05-09,1.28,5.23,10.38,hwin


In [9]:
# Write the reshaped season to
# <PROCESSED_DATA_DIR>/<league>/<season>/<league>--<season>.csv
save_dir = PROCESSED_DATA_DIR / league / get_season[0]
# to_csv does not create missing directories, so make sure the per-league /
# per-season folder exists before writing.
save_dir.mkdir(parents=True, exist_ok=True)
save_fp = save_dir / (league + '--' + get_season[0] + '.csv')
df.to_csv(save_fp, index=False)

In [10]:
# Read the file straight back in (dates re-parsed) to check the round trip.
df_load = pd.read_csv(save_fp, index_col=None, parse_dates=['date'])
df_load.head()

Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_
-3,a_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,date,odds_hwin,odds_draw,odds_awin,result
0,aston-villa,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,14.0,,,,,,,,,,,,,,,,,,,wigan-athletic,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,14.0,,,,,,,,,,,,,,,,,,,2009-08-15,1.66,3.51,5.33,awin
1,blackburn-rovers,0.0,,,,,,,,,,,,,,,,,,,17.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,manchester-city,0.0,,,,,,,,,,,,,,,,,,,17.0,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,2009-08-15,3.37,3.24,2.12,awin
2,bolton-wanderers,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,20.0,,,,,,,,,,,,,,,,,,,sunderland,0.0,,,,,,,,,,,,,,,,,,,11.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,20.0,,,,,,,,,,,,,,,,,,,2009-08-15,2.24,3.2,3.15,awin
3,chelsea,2.0,,,,,,,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,hull-city,2.0,,,,,,,,,,,,,,,,,,,26.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,2009-08-15,1.17,6.26,16.39,hwin
4,everton,1.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,,,,,15.0,,,,,,,,,,,,,,,,,,,arsenal,1.0,,,,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,6.0,,,,,,,,,,,,,,,,,,,15.0,,,,,,,,,,,,,,,,,,,2009-08-15,3.07,3.21,2.28,awin


In [11]:
# Show the last rows of the reloaded frame for comparison with df.tail().
df_load.tail()

Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_
-3,a_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,date,odds_hwin,odds_draw,odds_awin,result
375,wolverhampton-wanderers,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0,1.0,0.0,2.0,1.0,0.0,14.0,14.0,10.0,9.0,12.0,11.0,8.0,6.0,9.0,11.0,12.0,12.0,8.0,8.0,8.0,11.0,12.0,24.0,19.0,1.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,2.0,3.0,0.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,2.0,11.0,12.0,9.0,22.0,15.0,7.0,10.0,6.0,20.0,12.0,10.0,17.0,13.0,11.0,12.0,12.0,3.0,5.0,16.0,sunderland,2.0,0.0,1.0,3.0,1.0,2.0,1.0,2.0,7.0,2.0,4.0,1.0,1.0,2.0,2.0,2.0,3.0,1.0,0.0,14.0,14.0,10.0,25.0,11.0,16.0,12.0,11.0,23.0,21.0,11.0,16.0,16.0,10.0,19.0,17.0,10.0,13.0,11.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,2.0,3.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,1.0,11.0,10.0,9.0,7.0,10.0,6.0,7.0,5.0,7.0,13.0,7.0,8.0,9.0,14.0,14.0,4.0,8.0,9.0,20.0,2010-05-09,2.3,3.25,3.08,hwin
376,west-ham-united,1.0,3.0,1.0,0.0,1.0,1.0,3.0,2.0,0.0,2.0,1.0,0.0,5.0,1.0,2.0,2.0,2.0,2.0,1.0,12.0,9.0,10.0,17.0,17.0,18.0,20.0,13.0,7.0,13.0,6.0,11.0,9.0,16.0,14.0,15.0,20.0,8.0,17.0,1.0,2.0,0.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,1.0,4.0,3.0,2.0,1.0,2.0,2.0,3.0,2.0,17.0,12.0,9.0,12.0,5.0,15.0,7.0,9.0,8.0,11.0,20.0,18.0,16.0,9.0,13.0,15.0,10.0,19.0,17.0,manchester-city,1.0,0.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,0.0,3.0,3.0,2.0,0.0,1.0,1.0,4.0,0.0,0.0,12.0,9.0,14.0,10.0,12.0,23.0,8.0,8.0,13.0,11.0,19.0,12.0,10.0,7.0,13.0,12.0,21.0,11.0,17.0,1.0,0.0,6.0,2.0,1.0,4.0,1.0,1.0,0.0,3.0,0.0,3.0,2.0,0.0,1.0,1.0,3.0,1.0,2.0,17.0,3.0,20.0,14.0,13.0,8.0,9.0,17.0,8.0,12.0,9.0,14.0,9.0,14.0,5.0,9.0,10.0,12.0,8.0,2010-05-09,4.1,3.51,1.85,draw
377,manchester-united,4.0,3.0,1.0,2.0,3.0,3.0,5.0,4.0,3.0,5.0,3.0,0.0,3.0,2.0,2.0,2.0,4.0,2.0,1.0,18.0,15.0,9.0,10.0,33.0,12.0,21.0,25.0,24.0,23.0,12.0,16.0,18.0,20.0,13.0,17.0,21.0,10.0,26.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,3.0,1.0,0.0,4.0,9.0,11.0,4.0,8.0,10.0,7.0,8.0,11.0,11.0,4.0,8.0,11.0,2.0,15.0,4.0,10.0,9.0,6.0,stoke-city,4.0,0.0,7.0,0.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,2.0,0.0,2.0,0.0,1.0,1.0,0.0,4.0,18.0,12.0,29.0,10.0,17.0,10.0,15.0,16.0,6.0,12.0,17.0,12.0,11.0,16.0,22.0,24.0,11.0,8.0,18.0,0.0,1.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,4.0,5.0,3.0,9.0,12.0,8.0,14.0,10.0,4.0,7.0,13.0,5.0,7.0,7.0,8.0,8.0,10.0,13.0,6.0,2010-05-09,1.12,7.98,20.73,hwin
378,aston-villa,0.0,1.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,1.0,3.0,1.0,5.0,2.0,1.0,2.0,2.0,0.0,16.0,10.0,10.0,11.0,11.0,15.0,10.0,8.0,17.0,11.0,17.0,13.0,12.0,19.0,7.0,12.0,7.0,9.0,11.0,1.0,0.0,2.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,9.0,16.0,13.0,10.0,7.0,13.0,12.0,12.0,8.0,12.0,13.0,5.0,23.0,13.0,21.0,9.0,17.0,6.0,14.0,blackburn-rovers,0.0,1.0,0.0,0.0,3.0,2.0,3.0,0.0,4.0,1.0,2.0,0.0,3.0,0.0,2.0,5.0,6.0,3.0,2.0,16.0,14.0,6.0,7.0,14.0,12.0,15.0,7.0,12.0,14.0,13.0,9.0,8.0,15.0,20.0,26.0,24.0,18.0,8.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,1.0,9.0,12.0,16.0,12.0,10.0,12.0,9.0,8.0,8.0,8.0,8.0,14.0,7.0,15.0,2.0,5.0,7.0,7.0,18.0,2010-05-09,1.55,3.85,6.18,awin
379,everton,1.0,2.0,2.0,2.0,5.0,3.0,2.0,2.0,2.0,2.0,1.0,2.0,0.0,1.0,1.0,1.0,3.0,2.0,1.0,21.0,15.0,10.0,18.0,18.0,11.0,7.0,11.0,13.0,24.0,19.0,15.0,12.0,9.0,13.0,24.0,18.0,26.0,8.0,0.0,1.0,2.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,2.0,1.0,1.0,1.0,0.0,1.0,6.0,10.0,8.0,10.0,6.0,7.0,11.0,14.0,5.0,8.0,12.0,2.0,19.0,9.0,13.0,12.0,8.0,7.0,12.0,15.0,portsmouth,1.0,2.0,0.0,2.0,4.0,1.0,5.0,1.0,2.0,2.0,2.0,1.0,1.0,3.0,0.0,0.0,2.0,4.0,1.0,21.0,14.0,24.0,16.0,23.0,14.0,21.0,14.0,13.0,13.0,21.0,16.0,9.0,10.0,7.0,11.0,7.0,19.0,9.0,0.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,10.0,13.0,6.0,8.0,7.0,13.0,7.0,12.0,10.0,11.0,7.0,9.0,14.0,9.0,8.0,12.0,17.0,9.0,9.0,2010-05-09,1.28,5.23,10.38,hwin


In [15]:
# List every column with its non-null count. The keyword for this was renamed
# from `null_counts` to `show_counts` (old name removed in pandas 2.0), so try
# the current spelling first and fall back for older pandas installations.
try:
    df_load.info(verbose=True, show_counts=True)
except TypeError:
    df_load.info(verbose=True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 380 entries, 0 to 379
Data columns (total 159 columns):
h                  380 non-null object
h_h_ftgoals_0      380 non-null float64
h_h_ftgoals_-1     360 non-null float64
h_h_ftgoals_-2     340 non-null float64
h_h_ftgoals_-3     320 non-null float64
h_h_ftgoals_-4     300 non-null float64
h_h_ftgoals_-5     280 non-null float64
h_h_ftgoals_-6     260 non-null float64
h_h_ftgoals_-7     240 non-null float64
h_h_ftgoals_-8     220 non-null float64
h_h_ftgoals_-9     200 non-null float64
h_h_ftgoals_-10    180 non-null float64
h_h_ftgoals_-11    160 non-null float64
h_h_ftgoals_-12    140 non-null float64
h_h_ftgoals_-13    120 non-null float64
h_h_ftgoals_-14    100 non-null float64
h_h_ftgoals_-15    80 non-null float64
h_h_ftgoals_-16    60 non-null float64
h_h_ftgoals_-17    40 non-null float64
h_h_ftgoals_-18    20 non-null float64
h_h_shots_0        380 non-null float64
h_h_shots_-1       360 non-null float64
h_h_shots_-2       

In [13]:
# Summary statistics for the numeric columns of the reloaded frame.
df_load.describe()

Unnamed: 0,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_-3,a
_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,odds_hwin,odds_draw,odds_awin
count,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,360.0,340.0,320.0,300.0,280.0,260.0,240.0,220.0,200.0,180.0,160.0,140.0,120.0,100.0,80.0,60.0,40.0,20.0,380.0,380.0,380.0
mean,1.697368,1.691667,1.676471,1.728125,1.726667,1.692857,1.684615,1.7125,1.727273,1.755,1.772222,1.79375,1.807143,1.791667,1.8,1.775,1.65,1.35,1.1,13.728947,13.713889,13.685294,13.765625,13.85,13.785714,13.892308,13.970833,14.077273,14.17,14.166667,14.1875,14.264286,14.483333,14.61,14.8,14.8,14.5,14.25,1.073684,1.088889,1.076471,1.084375,1.073333,1.067857,1.057692,1.095833,1.154545,1.18,1.177778,1.24375,1.214286,1.233333,1.23,1.2625,1.283333,1.325,1.5,10.718421,10.722222,10.773529,10.78125,10.746667,10.789286,10.8,10.866667,10.877273,10.945,10.911111,11.06875,11.135714,11.166667,11.24,11.375,11.666667,12.125,11.9,1.697368,1.675,1.708824,1.7125,1.733333,1.685714,1.7,1.704167,1.75,1.765,1.783333,1.75625,1.814286,1.791667,1.78,1.8,1.616667,1.225,1.05,13.728947,13.713889,13.764706,13.765625,13.843333,13.817857,13.919231,14.029167,14.118182,14.225,14.2,14.15625,14.228571,14.608333,14.7,14.6375,14.55,14.725,13.9,1.073684,1.086111,1.076471,1.071875,1.07,1.075,1.046154,1.091667,1.145455,1.18,1.194444,1.23125,1.228571,1.225,1.22,1.25,1.283333,1.3,1.45,10.718421,10.716667,10.758824,10.80625,10.803333,10.796429,10.826923,10.845833,10.831818,10.91,10.922222,11.14375,11.185714,11.233333,11.29,11.5625,11.6,11.8,12.6,2.805289,3.907868,5.179316
std,1.4675,1.432584,1.420885,1.431122,1.408985,1.40351,1.414403,1.42477,1.455012,1.426587,1.437062,1.475574,1.512045,1.389612,1.414214,1.466719,1.400061,1.210001,1.209611,5.379793,5.444972,5.477044,5.505034,5.532588,5.44215,5.531582,5.499637,5.531845,5.466306,5.537581,5.669279,5.831705,5.820831,5.863593,6.032402,6.398623,6.401122,6.273377,1.122964,1.140732,1.144451,1.126375,1.10394,1.119573,1.118698,1.12929,1.140266,1.159449,1.158672,1.190882,1.180085,1.179185,1.162156,1.219501,1.354111,1.508735,1.638356,4.510577,4.505095,4.56599,4.545378,4.505941,4.561239,4.643092,4.721416,4.70096,4.763974,4.713334,4.739931,4.768317,4.821753,4.660429,4.742416,5.027607,5.431095,4.876366,1.4675,1.434722,1.443298,1.426957,1.443086,1.3968,1.406823,1.425993,1.451263,1.421223,1.438788,1.469809,1.515321,1.389612,1.439557,1.512814,1.391154,1.097491,0.998683,5.379793,5.427037,5.5306,5.493063,5.519693,5.465995,5.47698,5.578199,5.574292,5.47212,5.554605,5.670787,5.854769,5.732549,5.902405,6.158752,6.320333,6.259301,6.223047,1.122964,1.139724,1.144451,1.116065,1.093505,1.119115,1.110578,1.131488,1.14546,1.163775,1.153558,1.182801,1.177382,1.170093,1.151021,1.216969,1.354111,1.505545,1.700619,4.510577,4.510928,4.574594,4.535471,4.541668,4.563136,4.613733,4.699905,4.735361,4.792362,4.734818,4.716953,4.792959,4.839844,4.68243,4.687294,5.022661,5.224204,5.413434,1.99092,1.026133,4.437064
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,6.0,1.09,3.17,1.26
25%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,10.75,9.75,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.75,8.0,7.75,8.0,8.0,8.0,8.0,8.75,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.75,10.0,10.0,10.0,10.75,11.0,10.0,9.75,9.0,8.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,8.0,8.0,7.75,7.75,1.55,3.26,2.3
50%,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,12.5,12.0,12.0,12.0,12.5,12.5,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15.0,13.5,13.5,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.5,10.5,10.5,11.0,9.5,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,12.5,12.0,12.0,12.0,12.5,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.5,10.5,11.0,11.0,11.0,10.0,11.0,11.5,2.15,3.44,3.42
75%,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.25,2.25,2.0,2.0,2.25,2.0,2.0,2.0,1.25,17.0,17.0,17.0,17.0,17.25,17.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,19.0,19.0,19.0,19.25,19.0,18.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,13.0,13.0,13.0,13.25,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.25,16.0,17.0,16.25,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.25,2.0,2.25,2.0,2.0,2.25,2.0,2.0,1.25,17.0,17.0,17.0,17.0,17.0,17.0,17.25,18.0,18.0,18.0,18.0,18.0,18.0,19.0,19.0,19.0,19.0,19.0,18.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,13.0,13.0,13.0,13.25,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,15.0,16.0,16.0,18.0,3.0875,4.16,6.31
max,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,6.0,6.0,6.0,6.0,4.0,4.0,33.0,33.0,33.0,33.0,33.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,27.0,27.0,27.0,27.0,27.0,26.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,26.0,26.0,26.0,26.0,20.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,6.0,6.0,6.0,6.0,4.0,4.0,33.0,33.0,33.0,33.0,33.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,28.0,27.0,27.0,27.0,27.0,27.0,26.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,26.0,26.0,26.0,26.0,20.0,12.35,9.03,23.31


In [33]:
# Display the rows at positions 44 through 49 of df_load (iloc's end bound is exclusive).
df_load.iloc[44:50]

Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_ftgoals_-3,h_h_ftgoals_-4,h_h_ftgoals_-5,h_h_ftgoals_-6,h_h_ftgoals_-7,h_h_ftgoals_-8,h_h_ftgoals_-9,h_h_ftgoals_-10,h_h_ftgoals_-11,h_h_ftgoals_-12,h_h_ftgoals_-13,h_h_ftgoals_-14,h_h_ftgoals_-15,h_h_ftgoals_-16,h_h_ftgoals_-17,h_h_ftgoals_-18,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_h_shots_-3,h_h_shots_-4,h_h_shots_-5,h_h_shots_-6,h_h_shots_-7,h_h_shots_-8,h_h_shots_-9,h_h_shots_-10,h_h_shots_-11,h_h_shots_-12,h_h_shots_-13,h_h_shots_-14,h_h_shots_-15,h_h_shots_-16,h_h_shots_-17,h_h_shots_-18,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_ftgoals_-3,h_a_ftgoals_-4,h_a_ftgoals_-5,h_a_ftgoals_-6,h_a_ftgoals_-7,h_a_ftgoals_-8,h_a_ftgoals_-9,h_a_ftgoals_-10,h_a_ftgoals_-11,h_a_ftgoals_-12,h_a_ftgoals_-13,h_a_ftgoals_-14,h_a_ftgoals_-15,h_a_ftgoals_-16,h_a_ftgoals_-17,h_a_ftgoals_-18,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,h_a_shots_-3,h_a_shots_-4,h_a_shots_-5,h_a_shots_-6,h_a_shots_-7,h_a_shots_-8,h_a_shots_-9,h_a_shots_-10,h_a_shots_-11,h_a_shots_-12,h_a_shots_-13,h_a_shots_-14,h_a_shots_-15,h_a_shots_-16,h_a_shots_-17,h_a_shots_-18,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_ftgoals_-3,a_h_ftgoals_-4,a_h_ftgoals_-5,a_h_ftgoals_-6,a_h_ftgoals_-7,a_h_ftgoals_-8,a_h_ftgoals_-9,a_h_ftgoals_-10,a_h_ftgoals_-11,a_h_ftgoals_-12,a_h_ftgoals_-13,a_h_ftgoals_-14,a_h_ftgoals_-15,a_h_ftgoals_-16,a_h_ftgoals_-17,a_h_ftgoals_-18,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_h_shots_-3,a_h_shots_-4,a_h_shots_-5,a_h_shots_-6,a_h_shots_-7,a_h_shots_-8,a_h_shots_-9,a_h_shots_-10,a_h_shots_-11,a_h_shots_-12,a_h_shots_-13,a_h_shots_-14,a_h_shots_-15,a_h_shots_-16,a_h_shots_-17,a_h_shots_-18,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_ftgoals_-3,a_a_ftgoals_-4,a_a_ftgoals_-5,a_a_ftgoals_-6,a_a_ftgoals_-7,a_a_ftgoals_-8,a_a_ftgoals_-9,a_a_ftgoals_-10,a_a_ftgoals_-11,a_a_ftgoals_-12,a_a_ftgoals_-13,a_a_ftgoals_-14,a_a_ftgoals_-15,a_a_ftgoals_-16,a_a_ftgoals_-17,a_a_ftgoals_-18,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,a_a_shots_
-3,a_a_shots_-4,a_a_shots_-5,a_a_shots_-6,a_a_shots_-7,a_a_shots_-8,a_a_shots_-9,a_a_shots_-10,a_a_shots_-11,a_a_shots_-12,a_a_shots_-13,a_a_shots_-14,a_a_shots_-15,a_a_shots_-16,a_a_shots_-17,a_a_shots_-18,date,odds_hwin,odds_draw,odds_awin,result
44,birmingham-city,0.0,0.0,1.0,,,,,,,,,,,,,,,,,11.0,8.0,9.0,,,,,,,,,,,,,,,,,1.0,0.0,0.0,,,,,,,,,,,,,,,,,12.0,13.0,9.0,,,,,,,,,,,,,,,,,aston-villa,0.0,1.0,,,,,,,,,,,,,,,,,,11.0,21.0,,,,,,,,,,,,,,,,,,1.0,3.0,,,,,,,,,,,,,,,,,,12.0,7.0,,,,,,,,,,,,,,,,,,2009-09-13,3.28,3.25,2.2,awin
45,fulham,2.0,0.0,,,,,,,,,,,,,,,,,,14.0,4.0,,,,,,,,,,,,,,,,,,1.0,2.0,,,,,,,,,,,,,,,,,,6.0,12.0,,,,,,,,,,,,,,,,,,everton,2.0,1.0,,,,,,,,,,,,,,,,,,14.0,8.0,,,,,,,,,,,,,,,,,,1.0,0.0,,,,,,,,,,,,,,,,,,6.0,17.0,,,,,,,,,,,,,,,,,,2009-09-13,2.53,3.2,2.78,hwin
46,aston-villa,2.0,2.0,0.0,,,,,,,,,,,,,,,,,7.0,9.0,11.0,,,,,,,,,,,,,,,,,0.0,0.0,2.0,,,,,,,,,,,,,,,,,17.0,6.0,14.0,,,,,,,,,,,,,,,,,portsmouth,2.0,4.0,1.0,,,,,,,,,,,,,,,,,7.0,19.0,9.0,,,,,,,,,,,,,,,,,0.0,1.0,0.0,,,,,,,,,,,,,,,,,17.0,9.0,9.0,,,,,,,,,,,,,,,,,2009-09-19,1.4,4.13,9.09,hwin
47,bolton-wanderers,1.0,2.0,0.0,,,,,,,,,,,,,,,,,11.0,8.0,11.0,,,,,,,,,,,,,,,,,1.0,3.0,1.0,,,,,,,,,,,,,,,,,10.0,26.0,20.0,,,,,,,,,,,,,,,,,stoke-city,1.0,0.0,4.0,,,,,,,,,,,,,,,,,11.0,8.0,18.0,,,,,,,,,,,,,,,,,1.0,0.0,0.0,,,,,,,,,,,,,,,,,10.0,13.0,6.0,,,,,,,,,,,,,,,,,2009-09-19,2.09,3.24,3.55,draw
48,arsenal,4.0,4.0,,,,,,,,,,,,,,,,,,23.0,19.0,,,,,,,,,,,,,,,,,,0.0,1.0,,,,,,,,,,,,,,,,,,8.0,9.0,,,,,,,,,,,,,,,,,,wigan-athletic,4.0,2.0,0.0,,,,,,,,,,,,,,,,,23.0,26.0,11.0,,,,,,,,,,,,,,,,,0.0,1.0,2.0,,,,,,,,,,,,,,,,,8.0,12.0,14.0,,,,,,,,,,,,,,,,,2009-09-19,1.24,5.47,12.96,hwin
49,hull-city,0.0,1.0,1.0,,,,,,,,,,,,,,,,,16.0,12.0,9.0,,,,,,,,,,,,,,,,,1.0,0.0,5.0,,,,,,,,,,,,,,,,,14.0,20.0,18.0,,,,,,,,,,,,,,,,,birmingham-city,0.0,2.0,1.0,,,,,,,,,,,,,,,,,16.0,20.0,26.0,,,,,,,,,,,,,,,,,1.0,1.0,0.0,,,,,,,,,,,,,,,,,14.0,11.0,6.0,,,,,,,,,,,,,,,,,2009-09-19,2.35,3.22,3.01,awin


## Functions to Cut Data to Defined Shape prior to PreProcessing

In [27]:
import re

def get_n_past_games(df_orig, past_games=2, dropna_rows=True):
    """
    Restrict historical records to the last ``past_games`` games.

    Lagged feature columns are recognised by an integer after the last '_'
    (e.g. ``h_h_ftgoals_0``, ``h_h_ftgoals_-1``). If past_games = 2, then will get
    home team
        games played at home ie. h_h_feature_0, h_h_feature_-1, h_h_feature_-2
        games played away ie. h_a_feature_0, h_a_feature_-1, h_a_feature_-2
    away team
        games played at home ie. a_h_feature_0, a_h_feature_-1, a_h_feature_-2
        games played away ie. a_a_feature_0, a_a_feature_-1, a_a_feature_-2
    where -1 is the most recent game prior to the current game, and -2 is the
    game before that. Columns with lag 0 are kept as well; this function treats
    them as the target columns. The current game's result is in ``result``.

    Columns whose whole name is alphabetic (e.g. ``h``, ``a``, ``date``,
    ``result``) and columns containing ``'odds'`` are always kept.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Frame holding the lagged feature columns. It is not modified.
    past_games : int, default 2
        Number of past games to keep; lagged columns with a lag greater than
        ``-(past_games + 1)`` are retained.
    dropna_rows : bool, default True
        Accepted for interface compatibility. NOTE(review): not used yet, so
        rows with missing history are still returned.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_orig`` restricted to the kept columns, in their
        original order.
    """
    def _lag(col):
        # Integer held in the text after the last '_', or None when that text
        # is not an integer (e.g. 'h', 'date', 'odds_hwin').
        try:
            return int(col.rsplit('_', 1)[-1])
        except ValueError:
            return None

    lags = {col: _lag(col) for col in df_orig.columns}

    # Use the past_games argument (not a notebook-level global) for the window.
    keeper_cols = [col for col in df_orig.columns
                   if (lags[col] is not None and lags[col] > -(past_games + 1))
                   or col.isalpha()
                   or 'odds' in col]

    # Names the target (lag 0) columns would take with a 'res' suffix.
    # Only the trailing lag token is swapped, so a '0' elsewhere in the name
    # is left alone. NOTE(review): these names are only reported here, the
    # returned frame still carries the original '_0' column names.
    res_cols = [col.rsplit('_', 1)[0] + '_res'
                for col in df_orig.columns if lags[col] == 0]
    res_cols.append('result')
    print(res_cols)

    return df_orig[keeper_cols].copy()
    
# Build the trimmed frame using the function's default arguments.
df_cut = get_n_past_games(df_load)
# NOTE(review): the commented-out block below is an inline version of the same
# column filter. It additionally requires the lag suffix to be < 0, i.e. it
# would drop the `_0` columns. Kept for reference only.
# # Restrict Historical records to last 4 games per team
# df_cut = df_load.copy(deep=True)
# hist_games = 2
# keeper_cols = [col for col in df.columns if not col.split('_')[-1].isalpha() \
#                and int(col.split('_')[-1]) > -(hist_games+1) \
#                and int(col.split('_')[-1]) < 0 \
#                or col.isalpha() \
#                or 'odds' in col]
# df_cut = df_cut[keeper_cols]
# Show the first rows of the trimmed frame.
df_cut.head()

['h_h_ftgoals_res', 'h_h_shots_res', 'h_a_ftgoals_res', 'h_a_shots_res', 'a_h_ftgoals_res', 'a_h_shots_res', 'a_a_ftgoals_res', 'a_a_shots_res', 'result']
['h_h_ftgoals_res', 'h_h_shots_res', 'h_a_ftgoals_res', 'h_a_shots_res', 'a_h_ftgoals_res', 'a_h_shots_res', 'a_a_ftgoals_res', 'a_a_shots_res', 'result']


Unnamed: 0,h,h_h_ftgoals_0,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_shots_0,h_h_shots_-1,h_h_shots_-2,h_a_ftgoals_0,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_shots_0,h_a_shots_-1,h_a_shots_-2,a,a_h_ftgoals_0,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_shots_0,a_h_shots_-1,a_h_shots_-2,a_a_ftgoals_0,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_shots_0,a_a_shots_-1,a_a_shots_-2,date,odds_hwin,odds_draw,odds_awin,result
0,aston-villa,0.0,,,11.0,,,2.0,,,14.0,,,wigan-athletic,0.0,,,11.0,,,2.0,,,14.0,,,2009-08-15,1.66,3.51,5.33,awin
1,blackburn-rovers,0.0,,,17.0,,,2.0,,,8.0,,,manchester-city,0.0,,,17.0,,,2.0,,,8.0,,,2009-08-15,3.37,3.24,2.12,awin
2,bolton-wanderers,0.0,,,11.0,,,1.0,,,20.0,,,sunderland,0.0,,,11.0,,,1.0,,,20.0,,,2009-08-15,2.24,3.2,3.15,awin
3,chelsea,2.0,,,26.0,,,1.0,,,7.0,,,hull-city,2.0,,,26.0,,,1.0,,,7.0,,,2009-08-15,1.17,6.26,16.39,hwin
4,everton,1.0,,,8.0,,,6.0,,,15.0,,,arsenal,1.0,,,8.0,,,6.0,,,15.0,,,2009-08-15,3.07,3.21,2.28,awin


In [19]:
import re

def get_n_past_games(df_orig, past_games=2, dropna_rows=True):
    """
    Restrict historical records to the last ``past_games`` games.

    Lagged feature columns are recognised by an integer after the last '_'
    (e.g. ``h_h_ftgoals_-1``). If past_games = 2, then will get
    home team
        last 2 games played at home ie. h_h_feature_-1, h_h_feature_-2
        last 2 games played away ie. h_a_feature_-1, h_a_feature_-2
    away team
        last 2 games played at home ie. a_h_feature_-1, a_h_feature_-2
        last 2 games played away ie. a_a_feature_-1, a_a_feature_-2
    where -1 is the most recent game prior to the current game, and -2 is the
    game before that. Columns with lag 0 (or any non-negative lag) are dropped.
    The current game's result is in ``result``.

    Columns whose whole name is alphabetic (e.g. ``h``, ``a``, ``date``,
    ``result``) and columns containing ``'odds'`` are always kept.

    NOTE(review): a function with the same name is also defined in the cell
    above; that one keeps the lag-0 columns. Whichever runs last wins, so
    consider consolidating the two.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Frame holding the lagged feature columns. It is not modified.
    past_games : int, default 2
        Number of past games to keep; lagged columns with
        ``-(past_games + 1) < lag < 0`` are retained.
    dropna_rows : bool, default True
        Accepted for interface compatibility. NOTE(review): not used yet, so
        rows with missing history are still returned.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_orig`` restricted to the kept columns, in their
        original order.
    """
    def _lag(col):
        # Integer held in the text after the last '_', or None when that text
        # is not an integer (e.g. 'h', 'date', 'odds_hwin').
        try:
            return int(col.rsplit('_', 1)[-1])
        except ValueError:
            return None

    lags = {col: _lag(col) for col in df_orig.columns}

    # Use the past_games argument (not a notebook-level global) for the window.
    keeper_cols = [col for col in df_orig.columns
                   if (lags[col] is not None
                       and lags[col] > -(past_games + 1)
                       and lags[col] < 0)
                   or col.isalpha()
                   or 'odds' in col]

    # Return the filtered copy; previously the final statement sat at module
    # level, so the function returned None.
    return df_orig[keeper_cols].copy()
    

# Restrict historical records to the last `hist_games` games per team.
# Only lagged columns with -(hist_games + 1) < lag < 0 are kept, plus purely
# alphabetic columns (e.g. h, a, date, result) and any column containing 'odds'.
df_cut = df_load.copy(deep=True)
hist_games = 2
# Iterate over df_cut's own columns; the previous `df.columns` relied on a
# `df` variable that this cell never defines.
keeper_cols = [col for col in df_cut.columns if not col.split('_')[-1].isalpha() \
               and int(col.split('_')[-1]) > -(hist_games+1) \
               and int(col.split('_')[-1]) < 0 \
               or col.isalpha() \
               or 'odds' in col]
df_cut = df_cut[keeper_cols]
df_cut.head(10)

Unnamed: 0,h,h_h_ftgoals_-1,h_h_ftgoals_-2,h_h_shots_-1,h_h_shots_-2,h_a_ftgoals_-1,h_a_ftgoals_-2,h_a_shots_-1,h_a_shots_-2,a,a_h_ftgoals_-1,a_h_ftgoals_-2,a_h_shots_-1,a_h_shots_-2,a_a_ftgoals_-1,a_a_ftgoals_-2,a_a_shots_-1,a_a_shots_-2,date,odds_hwin,odds_draw,odds_awin,result
0,aston-villa,,,,,,,,,wigan-athletic,,,,,,,,,2009-08-15,1.66,3.51,5.33,awin
1,blackburn-rovers,,,,,,,,,manchester-city,,,,,,,,,2009-08-15,3.37,3.24,2.12,awin
2,bolton-wanderers,,,,,,,,,sunderland,,,,,,,,,2009-08-15,2.24,3.2,3.15,awin
3,chelsea,,,,,,,,,hull-city,,,,,,,,,2009-08-15,1.17,6.26,16.39,hwin
4,everton,,,,,,,,,arsenal,,,,,,,,,2009-08-15,3.07,3.21,2.28,awin
5,portsmouth,,,,,,,,,fulham,,,,,,,,,2009-08-15,2.53,3.18,2.73,awin
6,stoke-city,,,,,,,,,burnley,,,,,,,,,2009-08-15,1.91,3.29,4.02,hwin
7,wolverhampton-wanderers,,,,,,,,,west-ham-united,,,,,,,,,2009-08-15,2.57,3.19,2.68,awin
8,manchester-united,,,,,,,,,birmingham-city,,,,,,,,,2009-08-16,1.19,5.91,14.99,hwin
9,tottenham-hotspur,,,,,,,,,liverpool,,,,,,,,,2009-08-16,3.13,3.22,2.23,hwin
