Permalink
Switch branches/tags
Nothing to show
Find file Copy path
362d103 Feb 20, 2018
0 contributors

Users who have contributed to this file

98 lines (74 sloc) 3.62 KB
import csv
import datetime
from functools import reduce
class Dataset:
    """Football match results read from a CSV file, plus derived features.

    The CSV is expected to provide the columns ``Date``, ``HomeTeam``,
    ``AwayTeam``, ``FTR``, ``FTHG``, ``FTAG``, ``HS``, ``AS``, ``HST``,
    ``AST``, ``B365H``, ``B365D`` and ``B365A``.

    Attributes:
        raw_results: One dict per CSV row, in file order, with ``'Date'``
            parsed into a ``datetime.datetime``.
        processed_results: One flat dict per match for which both teams had
            enough earlier matches to compute statistics. Each holds the
            full-time result, the three odds as floats, and the aggregated
            statistics of both teams prefixed with ``home-`` / ``away-``.
    """

    # strptime format of the 'Date' column: day/month/two-digit year.
    DATE_FORMAT = '%d/%m/%y'

    def __init__(self, file_path):
        """Load the matches in ``file_path`` and build ``processed_results``.

        Raises:
            OSError: if the file cannot be opened.
            KeyError: if a required column is missing.
            ValueError: if a date, statistic or odds value cannot be parsed.
        """
        self.raw_results = []
        self.processed_results = []

        # newline='' lets the csv module do its own line-ending handling, as
        # its documentation recommends (matters for quoted embedded newlines).
        with open(file_path, newline='') as stream:
            for row in csv.DictReader(stream):
                row['Date'] = datetime.datetime.strptime(row['Date'], self.DATE_FORMAT)
                self.raw_results.append(row)

        for result in self.raw_results:
            home_statistics = self.get_statistics(result['HomeTeam'], result['Date'])
            if home_statistics is None:
                continue

            away_statistics = self.get_statistics(result['AwayTeam'], result['Date'])
            if away_statistics is None:
                continue

            # Odds are parsed only once both teams are known to have enough
            # history, so skipped rows never need valid odds values.
            processed_result = {
                'result': result['FTR'],
                'odds-home': float(result['B365H']),
                'odds-draw': float(result['B365D']),
                'odds-away': float(result['B365A']),
            }
            for label, statistics in (('home', home_statistics), ('away', away_statistics)):
                for key, value in statistics.items():
                    processed_result[label + '-' + key] = value

            self.processed_results.append(processed_result)

    def filter(self, team, date):
        """Return the raw results involving ``team`` played strictly before ``date``.

        The returned list keeps the order of ``raw_results``.
        """
        return [
            result
            for result in self.raw_results
            if (result['HomeTeam'] == team or result['AwayTeam'] == team)
            and result['Date'] < date
        ]

    def get_statistics(self, team, date, matches=10):
        """Sum ``team``'s performance over its last ``matches`` games before ``date``.

        Args:
            team: Team name as it appears in ``HomeTeam`` / ``AwayTeam``.
            date: Only matches played strictly before this datetime count.
            matches: Number of most recent matches to aggregate (at least 1).

        Returns:
            A dict of totals (wins, draws, losses, goals, shots, shots on
            target and the opposition's equivalents), or ``None`` when the
            team has fewer than ``matches`` earlier games.

        Raises:
            ValueError: if ``matches`` is less than 1.
        """
        if matches < 1:
            # A non-positive window would otherwise slice the entire history
            # ([-0:]) or an arbitrary tail of it instead of "the last N".
            raise ValueError('matches must be at least 1')

        # Order by date so "most recent" does not depend on the CSV being
        # chronological. sorted() is stable, so an already-ordered file
        # yields exactly the same selection as before.
        recent_results = sorted(self.filter(team, date), key=lambda result: result['Date'])
        if len(recent_results) < matches:
            return None

        measures = [
            self._match_measures(team, result)
            for result in recent_results[-matches:]
        ]
        return {key: sum(measure[key] for measure in measures) for key in measures[0]}

    @staticmethod
    def _match_measures(team, result):
        """Map one raw result to raw per-match counts from ``team``'s point of view."""
        # Column names embed 'H' or 'A' depending on which side the team was on.
        if result['HomeTeam'] == team:
            team_letter, opposition_letter = 'H', 'A'
        else:
            team_letter, opposition_letter = 'A', 'H'

        outcome = result['FTR']
        return {
            'wins': 1 if outcome == team_letter else 0,
            'draws': 1 if outcome == 'D' else 0,
            'losses': 1 if outcome == opposition_letter else 0,
            'goals': int(result['FT{}G'.format(team_letter)]),
            'opposition-goals': int(result['FT{}G'.format(opposition_letter)]),
            'shots': int(result['{}S'.format(team_letter)]),
            'shots-on-target': int(result['{}ST'.format(team_letter)]),
            'opposition-shots': int(result['{}S'.format(opposition_letter)]),
            'opposition-shots-on-target': int(result['{}ST'.format(opposition_letter)]),
        }