# Data handling for football analytics

The goal of this notebook is to handle all necessary data for the project: it should download the data, sort it, and create data sets for analytics.

## Imports below

In [1]:
# Add the imports needed
import requests
import yaml
import os
import json

# Constants needed

In [2]:
# Local output directory (presumably where data sets get saved -- TODO confirm,
# nothing in the visible cells reads it yet). The default is the original
# author's machine-specific path; set the FOOTBALL_ANALYTICS_DIR environment
# variable to point somewhere else without editing the notebook.
SAVE_DIRECTORY = os.environ.get(
    'FOOTBALL_ANALYTICS_DIR',
    r'C:\Users\theo_\Documents\Fotboll\FootballAnalyticsML',
)

# Endpoint queried by get_fantasy_data(); the response is parsed as JSON.
API_URL = r'https://fantasy.premierleague.com/api/bootstrap-static/'

# Fetch data from API

In [114]:
def get_api_response(url, timeout=30):
    """Issue a GET request and return the response object.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The ``requests`` response object for a successful request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting an error page
    # reach the JSON parser further down the pipeline.
    response.raise_for_status()
    return response

def clean_api_response(resp):
    """Return the body of a response object as text.

    Reads ``resp.content`` and decodes it as UTF-8.
    """
    raw_body = resp.content
    body_text = raw_body.decode('UTF-8')
    return body_text

def get_fantasy_data():
    """Download the data served at ``API_URL`` and parse it as JSON.

    Returns:
        The object produced by ``json.loads`` on the decoded response body.
    """
    response = get_api_response(API_URL)
    body_text = clean_api_response(response)
    return json.loads(body_text)

# Download once; the cells below all read from `fd`.
fd = get_fantasy_data()

# Report success and list the top-level keys of the payload.
print("Data downloaded", 'Data keys:', "{}".format(fd.keys()), sep='\n')

Data downloaded
Data keys:
dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])


# Team mapper

In [115]:
# Lookup tables from numeric ids to display names, read by the Player class.
# team id -> team name
team_map = {team['id']: team['name'] for team in fd['teams']}
# element type id -> plural name of that type (e.g. used as the position label)
type_map = {etype['id']: etype['plural_name'] for etype in fd['element_types']}

# Example player data
Shows an example of the data used for the analysis

In [116]:
# Print every field of the first entry in fd['elements'], one "key : value" per line.
example_player = fd['elements'][0]
for k, v in example_player.items():
    print('%s : %s' % (k, v))

chance_of_playing_next_round : None
chance_of_playing_this_round : None
code : 69140
cost_change_event : 0
cost_change_event_fall : 0
cost_change_start : 0
cost_change_start_fall : 0
dreamteam_count : 0
element_type : 2
ep_next : None
ep_this : None
event_points : 0
first_name : Shkodran
form : 0.0
id : 1
in_dreamteam : False
news : 
news_added : None
now_cost : 55
photo : 69140.jpg
points_per_game : 2.6
second_name : Mustafi
selected_by_percent : 0.6
special : False
squad_number : None
status : a
team : 1
team_code : 3
total_points : 80
transfers_in : 0
transfers_in_event : 0
transfers_out : 0
transfers_out_event : 0
value_form : 0.0
value_season : 0.0
web_name : Mustafi
minutes : 2611
goals_scored : 2
assists : 0
clean_sheets : 6
goals_conceded : 40
own_goals : 0
penalties_saved : 0
penalties_missed : 0
yellow_cards : 9
red_cards : 0
saves : 0
bonus : 5
bps : 475
influence : 718.6
creativity : 106.0
threat : 252.0
ict_index : 107.8


# Player classes

In [80]:
class Player:
    """Read-only view over one player record from ``fd['elements']``.

    The record is kept as-is in ``self.pd``; the accessors read the keys
    ``first_name``, ``second_name``, ``team``, ``now_cost``,
    ``total_points``, ``minutes`` and ``element_type``.

    ``get_team`` and ``get_position`` look ids up in the module-level
    ``team_map`` and ``type_map`` dictionaries, so those must be defined
    before these methods (or ``str(player)``) are called.
    """

    def __init__(self, player_data):
        """Store the raw player dictionary without copying it."""
        self.pd = player_data

    def __str__(self):
        """One-line summary: name, position, team, cost, points, ROI, minutes."""
        return '{} ({}) {},  cost: {}, points: {}, roi: {:.2f}, played {} min'.format(
            self.get_name(), self.get_position(), self.get_team(), self.get_cost(),
            self.get_points(), self.get_roi(), self.get_time_played())

    def get_name(self):
        """Return first and second name joined by a space."""
        return '%s %s' % (self.pd['first_name'], self.pd['second_name'])

    def get_team(self):
        """Return the team name for this player's team id (via ``team_map``)."""
        return team_map[self.pd['team']]

    def get_cost(self):
        """Return ``now_cost`` divided by 10.0."""
        return self.pd['now_cost']/10.0

    def get_points(self):
        """Return the ``total_points`` field."""
        return self.pd['total_points']

    def get_roi(self):
        """Return points per unit of cost.

        A record with zero cost has no meaningful ratio; 0.0 is returned
        in that case so sorting by ROI does not abort with
        ``ZeroDivisionError``.
        """
        cost = self.get_cost()
        if cost == 0:
            return 0.0
        return self.get_points()/cost

    def get_time_played(self):
        """Return the ``minutes`` field."""
        return self.pd['minutes']

    def get_position(self):
        """Return the plural position name for this player's element type (via ``type_map``)."""
        return type_map[self.pd['element_type']]

# Extract list of players

In [93]:
# Wrap every raw element record in a Player so the accessor methods can be used.
players = list(map(Player, fd['elements']))
print('Player data extracted')

Player data extracted


In [150]:
# Full player pool, best first: once by points-per-cost, once by raw points.
roi_players = sorted(players, key=Player.get_roi, reverse=True)
point_players = sorted(players, key=Player.get_points, reverse=True)

# Same ROI ranking, restricted to players whose cost is below 5.5.
lowcost_pool = [candidate for candidate in players if candidate.get_cost() < 5.5]
roi_lowcost_players = sorted(lowcost_pool, key=Player.get_roi, reverse=True)

In [144]:
def print_top_positions(sorted_list, position, n_players):
    """Print the first ``n_players`` entries of ``sorted_list`` at ``position``.

    Args:
        sorted_list: Players in the order they should be reported; each
            item must provide ``get_position()`` and be printable.
        position: Value compared against ``player.get_position()``.
        n_players: Maximum number of players to print. Zero or a negative
            number prints nothing.
    """
    if n_players <= 0:
        return

    printed = 0
    for player in sorted_list:
        if player.get_position() != position:
            continue
        print(player)
        printed += 1
        # Only check the limit after an actual match.
        if printed >= n_players:
            break

def print_player_list(sorted_list, n_players=10):
    """Print a per-position report of the leading players in ``sorted_list``.

    For every entry of the module-level ``positions`` list a header line
    and a blank line are printed, then ``print_top_positions`` is called
    with ``n_players``, then a 110-character dashed separator is printed.
    """
    separator = 110*'-'
    for position in positions:
        header = 'Getting top: {}'.format(position)
        print(header)
        print('')
        print_top_positions(sorted_list, position, n_players)
        print(separator)
            
            
# Position labels in report order; print_player_list iterates over this list
# and print_top_positions compares each label with Player.get_position().
positions = [
    'Goalkeepers',
    'Defenders',
    'Midfielders',
    'Forwards',
]


# Top roi players for each position

In [146]:
# Twelve best players per position when ranked by points per cost.
print_player_list(roi_players, n_players=12)

Getting top: Goalkeepers

Alisson Ramses Becker (Goalkeepers) Liverpool,  cost: 6.0, points: 176, roi: 29.33, played 3420 min
Jordan Pickford (Goalkeepers) Everton,  cost: 5.5, points: 161, roi: 29.27, played 3420 min
Lukasz Fabianski (Goalkeepers) West Ham,  cost: 5.0, points: 143, roi: 28.60, played 3420 min
Ederson Santana de Moraes (Goalkeepers) Man City,  cost: 6.0, points: 169, roi: 28.17, played 3420 min
Hugo Lloris (Goalkeepers) Spurs,  cost: 5.5, points: 145, roi: 26.36, played 2970 min
Martin Dubravka (Goalkeepers) Newcastle,  cost: 5.0, points: 131, roi: 26.20, played 3420 min
Kepa Arrizabalaga (Goalkeepers) Chelsea,  cost: 5.5, points: 142, roi: 25.82, played 3240 min
Ben Foster (Goalkeepers) Watford,  cost: 5.0, points: 129, roi: 25.80, played 3420 min
Kasper Schmeichel (Goalkeepers) Leicester,  cost: 5.0, points: 120, roi: 24.00, played 3420 min
Rui Pedro dos Santos Patrício (Goalkeepers) Wolves,  cost: 5.0, points: 118, roi: 23.60, played 3330 min
Mathew Ryan (Goalkeeper

# Top point players for each position

In [149]:
# Ten best players per position when ranked by total points.
print_player_list(point_players, n_players=10)

Getting top: Goalkeepers

Alisson Ramses Becker (Goalkeepers) Liverpool,  cost: 6.0, points: 176, roi: 29.33, played 3420 min
Ederson Santana de Moraes (Goalkeepers) Man City,  cost: 6.0, points: 169, roi: 28.17, played 3420 min
Jordan Pickford (Goalkeepers) Everton,  cost: 5.5, points: 161, roi: 29.27, played 3420 min
Hugo Lloris (Goalkeepers) Spurs,  cost: 5.5, points: 145, roi: 26.36, played 2970 min
Lukasz Fabianski (Goalkeepers) West Ham,  cost: 5.0, points: 143, roi: 28.60, played 3420 min
Kepa Arrizabalaga (Goalkeepers) Chelsea,  cost: 5.5, points: 142, roi: 25.82, played 3240 min
Martin Dubravka (Goalkeepers) Newcastle,  cost: 5.0, points: 131, roi: 26.20, played 3420 min
Ben Foster (Goalkeepers) Watford,  cost: 5.0, points: 129, roi: 25.80, played 3420 min
Kasper Schmeichel (Goalkeepers) Leicester,  cost: 5.0, points: 120, roi: 24.00, played 3420 min
David de Gea (Goalkeepers) Man Utd,  cost: 5.5, points: 120, roi: 21.82, played 3420 min
---------------------------------------

# Top ROI low-cost players (cost below 5.5) for each position

In [151]:
# Best-ROI players per position among the low-cost pool (default list length).
print_player_list(sorted_list=roi_lowcost_players)

Getting top: Goalkeepers

Lukasz Fabianski (Goalkeepers) West Ham,  cost: 5.0, points: 143, roi: 28.60, played 3420 min
Martin Dubravka (Goalkeepers) Newcastle,  cost: 5.0, points: 131, roi: 26.20, played 3420 min
Ben Foster (Goalkeepers) Watford,  cost: 5.0, points: 129, roi: 25.80, played 3420 min
Kasper Schmeichel (Goalkeepers) Leicester,  cost: 5.0, points: 120, roi: 24.00, played 3420 min
Rui Pedro dos Santos Patrício (Goalkeepers) Wolves,  cost: 5.0, points: 118, roi: 23.60, played 3330 min
Mathew Ryan (Goalkeepers) Brighton,  cost: 4.5, points: 104, roi: 23.11, played 3060 min
Jonas Lössl (Goalkeepers) Everton,  cost: 4.5, points: 99, roi: 22.00, played 2777 min
Bernd Leno (Goalkeepers) Arsenal,  cost: 5.0, points: 106, roi: 21.20, played 2835 min
Alex McCarthy (Goalkeepers) Southampton,  cost: 4.5, points: 73, roi: 16.22, played 2250 min
Vicente Guaita (Goalkeepers) Crystal Palace,  cost: 5.0, points: 81, roi: 16.20, played 1754 min
---------------------------------------------