<a href="https://colab.research.google.com/github/Ianfm94/Premier_League_Stats/blob/master/Fantasy_Web_Scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Credit to Hank Deaton for the original version of the code below:
https://medium.com/@hmdeaton/how-to-scrape-fantasy-premier-league-fpl-player-data-on-a-mac-using-the-api-python-and-cron-a88587ae7628


In [0]:
# Python Script to web-scrape the Fantasy PL Website

# Credit to Hank Deaton for the original version of the code below:
# https://medium.com/@hmdeaton/how-to-scrape-fantasy-premier-league-fpl-player-data-on-a-mac-using-the-api-python-and-cron-a88587ae7628

import requests
import json
import numpy as np
import pandas as pd
import datetime

# Lookup tables that translate the codes used by the FPL API into readable
# labels: numeric team ids, numeric position ids and one-letter status flags.
# NOTE(review): the team ids look specific to the 2019/20 season (the output
# path at the bottom of the script says 2019_2020) - confirm before reusing
# this table for another season.
team_names = {1:'Arsenal', 2:'Aston Villa', 3:'Bournemouth',
              4:'Brighton & Hove Albion', 5:'Burnley', 6:'Chelsea',
              7:'Crystal Palace', 8:'Everton', 9:'Leicester City',
              10:'Liverpool', 11:'Manchester City', 12:'Manchester United',
              13:'Newcastle United', 14:'Norwich City', 15:'Sheffield United',
              16:'Southampton', 17:'Tottenham Hotspur', 18:'Watford',
              19:'West Ham United', 20:'Wolverhampton Wanderers'}
player_positions = {1:'Goalkeeper', 2:'Defender',
                   3:'Midfielder', 4:'Striker'}
player_status = {'a':'available', 'd':'doubtful',
                 'i':'injured', 'n':'not available',
                 's':'suspended', 'u':'unavailable'}

# Accumulators for the derived columns that are filled in further down.
# Every name gets its own, independent empty list.
team_nm, plyr_position, plyr_status, currnt_cost = ([] for _ in range(4))
cost_change_strt, cost_change_strt_fall = ([] for _ in range(2))
blank_list_1, blank_list_2, blank_list_3, blank_list_4 = (
    [] for _ in range(4))
mins_per_90, goals_per_90, assists_per_90 = ([] for _ in range(3))

# Download the latest player data from the FPL API.
# The timeout stops the script from hanging forever on a stalled connection,
# and raise_for_status() turns an HTTP error page into a clear exception
# instead of a confusing JSON parsing error.
link = "https://fantasy.premierleague.com/api/bootstrap-static/"
response = requests.get(link, timeout=30)
response.raise_for_status()

# Decode the JSON payload into Python objects
data = response.json()

# Accented / non-English characters and the plain-ASCII text that replaces
# them. Characters that are not listed here are left untouched.
ASCII_REPLACEMENTS = {
    '\u00c1': 'A',   # A with acute
    '\u00c7': 'C',   # C with cedilla
    '\u00d6': 'O',   # O with diaeresis
    '\u00d8': 'O',   # O with stroke
    '\u00df': 'ss',  # sharp s
    '\u00e1': 'a',   # a with acute
    '\u00e3': 'a',   # a with tilde
    '\u00e4': 'a',   # a with diaeresis
    '\u00e7': 'c',   # c with cedilla
    '\u00e9': 'e',   # e with acute
    '\u00eb': 'e',   # e with diaeresis
    '\u00ed': 'i',   # i with acute
    '\u00ef': 'i',   # i with diaeresis
    '\u00f1': 'n',   # n with tilde
    '\u00f3': 'o',   # o with acute
    '\u00f6': 'o',   # o with diaeresis
    '\u00f8': 'o',   # o with stroke
    '\u00fa': 'u',   # u with acute
    '\u00fc': 'u',   # u with diaeresis
    '\u0107': 'c',   # c with acute
    '\u0161': 's',   # s with caron
}

# Serialise to one JSON string so that every text field in the payload can be
# cleaned in a single pass. ensure_ascii=False keeps the characters themselves
# in the string (rather than \uXXXX escapes) so str.translate can match them.
data = json.dumps(data, ensure_ascii=False)
data = data.translate(str.maketrans(ASCII_REPLACEMENTS))

# Convert back to Python object
data = json.loads(data)

# Keys read from every player record returned by the API, listed in the order
# in which their values are stored in each row of all_players (the entry at
# position n here ends up in column n of the table built from all_players).
PLAYER_FIELDS = (
    'first_name', 'second_name', 'team', 'element_type', 'minutes',
    'goals_scored', 'goals_conceded', 'assists', 'clean_sheets',
    'penalties_missed', 'penalties_saved', 'saves', 'own_goals',
    'yellow_cards', 'red_cards', 'points_per_game', 'bonus',
    'total_points', 'bps', 'form', 'in_dreamteam', 'selected_by_percent',
    'creativity', 'ict_index', 'influence', 'threat', 'status',
    'chance_of_playing_next_round', 'chance_of_playing_this_round',
    'cost_change_event', 'cost_change_event_fall',
    'cost_change_start', 'cost_change_start_fall',
    'dreamteam_count', 'ep_next', 'ep_this', 'event_points',
    'news', 'news_added', 'now_cost', 'web_name',
    'transfers_in', 'transfers_in_event',
    'transfers_out', 'transfers_out_event',
    'value_form', 'value_season', 'code', 'id', 'special',
)

# 2D list holding ALL player data: one row per player, one value per entry of
# PLAYER_FIELDS. 'form' is read from the player record like every other
# field. A key missing from a record raises KeyError.
all_players = [[player[field] for field in PLAYER_FIELDS]
               for player in data["elements"]]

# Column names of the player table, in the same order as the values inside
# each row of all_players (column n of all_players gets name n of this list).
DATASET_COLUMNS = [
    'first_name', 'second_name', 'team_name', 'player_pos',
    'minutes_played', 'goals_scored', 'goals_conceded', 'assists',
    'clean_sheets', 'penalties_missed', 'penalties_saved', 'num_saves',
    'own_goals', 'yellow_cards', 'red_cards', 'points_per_game',
    'total_bonus_pts', 'total_points', 'bps_system', 'form',
    'in_dreamteam', 'selected_by_percent', 'creativity', 'ict_index',
    'influence', 'threat', 'status',
    'chance_of_playing_next_round', 'chance_of_playing_this_round',
    'cost_change_event', 'cost_change_event_fall',
    'cost_change_start', 'cost_change_start_fall',
    'dreamteam_count', 'ep_next', 'ep_this', 'event_points',
    'news', 'news_added', 'current_cost', 'web_name',
    'transfers_in', 'transfers_in_event',
    'transfers_out', 'transfers_out_event',
    'value_form', 'value_season', 'player_code', 'player_id', 'special',
]

# Convert the 2D list to a numpy array.
# dtype=object keeps every cell as the original Python value. Without it
# NumPy chooses a common dtype on its own: a payload made up only of text and
# numbers would be turned entirely into strings, which breaks the arithmetic
# and the integer-code lookups done later in the script.
all_players = np.array(all_players, dtype=object)

# Convert the numpy array to a pandas dataframe (table), one column per name
dataset = pd.DataFrame({column: all_players[:, position]
                        for position, column in enumerate(DATASET_COLUMNS)})

# Translate the coded team, position and status columns into readable labels.
# Each list is rebuilt from scratch with exactly one entry per table row, so
# it always lines up with the table. A code that is missing from its lookup
# dictionary is kept as-is rather than dropped, because dropping it would
# leave the list shorter than the table.
team_nm = [team_names.get(code, code)
           for code in dataset['team_name']]

plyr_position = [player_positions.get(code, code)
                 for code in dataset['player_pos']]

plyr_status = [player_status.get(code, code)
               for code in dataset['status']]

# Divide each of the three cost columns by ten (rounded to 4 decimal places)
# and collect the results in the matching accumulator list.
for source_column, scaled_values in (
        ('current_cost', currnt_cost),
        ('cost_change_start', cost_change_strt),
        ('cost_change_start_fall', cost_change_strt_fall)):
    scaled_values.extend(round(raw / 10, 4)
                         for raw in dataset[source_column])

# Replace missing values with explicit placeholders, one entry per table row.
# Missing chance-of-playing values (None) become the string "0".
blank_list_1 = ["0" if chance is None else chance
                for chance in dataset['chance_of_playing_next_round']]

blank_list_2 = ["0" if chance is None else chance
                for chance in dataset['chance_of_playing_this_round']]

# An empty news text becomes "N/A"
blank_list_3 = ["N/A" if text == "" else text
                for text in dataset['news']]

# A missing news timestamp (None) becomes "N/A"
blank_list_4 = ["N/A" if added is None else added
                for added in dataset['news_added']]

def _rate_per_90(totals, nineties_played):
    """Return one per-90-minutes rate for each (total, nineties) pair.

    totals: season totals (e.g. goals), one per player.
    nineties_played: minutes played divided by 90, one per player, in the
        same order as totals.

    The rate is total / nineties rounded to 4 decimal places, or 0 when the
    player has not played or the total is zero (this also avoids dividing by
    zero).
    """
    return [0 if nineties == 0 or total == 0 else round(total / nineties, 4)
            for total, nineties in zip(totals, nineties_played)]


# Minutes played expressed as a number of full 90-minute matches.
# Players with no minutes get 0.0.
mins_per_90 = [round(minutes / 90, 4)
               for minutes in dataset['minutes_played']]

# Goals and assists per 90 minutes played, paired row by row with mins_per_90
goals_per_90 = _rate_per_90(dataset['goals_scored'], mins_per_90)
assists_per_90 = _rate_per_90(dataset['assists'], mins_per_90)

# Derived columns, as (column name, values) pairs in their final order
derived_columns = [
    ('Team_Name', team_nm),
    ('Player_Position', plyr_position),
    ('Player_Status', plyr_status),
    ('Player_Cost', currnt_cost),
    ('Next_Round', blank_list_1),
    ('This_Round', blank_list_2),
    ('News', blank_list_3),
    ('News_Added', blank_list_4),
    ('Cost_Change_Start', cost_change_strt),
    ('Cost_Change_Start_Fall', cost_change_strt_fall),
    ('Mins_Per_90', mins_per_90),
    ('Goals_Per_90', goals_per_90),
    ('Assists_Per_90', assists_per_90),
]

# Helper table that holds only the derived columns
dict_ = dict(derived_columns)
dataset_1 = pd.DataFrame(dict_,
                         columns=[label for label, _ in derived_columns])

# Existing columns of the main table and the derived column replacing each
replaced_by = {
    'team_name': 'Team_Name',
    'player_pos': 'Player_Position',
    'status': 'Player_Status',
    'current_cost': 'Player_Cost',
    'chance_of_playing_next_round': 'Next_Round',
    'chance_of_playing_this_round': 'This_Round',
    'news': 'News',
    'news_added': 'News_Added',
    'cost_change_start': 'Cost_Change_Start',
    'cost_change_start_fall': 'Cost_Change_Start_Fall',
}

# Updating old dataframe with updated values
dataset = dataset.assign(**{target: dataset_1[source]
                            for target, source in replaced_by.items()})

# The per-90 statistics are new columns, appended at the end of the table
for extra_column in ('Mins_Per_90', 'Goals_Per_90', 'Assists_Per_90'):
    dataset[extra_column] = dataset_1[extra_column]

# Build the output path: fixed folder + today's date (YYYY-MM-DD) + suffix,
# so that runs on different days write to different files
date_stamp = str(datetime.date.today())
filename = 'C:\\Users\\Ianm9\\OneDrive\\Desktop\\P_Drive\\Data_Analysis\\Sports_Analysis\\Soccer\\Premier_League\\Web_Scraping\\2019_2020\\' + \
           date_stamp + '_updated_fpl_stats.csv'

# Write the table to that path as CSV, leaving out the row index
dataset.to_csv(filename, index=False)