In [5]:
# handling datasets
import pandas as pd
from pandas import DataFrame

# fetching data from the web (HTTP requests and JSON handling)
import json
import requests

# to handle dates
from datetime import datetime

# Show every column when a DataFrame is displayed (None removes the column limit).
# The option is set through the public `pd.set_option` entry point; the previous
# `pd.pandas.set_option` only worked because the package can be reached as an
# attribute of itself, which is incidental rather than a documented API.
pd.set_option('display.max_columns', None)

In [2]:
# Set up dataframe structure

# One empty list per dataframe column; a row is recorded by appending one
# value to every list.
points, minsPlayed, goalsScored, assists = [], [], [], []
cleanSheets, goalsConceded, ownGoals = [], [], []
penSaved, penMissed, yelCards, redCards = [], [], [], []
saves, bonus, bonusPointSystem = [], [], []
influence, creativity, threat, ictIndex = [], [], [], []
netTransfers, selectedBy, costGBP = [], [], []
gameDate, playerName, oppositionTeam = [], [], []

# Lookup from a numeric team id (1-20) to the team's name.
# NOTE(review): the ids look specific to one season's team list - confirm
# before reusing this mapping for another season.
oppositionTeamDict = dict(enumerate([
    'Arsenal',
    'Aston Villa',
    'Brighton and Hove Albion',
    'Burnley',
    'Chelsea',
    'Crystal Palace',
    'Everton',
    'Fulham',
    'Leicester City',
    'Leeds',
    'Liverpool',
    'Manchester City',
    'Manchester United',
    'Newcastle United',
    'Sheffield United',
    'Southampton',
    'Tottenham Hotspur',
    'West Bromwich Albion',
    'West Ham United',
    'Wolverhampton Wanderers',
], start=1))

# Column name -> column list. Every value is the very same list object as the
# variable of the same name above, so appending to either one fills the other.
ffmlDataDict = dict(
    points=points,
    minsPlayed=minsPlayed,
    goalsScored=goalsScored,
    assists=assists,
    cleanSheets=cleanSheets,
    goalsConceded=goalsConceded,
    ownGoals=ownGoals,
    penSaved=penSaved,
    penMissed=penMissed,
    yelCards=yelCards,
    redCards=redCards,
    saves=saves,
    bonus=bonus,
    bonusPointSystem=bonusPointSystem,
    influence=influence,
    creativity=creativity,
    threat=threat,
    ictIndex=ictIndex,
    netTransfers=netTransfers,
    selectedBy=selectedBy,
    costGBP=costGBP,
    gameDate=gameDate,
    playerName=playerName,
    oppositionTeam=oppositionTeam,
)

In [3]:
# Web scraping: download every player's per-fixture statistics and append one
# row per fixture to the column lists held in ffmlDataDict.
#
# `url` returns the list of players under the 'elements' key; `ajax_url`,
# formatted with a player's id, returns that player's summary, whose 'history'
# list is read here as one entry per fixture (each entry carries its own
# kickoff_time and opponent_team).
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
ajax_url = 'https://fantasy.premierleague.com/api/element-summary/{}/'

# Seconds to wait for a response before giving up, so one stalled connection
# cannot hang the whole scrape.
_REQUEST_TIMEOUT = 30

# Dataframe column -> key of the 'history' entry, for whole-number statistics.
_INT_STATS = {
    'points': 'total_points',
    'minsPlayed': 'minutes',
    'goalsScored': 'goals_scored',
    'assists': 'assists',
    'cleanSheets': 'clean_sheets',
    'goalsConceded': 'goals_conceded',
    'ownGoals': 'own_goals',
    'penSaved': 'penalties_saved',
    'penMissed': 'penalties_missed',
    'yelCards': 'yellow_cards',
    'redCards': 'red_cards',
    'saves': 'saves',
    'bonus': 'bonus',
    'bonusPointSystem': 'bps',
    'netTransfers': 'transfers_balance',
    'selectedBy': 'selected',
}

# Dataframe column -> key of the 'history' entry, for decimal statistics
# (float() accepts these whether they arrive as strings or as numbers).
_FLOAT_STATS = {
    'influence': 'influence',
    'creativity': 'creativity',
    'threat': 'threat',
    'ictIndex': 'ict_index',
}


def _fixture_row(player, fixture):
    """Convert one entry of a player's 'history' into a dataframe row.

    Args:
        player: one item of data['elements']; only 'first_name' and
            'second_name' are read.
        fixture: one item of player_data['history'].

    Returns:
        dict mapping every key of ffmlDataDict to the value for this fixture.

    Raises:
        KeyError: if an expected field is missing, or if the opponent team id
            is not a key of oppositionTeamDict.
    """
    row = {
        # Only the leading 'YYYY-MM-DD' part of the kickoff timestamp is kept.
        'gameDate': datetime.strptime(fixture['kickoff_time'][:10], '%Y-%m-%d').date(),
        'playerName': (player['first_name'], player['second_name']),
        # Use the team dict to store the team name rather than its number.
        'oppositionTeam': oppositionTeamDict[int(fixture['opponent_team'])],
        # NOTE(review): 'value' appears to be expressed in tenths of a pound,
        # hence the division by 10 - confirm against the API.
        'costGBP': float(fixture['value']) / 10,
    }
    row.update((column, int(fixture[key])) for column, key in _INT_STATS.items())
    row.update((column, float(fixture[key])) for column, key in _FLOAT_STATS.items())
    return row


# Start from empty columns so that re-running this cell does not append a
# second copy of every row.
for column_values in ffmlDataDict.values():
    column_values.clear()

# A single session reuses the connection across the many per-player requests.
with requests.Session() as session:
    response = session.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    for e in data['elements']:
        response = session.get(ajax_url.format(e['id']), timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        player_data = response.json()  # categorised by the player id

        # uncomment this to print all player data:
        # print(json.dumps(player_data, indent=4))
        # print('-' * 80)

        # Iterate over the entries that actually exist instead of assuming 38
        # and catching IndexError; players with fewer entries simply yield
        # fewer rows.
        for gw, fixture in enumerate(player_data['history']):
            # Every statistic is read from this fixture's own entry. Previously
            # bps, influence, creativity, threat, ict_index, transfers_balance,
            # selected and value were always read from history[0], which
            # repeated the first fixture's numbers on every row of a player.
            row = _fixture_row(e, fixture)

            # The row is built completely before anything is appended, so a
            # failure cannot leave the column lists with different lengths.
            for column, value in row.items():
                ffmlDataDict[column].append(value)


In [11]:
# Assemble the dataframe from the column lists in a single chain:
#   1. build the frame from the column dictionary,
#   2. run gameDate through pd.to_datetime (the scraping cell stores
#      datetime.date objects in that column),
#   3. sort by gameDate, so that head() shows a mix of players instead of one
#      player's consecutive fixtures.
ffmlDf = (
    DataFrame(ffmlDataDict)
    .assign(gameDate=lambda frame: pd.to_datetime(frame['gameDate']))
    .sort_values(by='gameDate')
)

In [12]:
# Preview the first five rows of the date-sorted dataframe.
ffmlDf.head(n=5)

Unnamed: 0,points,minsPlayed,goalsScored,assists,cleanSheets,goalsConceded,ownGoals,penSaved,penMissed,yelCards,redCards,saves,bonus,bonusPointSystem,influence,creativity,threat,ictIndex,netTransfers,selectedBy,costGBP,gameDate,playerName,oppositionTeam
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,76656,7.0,2020-09-12,"(Mesut, Özil)",Fulham
6378,6,90,0,0,1,0,0,0,0,0,0,0,0,27,26.0,0.1,2.0,2.8,0,88657,5.0,2020-09-12,"(Federico, Fernández)",West Ham United
6394,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,3326,4.5,2020-09-12,"(Ciaran, Clark)",West Ham United
6410,7,90,0,0,1,0,0,0,0,0,0,3,0,26,27.0,0.0,0.0,2.7,0,13715,5.0,2020-09-12,"(Karl, Darlow)",West Ham United
6426,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,27245,5.0,2020-09-12,"(Martin, Dubravka)",West Ham United


In [13]:
# Persist the dataframe as CSV text so it can be reloaded later without
# scraping again. The path has no extension and the row index is written as
# the first column (pandas' default).
csv_path = 'ffmlDf_20-21'
ffmlDf.to_csv(csv_path)