In [1]:
import config
import requests
import time
from pprint import pprint
from pymongo import MongoClient
from tqdm.auto import tqdm

In [14]:
BASE_URL = 'https://fantasy.premierleague.com/api/'

# Fetch the bootstrap-static payload once; later cells read and extend API_DATA.
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error body from being stored as data.
_bootstrap_response = requests.get(BASE_URL + 'bootstrap-static/', timeout=30)
_bootstrap_response.raise_for_status()
API_DATA = _bootstrap_response.json()

In [15]:
def get_fixtures():
    '''Fetch all fixtures and store them in API_DATA['fixtures'].

    Downloads the `fixtures/` endpoint, strips the fields that are not
    needed downstream and attaches the resulting list to the module-level
    API_DATA dict. Nothing is written to MongoDB by this function itself.

    Returns:
        True once API_DATA['fixtures'] has been populated.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.RequestException: on connection problems or timeout.
    '''
    unused_keys = ('code', 'finished_provisional', 'minutes',
                   'provisional_start_time', 'started', 'stats', 'pulse_id')

    # get all fixture data
    response = requests.get(BASE_URL + 'fixtures/', timeout=30)
    response.raise_for_status()
    data = response.json()

    for fixture in data:
        for key in unused_keys:
            # pop with a default so a field missing from the API response
            # does not abort the whole download with a KeyError
            fixture.pop(key, None)

    API_DATA['fixtures'] = data

    return True

In [16]:
# download the fixtures and attach them to API_DATA['fixtures']
get_fixtures()

True

In [17]:
# Top-level keys of API_DATA, one level deep: the bootstrap-static payload
# plus the 'fixtures' list attached by get_fixtures()
pprint(API_DATA, depth=1)

{'element_stats': [...],
 'element_types': [...],
 'elements': [...],
 'events': [...],
 'fixtures': [...],
 'game_settings': {...},
 'phases': [...],
 'teams': [...],
 'total_players': 11431471}


In [18]:
# Open the MongoDB connection; credentials and host come from the local
# `config` module so they never appear in the notebook itself.
client = MongoClient(
    f'mongodb+srv://{config.username}:{config.password}@{config.database}'
    '/?retryWrites=true&w=majority'
)
# handle to the database that holds the raw 2022-23 collections
db = client['raw_2022-23']

In [19]:
def get_bootstrap_static():
    '''Upsert the endpoint lists held in API_DATA into MongoDB.

    Every key of API_DATA except the skipped ones below is treated as a
    collection name, and each of its rows is upserted with the API `id`
    stored as the document `_id`. This covers:
      - elements (players)
      - element_types (positions)
      - events (gameweeks)
      - teams
      - fixtures (only if get_fixtures() has added them to API_DATA)

    API_DATA and its rows are left untouched, so the function can be run
    repeatedly and later cells can still read `row['id']`.

    Returns:
        True when every endpoint has been processed.
    '''

    # keys that are not lists of documents and are not stored
    skipped_keys = {'element_stats', 'game_settings', 'total_players', 'phases'}
    endpoints = [key for key in API_DATA if key not in skipped_keys]

    # iterate over each endpoint
    for endpoint in endpoints:
        print(f'Updating {endpoint.upper()} data')
        collection = db[endpoint]

        # get number of docs, cols in collection
        n_docs = collection.count_documents({})
        sample = collection.find_one({})
        n_cols = len(sample) if sample else 0
        print(f'{endpoint} shape: {n_docs} docs X {n_cols} cols')

        # update docs in each collection
        for row in tqdm(API_DATA[endpoint]):
            # Build a fresh document instead of popping 'id' from the row:
            # the rows are shared with API_DATA, so mutating them would break
            # re-runs and any later cell that reads row['id'].
            # Rows already renamed by an earlier run carry '_id' instead.
            row_id = row['id'] if 'id' in row else row['_id']
            doc = {key: value for key, value in row.items()
                   if key not in ('id', '_id')}
            doc['_id'] = row_id
            collection.update_one({'_id': row_id}, {'$set': doc}, upsert=True)

        # print number of new docs, cols
        sample = collection.find_one({})
        # find_one returns None while the collection is still empty
        n_cols_after = len(sample) if sample else 0
        print(f'Added {collection.count_documents({}) - n_docs} new docs')
        print(f'Added {n_cols_after - n_cols} new cols\n')

    return True

In [20]:
# upsert the API_DATA endpoints into MongoDB; prints per-collection
# document and column counts before and after the update
get_bootstrap_static()

Updating events data
events shape: 38 docs X 23 cols


  0%|          | 0/38 [00:00<?, ?it/s]

Added 0 new docs
Added 0 new cols

Updating teams data
teams shape: 20 docs X 21 cols


  0%|          | 0/20 [00:00<?, ?it/s]

Added 0 new docs
Added 0 new cols

Updating elements data
elements shape: 764 docs X 88 cols


  0%|          | 0/764 [00:00<?, ?it/s]

Added 0 new docs
Added 0 new cols

Updating element_types data
element_types shape: 4 docs X 11 cols


  0%|          | 0/4 [00:00<?, ?it/s]

Added 0 new docs
Added 0 new cols

Updating fixtures data
fixtures shape: 380 docs X 10 cols


  0%|          | 0/380 [00:00<?, ?it/s]

Added 0 new docs
Added 0 new cols



True

In [28]:
def _fetch_element_summary(element_id, max_retries=20, wait=.3):
    '''Download element-summary/{element_id}/, retrying on transient failures.'''
    url = BASE_URL + 'element-summary/' + str(element_id) + '/'
    last_error = None

    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            # an error status (e.g. a rate-limit response) counts as a failed try
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as error:
            # ValueError covers a body that is not valid JSON
            last_error = error
            if attempt < max_retries - 1:
                # wait a bit to avoid API rate limits
                time.sleep(wait)

    raise RuntimeError(
        f'element-summary request for element {element_id} failed '
        f'after {max_retries} attempts') from last_error


def get_element_summary(element_id=None, max_retries=20, wait=.3):
    '''Get past-season and gameweek info for one player or for all players.

    Args:
        element_id: player id to fetch. When None (the default) every player
            in API_DATA['elements'] is fetched in turn.
        max_retries: attempts per player before giving up.
        wait: seconds to sleep between attempts, to stay under the API
            rate limit.

    Returns:
        The decoded element-summary payload for `element_id`. When iterating
        over all players, the payload of the last player fetched (None if
        API_DATA['elements'] is empty).

    Raises:
        RuntimeError: if a player's summary cannot be fetched within
            `max_retries` attempts.
    '''
    if element_id is not None:
        return _fetch_element_summary(element_id, max_retries, wait)

    data = None
    # iterate through all players
    for element in tqdm(API_DATA['elements']):
        # rows may already have had 'id' renamed to '_id' by the MongoDB upsert
        player_id = element['id'] if 'id' in element else element['_id']
        data = _fetch_element_summary(player_id, max_retries, wait)

        # TODO: update MongoDB (db['element_gameweek_history']) with `data`

    return data

{'fixtures': [...], 'history': [...], 'history_past': [...]}


In [39]:
# per-gameweek history entries for the player with id 1
# (requires a get_element_summary that accepts a player id)
get_element_summary(1)['history']

[{'element': 1,
  'fixture': 1,
  'opponent_team': 7,
  'total_points': 0,
  'was_home': False,
  'kickoff_time': '2022-08-05T19:00:00Z',
  'team_h_score': 0,
  'team_a_score': 2,
  'round': 1,
  'minutes': 0,
  'goals_scored': 0,
  'assists': 0,
  'clean_sheets': 0,
  'goals_conceded': 0,
  'own_goals': 0,
  'penalties_saved': 0,
  'penalties_missed': 0,
  'yellow_cards': 0,
  'red_cards': 0,
  'saves': 0,
  'bonus': 0,
  'bps': 0,
  'influence': '0.0',
  'creativity': '0.0',
  'threat': '0.0',
  'ict_index': '0.0',
  'starts': 0,
  'expected_goals': '0.00',
  'expected_assists': '0.00',
  'expected_goal_involvements': '0.00',
  'expected_goals_conceded': '0.00',
  'value': 45,
  'transfers_balance': 0,
  'selected': 23970,
  'transfers_in': 0,
  'transfers_out': 0},
 {'element': 1,
  'fixture': 11,
  'opponent_team': 10,
  'total_points': 0,
  'was_home': True,
  'kickoff_time': '2022-08-13T14:00:00Z',
  'team_h_score': 4,
  'team_a_score': 2,
  'round': 2,
  'minutes': 0,
  'goals_s