# Extract Data

### Building the script that extracts the raw data

In [1]:
import requests
import pandas as pd
import os

In [2]:
# Fetch the Fantasy Premier League bootstrap payload to inspect its structure.
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# A timeout keeps the kernel from hanging forever if the server stalls.
response = requests.get(url, timeout=30)
# Surface HTTP errors here instead of as a confusing JSON decode failure below.
response.raise_for_status()
# NOTE: this name shadows the stdlib `json` module (not imported in this
# notebook); it is kept because the following cell reads it.
json = response.json()
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [3]:
# Iterating a dict yields its keys, so this prints one top-level key per line.
for key in json:
    print(key)

events
game_settings
phases
teams
total_players
elements
element_stats
element_types


In [4]:
# Location of the generated download script, relative to this notebook's
# working directory: <parent>/src/data/get_raw_data.py
get_raw_data_script_file = os.path.join(
    os.pardir,
    'src',
    'data',
    'get_raw_data.py',
)

In [11]:
%%writefile $get_raw_data_script_file
# -*- coding: utf-8 -*-
import os
import requests
import logging
import pandas as pd

def extract_data(key, url, file_path, timeout=30):
    """Download a JSON document and save one of its top-level entries as CSV.

    Parameters
    ----------
    key : str
        Top-level key of the JSON payload; its value is loaded into a
        pandas DataFrame.
    url : str
        Address fetched with an HTTP GET request.
    file_path : str
        Destination of the CSV file. The DataFrame index is written as the
        first column (pandas' default).
    timeout : float, optional
        Seconds to wait for the server before the request is aborted, so a
        stalled connection cannot block the script indefinitely.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx or 5xx status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error rather than trying to parse an error page.
    response.raise_for_status()
    payload = response.json()
    df = pd.DataFrame(payload[key])
    df.to_csv(file_path)
       

def main(project_dir):
    """Download the raw tables and store them as CSV files.

    Writes ``elements.csv``, ``element_types.csv`` and ``teams.csv`` into
    ``<project_dir>/data/raw``, creating that directory if it is missing.

    Parameters
    ----------
    project_dir : str
        Root directory of the project; output goes to its ``data/raw``
        sub-directory.
    """
    # get logger
    logger = logging.getLogger(__name__)
    logger.info('getting raw data')

    # urls
    train_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

    # file paths
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    # DataFrame.to_csv does not create missing directories, so make sure the
    # target folder exists before anything is written.
    os.makedirs(raw_data_path, exist_ok=True)

    # extract data: each payload key is saved to "<key>.csv"
    for key in ('elements', 'element_types', 'teams'):
        extract_data(key, train_url, os.path.join(raw_data_path, key + '.csv'))

    logger.info('downloaded raw training data')
    
if __name__ == '__main__':
    # Configure logging first so the INFO messages emitted by main() are shown.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )

    # The project root is two directory levels above this script's folder.
    script_dir = os.path.dirname(__file__)
    project_dir = os.path.join(script_dir, os.pardir, os.pardir)

    main(project_dir)
    
    

Overwriting ..\src\data\get_raw_data.py


In [12]:
!python {get_raw_data_script_file}

2020-10-01 19:01:56,307 - __main__ - INFO - getting raw data
2020-10-01 19:01:56,592 - __main__ - INFO - downloaded raw training data
