In [1]:
import json
import requests

import os

In [2]:
# URL templates for the two Wikimedia REST API traffic endpoints. The {placeholders}
# are filled in with str.format(**parameters) when a request is made.
endpoint_legacy = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access-site}/{granularity}/{start}/{end}'
endpoint_pageviews = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'

# Common constants for Legacy and Pageviews
project_domain = 'en.wikipedia.org'
granularity = 'monthly'

# Constants for Legacy
# The Legacy Pagecounts API spells its access-site values with hyphens
# ('desktop-site', 'mobile-site'); per the API documentation the underscore
# spelling 'desktop_site' is not a valid value.
access_desktop_legacy = 'desktop-site'
access_mobile_legacy = 'mobile-site'
start_time_legacy = 2007120100  # December 2007
end_time_legacy = 2016080100  # Ending July 2016

# Constants for Pageviews
access_desktop_pageviews = 'desktop'
access_mobileapp_pageviews = 'mobile-app'
access_mobileweb_pageviews = 'mobile-web'
agent_pageviews = 'user'
start_time_pageviews = 2015070100  # July 2015
end_time_pageviews = 2021100100  # Ending September 2021

# HTTP headers sent with every request made by api_call.
headers = {
    'User-Agent': 'https://github.com/StaceyWheeler',
    'From': 'swheele@uw.edu'
}

# Directory (relative to the notebook's working directory) that receives the raw
# JSON responses.
RAW_DATA_PATH = '../data/raw'
# PROCESSED_DATA_PATH = '../data/processed'
# VISUALIZATIONS_DATA_PATH = '../data/visualizations'

# PROCESSED_CSV_FILE_NAME = 'en-wikipedia_traffic_200712-202109.csv'
# GRAPH_FILE_NAME = 'en-wikipedia_traffic_200712-202109.png'

In [3]:
def api_call(endpoint, parameters, timeout=30):
    """Send a GET request to a templated endpoint and return the decoded JSON body.

    Args:
        endpoint: URL template containing ``{name}`` placeholders.
        parameters: Mapping whose keys match the placeholders in ``endpoint``.
        timeout: Seconds to wait for the server before giving up (default 30).

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = endpoint.format(**parameters)
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    call = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status so an error payload is never mistaken for
    # data by the caller.
    call.raise_for_status()
    return call.json()

In [4]:
def _fetch_and_save(endpoint, parameters, file_name):
    """Call one endpoint and dump its JSON response to RAW_DATA_PATH/file_name."""
    response = api_call(endpoint, parameters)
    file_path = os.path.join(RAW_DATA_PATH, file_name)
    with open(file_path, 'w') as f:
        json.dump(response, f)


def get_data():
    """Download monthly traffic data and store each response as a raw JSON file.

    Makes one Legacy Pagecounts request per legacy access type (desktop, mobile)
    and one Pageviews request per pageviews access type (desktop, mobile app,
    mobile web). Each response is written to
    ``RAW_DATA_PATH/pagecounts_<access>_<start>-<end>.json``.

    Returns:
        None. The function is called for its side effect of writing files.
    """
    # open(..., 'w') does not create missing parent directories.
    os.makedirs(RAW_DATA_PATH, exist_ok=True)

    for access in [access_desktop_legacy, access_mobile_legacy]:
        parameters_legacy = {
            'project': project_domain,
            'access-site': access,
            'granularity': granularity,
            'start': start_time_legacy,
            'end': end_time_legacy
        }
        legacy_file_name = f'pagecounts_{access}_{start_time_legacy}-{end_time_legacy}.json'
        _fetch_and_save(endpoint_legacy, parameters_legacy, legacy_file_name)

    for access in [access_desktop_pageviews, access_mobileapp_pageviews, access_mobileweb_pageviews]:
        parameters_pageviews = {
            'project': project_domain,
            'access': access,
            'agent': agent_pageviews,
            'granularity': granularity,
            'start': start_time_pageviews,
            'end': end_time_pageviews
        }
        # NOTE(review): these files hold Pageviews API data but are named with the
        # 'pagecounts_' prefix; kept as-is because other code may read these
        # names - confirm whether 'pageviews_' was intended.
        pageviews_file_name = f'pagecounts_{access}_{start_time_pageviews}-{end_time_pageviews}.json'
        _fetch_and_save(endpoint_pageviews, parameters_pageviews, pageviews_file_name)

In [5]:
# Run the download: issues the five API requests (two legacy, three pageviews)
# and writes one JSON file per request under RAW_DATA_PATH.
get_data()