# Wikipedia API Library Example


In [2]:
# Original approach: call the Wikimedia REST API directly with requests
import requests
import pandas as pd

# URL templates for the two Wikimedia REST endpoints. The {placeholders} are
# filled in from the parameter dictionaries defined below.
endpoint_legacy = (
    'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/'
    '{project}/{access-site}/{granularity}/{start}/{end}'
)
endpoint_pageviews = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/'
    '{project}/{access}/{agent}/{granularity}/{start}/{end}'
)

# Request headers sent with every call.
headers = {
    'User-Agent': 'https://github.com/IvyLinMS',
    'From': 'ivylin@uw.edu',
}

# Placeholder values for the legacy pagecounts endpoint (mobile site).
# "end" is the first day of the month AFTER the last month of data wanted.
pagecounts_mobile_site_params = {
    'project': 'en.wikipedia.org',
    'access-site': 'mobile-site',
    'granularity': 'monthly',
    'start': '2007120100',
    'end': '2015070100',
}

# Placeholder values for the pageviews endpoint (mobile app, user agent).
# "end" is the first day of the month AFTER the last month of data wanted.
pageviews_mobile_app_params = {
    'project': 'en.wikipedia.org',
    'access': 'mobile-app',
    'agent': 'user',
    'granularity': 'monthly',
    'start': '2015070100',
    'end': '2021090100',
}

def api_call(endpoint, parameters, timeout=30):
    """Call a REST endpoint and return its decoded JSON body.

    Args:
        endpoint: URL template containing ``{name}`` placeholders.
        parameters: Mapping whose entries fill the placeholders in ``endpoint``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        endpoint.format(**parameters),
        headers=headers,
        timeout=timeout,
    )
    # Fail here with the real HTTP error instead of letting an error payload
    # surface later as a confusing KeyError on "items".
    response.raise_for_status()
    return response.json()

# Legacy pagecounts (mobile site): fetch, then tabulate the "items" records.
legacy_data = api_call(endpoint_legacy, pagecounts_mobile_site_params)
legacy_df = pd.DataFrame(legacy_data["items"])

# Current pageviews (mobile app, user traffic): same two steps.
pageview_data = api_call(endpoint_pageviews, pageviews_mobile_app_params)
pageview_df = pd.DataFrame(pageview_data["items"])

# Stack both result sets under a fresh 0..n-1 index. Columns that exist in only
# one of the two frames are left empty (NaN) for rows from the other.
df = pd.concat((legacy_df, pageview_df), ignore_index=True)
df

Unnamed: 0,project,access-site,granularity,timestamp,count,access,agent,views
0,en.wikipedia,mobile-site,monthly,2014100100,3.091547e+09,,,
1,en.wikipedia,mobile-site,monthly,2014110100,3.027490e+09,,,
2,en.wikipedia,mobile-site,monthly,2014120100,3.278950e+09,,,
3,en.wikipedia,mobile-site,monthly,2015010100,3.485302e+09,,,
4,en.wikipedia,mobile-site,monthly,2015020100,3.091534e+09,,,
...,...,...,...,...,...,...,...,...
78,en.wikipedia,,monthly,2021040100,,mobile-app,user,162260497.0
79,en.wikipedia,,monthly,2021050100,,mobile-app,user,166485079.0
80,en.wikipedia,,monthly,2021060100,,mobile-app,user,150704624.0
81,en.wikipedia,,monthly,2021070100,,mobile-app,user,161461155.0


In [3]:
# Sample use of the Wikipedia API library
from wikipedia_api.pageviews.api_client import WikipediaPageViewApiClient
from wikipedia_api.pageviews.api_types import (
    APIHeader,
    AccessMethod,
    AgentType,
    AggregatePageViewRequest,
    Granularity,
)

project = "en.wikipedia"

# Caller identification passed to the client. The first value is the plain
# profile URL: it previously carried stray single quotes and a trailing comma
# copied from a dict literal, which would have been sent as part of the value.
# NOTE(review): presumably (user agent, contact e-mail) — confirm against APIHeader.
api_header = APIHeader("https://github.com/IvyLinMS", "ivylin@uw.edu")
client = WikipediaPageViewApiClient(project, api_header)

# Monthly, user-agent, mobile-app traffic over the requested time range.
request = AggregatePageViewRequest(
    access=AccessMethod.MOBILE_APP,
    agent=AgentType.USER,
    granularity=Granularity.MONTHLY,
    start_time="20071201",
    end_time="20210901",
)

# Run the request and display the result as the cell output.
df = client.get_aggregated_pageviews(request)
df

Unnamed: 0,project,access,granularity,timestamp,views,agent
0,en.wikipedia,mobile-site,monthly,2014100100,3091546685,all-agents
1,en.wikipedia,mobile-site,monthly,2014110100,3027489668,all-agents
2,en.wikipedia,mobile-site,monthly,2014120100,3278950021,all-agents
3,en.wikipedia,mobile-site,monthly,2015010100,3485302091,all-agents
4,en.wikipedia,mobile-site,monthly,2015020100,3091534479,all-agents
...,...,...,...,...,...,...
78,en.wikipedia,mobile-app,monthly,2021040100,162260497,user
79,en.wikipedia,mobile-app,monthly,2021050100,166485079,user
80,en.wikipedia,mobile-app,monthly,2021060100,150704624,user
81,en.wikipedia,mobile-app,monthly,2021070100,161461155,user
