In [1]:
import wikipedia

ModuleNotFoundError: No module named 'wikipedia'

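The cells below use the `wikipedia` package (install it with `pip install wikipedia`, see https://pypi.org/project/wikipedia/). Note that https://pypi.org/project/Wikipedia-API/ is a different library (imported as `wikipediaapi`) with a different interface.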

In [8]:
# Look up the "Switzerland" article (presumably over the network); `p` is reused by the following cells.
p = wikipedia.page("Switzerland")

In [74]:
# Preview: first 50 characters of the article summary.
p.summary[:50]

'Switzerland, officially the Swiss Confederation, i'

In [9]:
# Display the text of the article's "History" section.
p.section('History')

'Switzerland has existed as a state in its present form since the adoption of the Swiss Federal Constitution in 1848. The precursors of Switzerland established a protective alliance at the end of the 13th century (1291), forming a loose confederation of states which persisted for centuries.'

In [72]:
# Preview: first five categories the article belongs to.
p.categories[:5]

['All articles containing potentially dated statements',
 'All articles with dead external links',
 'All articles with unsourced statements',
 'Articles containing Alemannic German-language text',
 'Articles containing French-language text']

# USER ACTIVITY

Documentation and example of GET requests available at https://wikimedia.org/api/rest_v1/#!/Pageviews_data/

Requests library documentation: http://docs.python-requests.org/en/master/

In [2]:
import datetime
import requests
from dateutil.relativedelta import relativedelta

In [7]:
def getUserActivity(article, granularity, start, end, project ="en.wikipedia.org",
                    access="all-access", agent="user",dateformat="iso"):
    """
    Fetch the page-view counts of a Wikimedia article over a period of time.

    One GET request is sent to the Wikimedia REST "pageviews/per-article"
    endpoint and one [views, date] pair is returned per item of the response.

    article: title of the Wikipedia article. Spaces are replaced by underscores
        and the title is percent-encoded before being placed in the URL, so
        titles containing '/', '?' or spaces work. An already percent-encoded
        title is accepted as well (it is decoded first, then re-encoded).
    granularity: time granularity of the activity, either 'monthly' or 'daily'
    start: start date of the search as a datetime.datetime object
        (only year, month and day are sent to the API)
    end: end date of the search as a datetime.datetime object
        (only year, month and day are sent to the API)
    project: domain of the Wikimedia project to query (by default en.wikipedia.org)
    access: access method filter, one of desktop, mobile-app or mobile-web
        (by default all-access)
    agent: agent type filter, one of user, bot or spider (by default user)
    dateformat: format of the dates in the result:
        'iso'     -> ISO 8601 string (datetime.isoformat())
        'ordinal' -> proleptic Gregorian ordinal (datetime.toordinal())
        any other value (e.g. 'datetime') -> datetime.datetime object
    return:
        a list of lists of the form
        [[user_activity_value1, date1], [user_activity_value2, date2], ...]
        in the order the API returned the items.
    raises:
        KeyError if the decoded response has no 'items' entry (for example
        when the API answers with an error document instead of data).
    """
    # Function-scope import keeps this cell self-contained (standard library only).
    from urllib.parse import quote, unquote

    dstart = start.strftime("%Y%m%d")
    dend = end.strftime("%Y%m%d")

    # unquote() first so that callers who already percent-encoded the title
    # are not double-encoded; safe="" makes '/' inside a title get encoded too.
    title = quote(unquote(article).replace(" ", "_"), safe="")

    path = ("https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"+project
            +"/"+access+"/"+agent+"/"+title+"/"+granularity+"/"+dstart+"/"+dend)
    r = requests.get(path)

    # Decode the JSON body once instead of on every loop iteration.
    items = r.json()['items']

    res = []
    for item in items:
        # Use the period reported by the API (timestamp is "YYYYMMDDHH")
        # rather than assuming item i corresponds to start + i periods: the
        # API decides which periods fall inside [start, end] (e.g. a monthly
        # query whose start is mid-month does not begin with that month), so
        # computing labels from `start` can attach views to the wrong date.
        time_label = datetime.datetime.strptime(item['timestamp'], "%Y%m%d%H")
        if dateformat == 'iso':
            time_label = time_label.isoformat()
        elif dateformat == 'ordinal':
            time_label = time_label.toordinal()

        res.append([item['views'], time_label])
    return res

Example: monthly number of views for the article Switzerland for February, March and April 2017

In [8]:
# Monthly view counts for "Switzerland", queried from 1 February 2017 to 1 May 2017.
s = datetime.datetime(2017, 2, 1)
e = datetime.datetime(2017, 5, 1)
getUserActivity("Switzerland", "monthly", s, e, dateformat="iso")

<Response [200]>


[[313410, '2017-02-01T00:00:00'],
 [344772, '2017-03-01T00:00:00'],
 [308334, '2017-04-01T00:00:00']]

Example: daily number of views for the article Switzerland from 22 September 2017 to 24 September 2017

In [9]:
# Daily view counts for "Switzerland", queried from 22 to 24 September 2017, with datetime labels.
s = datetime.datetime(2017, 9, 22)
e = datetime.datetime(2017, 9, 24)
getUserActivity("Switzerland", "daily", s, e, dateformat="datetime")

<Response [200]>


[[9657, datetime.datetime(2017, 9, 22, 0, 0)],
 [8875, datetime.datetime(2017, 9, 23, 0, 0)],
 [9661, datetime.datetime(2017, 9, 24, 0, 0)]]