## Signpost Article Views

I've wanted to run some basic statistics on *Wikipedia Signpost* article views for a while now, to figure out what people like or don't like reading about. Now I'm finally getting to sit down and do it...

In [5]:
from pageviews import PageviewsClient
import arrow
import datetime
import urllib
from pandas import DataFrame
import pandas as pd
import mwapi


def viewcounts(article_name, start=None, end=None):
    """
    Fetches the daily viewcounts of an English Wikipedia page.

    article_name: page title; spaces are turned into underscores before the lookup.
    start, end: handed unchanged to PageviewsClient.article_views
        (article_viewcounts supplies '%Y%m%d%H' strings). When left as None the
        range is whatever the client chooses.

    Returns a list of counts, ordered by the sorted keys of the client's response
    (presumably one key per day -- confirm against the pageviews library).
    """
    # `import urllib` alone does not guarantee that the `urllib.parse` submodule
    # is loaded, so import the function explicitly.
    from urllib.parse import quote

    article_name = article_name.replace(' ', '_')
    # quote() leaves '/' alone by default; the request wants slashes escaped too.
    parsed_article_name = quote(article_name).replace('/', '%2F')
    p = PageviewsClient().article_views("en.wikipedia",
                                        [parsed_article_name],
                                        access="all-access",
                                        granularity="daily",
                                        start=start,
                                        end=end)
    # The response is indexed by the unquoted, underscored title.
    return [p[key][article_name] for key in sorted(p.keys())]

def article_viewcounts(article_name):
    """
    Fetches the list of daily viewcounts for a Signpost article over its publication window.

    The window opens on the issue date found between the first and second "/" of the
    title and closes 14 days later. The Signpost usually ships late, so the window is
    deliberately generous; views before publication and after the next issue are low,
    so the extra days change little.
    """
    stamp_format = '%Y%m%d%H'
    issue_date = article_name.split("/")[1]
    window_open = arrow.get(issue_date)
    window_close = window_open + datetime.timedelta(days=14)
    return viewcounts(article_name,
                      start=window_open.strftime(stamp_format),
                      end=window_close.strftime(stamp_format))

def total_viewcount(article_name):
    """
    Returns the total viewcount of the article over its publication window,
    i.e. the sum of the daily counts returned by article_viewcounts.
    """
    daily_counts = article_viewcounts(article_name)
    return sum(daily_counts)

def average_daily_viewcount(article_name):
    """
    Returns the average daily viewcount of the article over its publication window.

    The average is taken over the days for which article_viewcounts returned a
    count. If no counts come back at all, 0.0 is returned instead of dividing by zero.
    """
    counts = article_viewcounts(article_name)
    if not counts:
        # No recorded days: report zero rather than raising ZeroDivisionError.
        return 0.0
    return sum(counts) / len(counts)

def get_all_articles(prefix):
    """
    Returns a list of the titles of all Signpost articles listed from `prefix` onward.

    prefix: title (without the "Wikipedia:" namespace) at which the listing starts,
        e.g. "Wikipedia Signpost/2015-10-07/Op-ed" -- the earliest published Signpost
        story for which view data is available (yet).

    The listing is restricted to project-namespace pages (namespace 4) whose titles
    start with "Wikipedia Signpost" and stops at "Wikipedia Signpost/A". No filtering
    by year happens here; callers drop titles that are too recent themselves.
    """
    session = mwapi.Session('https://en.wikipedia.org', user_agent='signpostviews Jupyter notebook')
    # A single request returns at most `aplimit` pages, so follow the API's
    # continuation markers; with continuation=True mwapi yields one response
    # document per batch.
    responses = session.get(action='query',
                            list='allpages',
                            apfrom=prefix,
                            apto='Wikipedia Signpost/A',
                            apprefix='Wikipedia Signpost',
                            apnamespace=4,
                            aplimit=500,
                            formatversion=2,
                            continuation=True)
    result = []
    for response in responses:
        for page in response['query']['allpages']:
            # Articles look like ".../<date>/<section>"; requiring two slashes filters
            # out issue-level pages such as Wikipedia:Wikipedia Signpost/2015-07-18.
            if page['title'].count("/") >= 2:
                result.append(page['title'])
    return result

def tabulate(articles):
    """
    Maps every article title in `articles` to its list of daily viewcounts,
    as returned by article_viewcounts.
    """
    views_by_article = {}
    for title in articles:
        views_by_article[title] = article_viewcounts(title)
    return views_by_article

In [2]:
# List every article from the 2015-10-07 issue onward, leaving out 2016 issues.
[
    title
    for title in get_all_articles("Wikipedia Signpost/2015-10-07/Op-ed")
    if '2016' not in title
]

['Wikipedia:Wikipedia Signpost/2015-10-07/Op-ed',
 'Wikipedia:Wikipedia Signpost/2015-10-07/Technology report',
 'Wikipedia:Wikipedia Signpost/2015-10-07/Traffic report',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Blog',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Editorial',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Featured content',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Gallery',
 'Wikipedia:Wikipedia Signpost/2015-10-14/News and notes',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Op-ed',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Technology report',
 'Wikipedia:Wikipedia Signpost/2015-10-14/Traffic report',
 'Wikipedia:Wikipedia Signpost/2015-10-14/WikiConference Report',
 'Wikipedia:Wikipedia Signpost/2015-10-14/WikiConference report',
 'Wikipedia:Wikipedia Signpost/2015-10-21/Arbitration report',
 'Wikipedia:Wikipedia Signpost/2015-10-21/Editorial',
 'Wikipedia:Wikipedia Signpost/2015-10-21/Featured content',
 'Wikipedia:Wikipedia Signpost/2015-10-21/In the media',
 'Wikipedia:Wiki

In [6]:
# Same selection as listed earlier: articles from 2015-10-07 onward, minus 2016 issues.
targets = [
    title
    for title in get_all_articles("Wikipedia Signpost/2015-10-07/Op-ed")
    if '2016' not in title
]
# Fetch the daily viewcounts for every selected article (one lookup per title).
all_views = tabulate(targets)

In [7]:
# Inspect the raw mapping of article title -> list of daily viewcounts.
all_views

{'Wikipedia:Wikipedia Signpost/2015-10-07/Op-ed': [16,
  3,
  2,
  8,
  382,
  663,
  363,
  270,
  180,
  169,
  172,
  117,
  87,
  22,
  29],
 'Wikipedia:Wikipedia Signpost/2015-10-07/Technology report': [6,
  11,
  8,
  3,
  104,
  174,
  125,
  111,
  113,
  104,
  91,
  55,
  15,
  14,
  14],
 'Wikipedia:Wikipedia Signpost/2015-10-07/Traffic report': [7,
  32,
  5,
  3,
  203,
  233,
  148,
  145,
  146,
  108,
  102,
  77,
  30,
  28,
  14],
 'Wikipedia:Wikipedia Signpost/2015-10-14/Blog': [28,
  154,
  170,
  122,
  84,
  88,
  93,
  86,
  72,
  6,
  11,
  18],
 'Wikipedia:Wikipedia Signpost/2015-10-14/Editorial': [52,
  355,
  268,
  151,
  109,
  108,
  111,
  88,
  79,
  19,
  39,
  16],
 'Wikipedia:Wikipedia Signpost/2015-10-14/Featured content': [40,
  40,
  17,
  23,
  175,
  203,
  141,
  116,
  98,
  96,
  82,
  77,
  11,
  14,
  22],
 'Wikipedia:Wikipedia Signpost/2015-10-14/Gallery': [33, 1, 12, 1, 2],
 'Wikipedia:Wikipedia Signpost/2015-10-14/News and notes': [22,
  

In [19]:
# Show every row instead of pandas' truncated view.
pd.set_option('display.max_rows', None)
# One row per article (sorted by title); columns are positions in each article's
# list of daily counts, so shorter lists are padded with missing values.
frame = DataFrame(
    [all_views[title] for title in sorted(all_views)],
    index=sorted(all_views),
)
frame

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
Wikipedia:Wikipedia Signpost/2015-10-07/Op-ed,16,3.0,2.0,8.0,382.0,663.0,363.0,270.0,180.0,169.0,172.0,117.0,87.0,22.0,29.0
Wikipedia:Wikipedia Signpost/2015-10-07/Technology report,6,11.0,8.0,3.0,104.0,174.0,125.0,111.0,113.0,104.0,91.0,55.0,15.0,14.0,14.0
Wikipedia:Wikipedia Signpost/2015-10-07/Traffic report,7,32.0,5.0,3.0,203.0,233.0,148.0,145.0,146.0,108.0,102.0,77.0,30.0,28.0,14.0
Wikipedia:Wikipedia Signpost/2015-10-14/Blog,28,154.0,170.0,122.0,84.0,88.0,93.0,86.0,72.0,6.0,11.0,18.0,,,
Wikipedia:Wikipedia Signpost/2015-10-14/Editorial,52,355.0,268.0,151.0,109.0,108.0,111.0,88.0,79.0,19.0,39.0,16.0,,,
Wikipedia:Wikipedia Signpost/2015-10-14/Featured content,40,40.0,17.0,23.0,175.0,203.0,141.0,116.0,98.0,96.0,82.0,77.0,11.0,14.0,22.0
Wikipedia:Wikipedia Signpost/2015-10-14/Gallery,33,1.0,12.0,1.0,2.0,,,,,,,,,,
Wikipedia:Wikipedia Signpost/2015-10-14/News and notes,22,13.0,19.0,27.0,336.0,262.0,167.0,104.0,106.0,112.0,103.0,82.0,10.0,11.0,36.0
Wikipedia:Wikipedia Signpost/2015-10-14/Op-ed,17,4.0,10.0,10.0,175.0,241.0,147.0,101.0,112.0,103.0,89.0,78.0,10.0,14.0,23.0
Wikipedia:Wikipedia Signpost/2015-10-14/Technology report,8,1.0,9.0,5.0,114.0,155.0,121.0,86.0,88.0,100.0,81.0,69.0,4.0,12.0,17.0


In [15]:
# Spot-check a single article's viewcounts over its publication window.
article_viewcounts("Wikipedia:Wikipedia Signpost/2015-12-23/Technology report")

[7, 2, 10]

In [16]:
# Same article through viewcounts() directly, with no explicit start/end.
viewcounts("Wikipedia:Wikipedia Signpost/2015-12-23/Technology report")

[7, 2, 10]

In [20]:
# Another spot-check, this time for the 2015-12-02 Op-ed.
article_viewcounts("Wikipedia:Wikipedia Signpost/2015-12-02/Op-ed")

[326, 1388, 654, 482, 350, 239, 209, 137, 145, 71, 40]