In [7]:
#!/usr/bin/python
# -*- coding: latin-1 -*-
"""
This notebook generates the statistics for all WLE contests.
"""
import os, sys, inspect
current_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))
folder_parts = current_folder.split(os.sep)
pywikibot_folder = os.sep.join(folder_parts[0:-1])
if current_folder not in sys.path:
    sys.path.insert(0, current_folder)
if pywikibot_folder not in sys.path:
    sys.path.insert(0, pywikibot_folder)

import pywikibot as pb
from pywikibot import pagegenerators
from pywikibot.specialbots import UploadRobot
from StringIO import StringIO
import mwparserfromhell as mwh
from datetime import datetime
import pandas as pd
import numpy as np
from urllib import urlencode, urlopen
import json
from random import sample
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import io
import time
import csv
from itertools import groupby
from operator import itemgetter
from mako.template import Template

In [8]:
sns.set_style("darkgrid")
%matplotlib inline

In [60]:
YEARS                       = [2015, 2016, 2017]
BASE_WLE_NAME               = u"Commons:Wiki Loves Earth {0} in Spain/Log"

commons_site = pb.Site("commons", "commons")

In [61]:
images_df = pd.DataFrame(columns=['image_title', 'uploader', 'days_from_registration', 'year',
                                   'code', 'name', 'community', 'commons_cat', 
                                   'lat', 'long'])

In [62]:
images_df.head()

Unnamed: 0,image_title,uploader,days_from_registration,year,code,name,community,commons_cat,lat,long


In [64]:
for year in YEARS :
    log_page = BASE_WLE_NAME.format(year)
    list_page = pb.Page(commons_site, log_page)
    
    pb.output('Retrieving --> WLE {0} images list from cache'.format(year))
    list_page_text = StringIO(list_page.text[list_page.text.find('\n') + 1:list_page.text.rfind('\n')])
    _df = pd.read_csv(list_page_text,
                        sep=";",
                        index_col=False,
                        names=['image_title', 'uploader', 'days_from_registration', 
                                'timestamp', 'code', 'name', 'community', 'commons_cat', 
                                'lat', 'long'],
                        encoding='utf-8',
                        quoting=csv.QUOTE_NONE)
    _df["timestamp"] = pd.to_datetime(_df["timestamp"], format="%Y-%m-%d %H:%M:%S")
    _df.set_index(["timestamp"], inplace=True)
    _df['code'].fillna(u'', inplace=True)
    _df['name'].fillna(u'', inplace=True)
    _df['community'].fillna(u'', inplace=True)
    _df['lat'].fillna(0.0, inplace=True)
    _df['long'].fillna(0.0, inplace=True)
    del _df.index.name

    _df = _df.loc[(_df.index > '{0}-04-30 23:59:59'.format(year)) & (_df.index < '{0}-06-01 01:00:00'.format(year))]
    _df["year"] = year
    _df.head()
    images_df = pd.concat([images_df, _df], ignore_index=False)
    pb.output('Retrieved --> WLE {0} image list from cache'.format(year))

Retrieving --> WLE 2015 images list from cache
Retrieved --> WLE 2015 image list from cache
Retrieving --> WLE 2016 images list from cache
Retrieved --> WLE 2016 image list from cache
Retrieving --> WLE 2017 images list from cache
Retrieved --> WLE 2017 image list from cache


In [65]:
images_df.head()

Unnamed: 0,code,commons_cat,community,days_from_registration,image_title,lat,long,name,uploader,year
2015-05-03 12:36:37,ES6130002,Sierras Subbéticas Natural Park,Andalusia,28,"""Encina andante"". Parque Natural de la Subbéti...",37.425616,-4.314789,Sierra Subbética,Teckömo,2015
2015-05-09 12:54:58,ES1200001,Picos de Europa,Asturias,22,'Grasilla' (Pinguicula grandiflora) en la Send...,43.2428,-4.9133,Picos de Europa (Asturias),ROSUROB,2015
2015-05-13 18:00:59,ES4320037,Sierra de Gata,Extremadura,18,(1) ABEJA LIBANDO FLOR DEL NARANJO EN EL CORRA...,40.273333,-6.645556,Sierra de Gata,JESUS SANCHEZ RODRIGUEZ,2015
2015-05-13 18:01:00,ES4320037,Sierra de Gata,Extremadura,18,(2) ABEJA LIBANDO FLOR DEL NARANJO EN EL CORRA...,40.273333,-6.645556,Sierra de Gata,JESUS SANCHEZ RODRIGUEZ,2015
2015-05-06 19:25:36,ES7010033,Jandía Natural Park,Canary Islands,1995,0004395970-original.jpg,28.11601,-14.33852,Jandía,Tamara k,2015


In [184]:
columns = ['uploader']
columns.extend([str(year) for year in YEARS])
columns = ['total']

images_per_upload_df = pd.DataFrame(columns=columns)

ordered_uploaders = images_df.groupby(["uploader"])["image_title"].agg('count').reset_index(name='count').sort_values(['count'], ascending=False)["uploader"].tolist()
grouped = images_df.groupby(["uploader"])

for uploader in ordered_uploaders :
    group = grouped.get_group(uploader)
    yearly_grouped = group.groupby(["year"])
    _dict = {'uploader': uploader}
    _dict["total"] = 0
    for year in YEARS :
        _dict[str(year)] = 0
        try :
            image_sum = len(yearly_grouped.get_group(year))
            _dict[str(year)] = image_sum
            _dict["total"] += image_sum
        except Exception as e:
            pass
    _df = pd.DataFrame([_dict])
    images_per_upload_df = pd.concat([images_per_upload_df, _df])

template = u"""{| align=right
|[[File:WLE Austria Logo (transparent).svg|200px|link=]]
|-
| style="text-align:center; font-family:arial black; font-size:200%; color:grey" | {{LangSwitch| es=España|ca=Espanya|en=Spain}}&nbsp;&nbsp;&nbsp;
|}
This page shows the statistics of the '''Wiki Loves Earth in Spain''' contest in all its editions. Two different sections are provided: number of uploaded images per contributors, and number of uploaded images per [[:en:Site of Community Importance|sites of community importance]].

==Images per contributor==
The collapsible section below shows the contributors and what they have provided in the two editions of WLE.

{{Hidden|headerstyle=background:#ECECEC;|contentstyle=white;color:black|style=padding:2px |1=Show complete statistics.}}

{| class="wikitable sortable" style="width:60%; margin-left:20%; margin-right:20%; font-size:89%; margin-top:0.5em;"
|- valign="middle"
! style = "width: 25%;" | Author
% for year in years:
! style = "width: 10%\;" | Images uploaded in ${year}<br/><small>total (from a SCI)</small>
% endfor
! style = "width: 10%\;" | Images uploaded (total)
% for _, row in images_df.iterrows() :
|-
% if "flickr" in row["uploader"] :
| [${row["uploader"]}]
% else :
| {{u|${row["uploader"]}}}
% endif
% for year in years :
| align="center" | ${int(row[str(year)])}
% endfor
| align="center" | ${row["total"]}
% endfor
|}"""
vars = {
    "images_df": images_per_upload_df,
    "years": YEARS
}
t = Template(template)
_text = t.render(**vars)

In [185]:
images_per_upload_df.head()

Unnamed: 0,2015,2016,2017,total,uploader
0,594.0,0.0,0.0,594,https://www.flickr.com/people/10352740@N03 Tur...
0,0.0,482.0,54.0,536,Iagocasabiell
0,9.0,392.0,73.0,474,Discasto
0,244.0,0.0,123.0,367,Tanja 007
0,0.0,342.0,0.0,342,Efra33


In [186]:
_text

u'{| align=right\n|[[File:WLE Austria Logo (transparent).svg|200px|link=]]\n|-\n| style="text-align:center; font-family:arial black; font-size:200%; color:grey" | {{LangSwitch| es=Espa\xf1a|ca=Espanya|en=Spain}}&nbsp;&nbsp;&nbsp;\n|}\nThis page shows the statistics of the \'\'\'Wiki Loves Earth in Spain\'\'\' contest in all its editions. Two different sections are provided: number of uploaded images per contributors, and number of uploaded images per [[:en:Site of Community Importance|sites of community importance]].\n\n==Images per contributor==\nThe collapsible section below shows the contributors and what they have provided in the two editions of WLE.\n\n{{Hidden|headerstyle=background:#ECECEC;|contentstyle=white;color:black|style=padding:2px |1=Show complete statistics.}}\n\n{| class="wikitable sortable" style="width:60%; margin-left:20%; margin-right:20%; font-size:89%; margin-top:0.5em;"\n|- valign="middle"\n! style = "width: 25%;" | Author\n! style = "width: 10%\\;" | Images upl

In [187]:
_page = pb.Page(commons_site, u"User:Discasto/test1")
_page.text = _text
pb.output('Publishing --> WLE Statistics')
#print statisticts_text
_page.save(u"WLE Spain statistics")

Publishing --> WLE Statistics
Page [[User:Discasto/test1]] saved
