## Wikipedia data set

In [1]:
#Imports 
import pandas as pd
import math

#Visualization imports
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.core.properties import value
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import show, output_notebook

# Configure Bokeh so that show() renders plots inline in the notebook output cells.
output_notebook()

## Social list

In [2]:
# Build the list of distinct article names available for the 'social' topic.
# Any metric file would do; the article names are the CSV index.
topic = 'social'
metric = 'month_mutual_reverts'
# Index.unique() keeps the first-occurrence (file) order, so the listing is
# reproducible; iterating a set of strings can yield a different order after
# each kernel restart.
social_list = pd.read_csv('./results/'+topic+'/'+metric+'.csv', index_col=0).index.unique().tolist()
social_list

['Social_innovation',
 'Right_to_Internet_access',
 'Disruptive_innovation',
 'Digital_Single_Market',
 'Intellectual_property',
 'Information_ethics',
 'Freedom_of_information_laws_by_country',
 'Social_inequality',
 'Price_discrimination',
 'Polarization_(politics)',
 'Digital_commons_(economics)',
 'Decentralization',
 'Circular_economy',
 'Net_neutrality',
 'Sharing_economy',
 'Internet_safety',
 'Reputation_system',
 'User-centered_design',
 'Scalability',
 'Discrimination',
 'User_experience',
 'Digital_identity',
 'Resilience_(network)',
 'Quality_of_experience',
 'Green_computing',
 'E-commerce',
 'Privacy-enhancing_technologies',
 'Digital_divide',
 'Robot_ethics',
 'Decision-making',
 'Level_playing_field',
 'E-democracy',
 'Immersive_technology',
 'Data_ownership',
 'Secure_by_design',
 'Algorithmic_regulation',
 'Cybercrime',
 'Usability',
 'Living_lab',
 'Right_to_be_forgotten',
 'Open_data',
 'Tamper_resistance',
 'Peer-to-peer',
 'Information_explosion',
 'Psychological_

## Technology list

In [3]:
# Build the list of distinct article names available for the 'tech' topic.
# Any metric file would do; the article names are the CSV index.
topic = 'tech'
metric = 'month_mutual_reverts'
# Index.unique() keeps the first-occurrence (file) order, so the listing is
# reproducible; iterating a set of strings can yield a different order after
# each kernel restart.
tech_list = pd.read_csv('./results/'+topic+'/'+metric+'.csv', index_col=0).index.unique().tolist()
tech_list

['Contextual_searching',
 'Semantic_analysis_(machine_learning)',
 'Google',
 'Identity_management_system',
 'Wireless',
 'Python_(programming_language)',
 'Amazon_(company)',
 'Real-time_Transport_Protocol',
 'Internet_protocol_suite',
 'Internet_traffic',
 'Nanotechnology',
 'Session_Initiation_Protocol',
 'ISCSI',
 'Automated_reasoning',
 'Telecommunication',
 'OpenFlow',
 'Internet_exchange_point',
 'Institute_of_Electrical_and_Electronics_Engineers',
 'Constrained_Application_Protocol',
 'Dynamic_spectrum_management',
 'Robust_Header_Compression',
 'PKCS',
 'Integrated_Services_Digital_Network',
 'Computer_vision',
 'Machine_translation',
 'OpenBSD',
 'Network_layer',
 'Encryption',
 'QUIC',
 'Mobile_data_offloading',
 'Quagga_(software)',
 'Virtual_private_network',
 'Cyber-physical_system',
 'Deep_web',
 'Industry_4.0',
 'Electronic_Product_Code',
 'GSM',
 'Facebook',
 'Cyberattack',
 'Peering',
 'Global_Positioning_System',
 'Open-source_hardware',
 'Network_service',
 'Malvert

#### Available metrics
* **pageviews**: number of times an article has been accessed during a given month.

* **month_edits**: number of revisions of the article during a given month, i.e. number of times it has been edited. 

* **month_reverts**: number of reverts on the article during a given month, i.e. number of times a user has canceled the edit of another user.

* **month_mutual_reverts**: number of mutual reverts on the article during a given month, i.e. number of times two users have mutually canceled each other's edit.

* **month_users**: number of distinct users who edited the article during a given month, counting different IPs (anonymous editors) as different users.

* **month_users_reg**: number of distinct registered users who edited the article during a given month (excluding anonymous editors).

* **month_reverting_users**: number of distinct users who made some revert in the article during a given month.

* **month_mutual_reverting_users**: number of distinct users involved in some mutual revert in the article during a given month.

#### Available languages
Language options: 
* **en**
* **de**
* **fr**
* **es**
* **it**
* **sv** 
* **nl**
* **pl**
* **ru**

There is no data for the **pageviews** metric in the languages 'pl' and 'ru' before July 2015.


### Data extraction

In [4]:
# Topic / metric pair whose CSV is previewed in the next cell.
#   topic  options: 'social', 'tech'
#   metric options: "pageviews", "month_edits", "month_reverts", "month_mutual_reverts",
#                   "month_users", "month_users_reg", "month_reverting_users",
#                   "month_mutual_reverting_users"
topic, metric = 'social', 'month_edits'

In [5]:
# Show the last five rows of the selected metric table (article name as index).
pd.read_csv('./results/' + topic + '/' + metric + '.csv', index_col=0).tail(5)

Unnamed: 0_level_0,name.1,lang,name_lang,200107,200108,200109,200110,200111,200112,200201,...,201710,201711,201712,201801,201802,201803,201804,201805,201806,201807
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Information_society,Information_society,ru,Информационное_общество,0,0,0,0,0,0,0,...,5,2,0,3,0,2,3,4,10,0
General_Data_Protection_Regulation,General_Data_Protection_Regulation,ru,Общий_регламент_по_защите_данных,0,0,0,0,0,0,0,...,0,0,0,7,0,1,2,4,11,27
Secure_Electronic_Transaction,Secure_Electronic_Transaction,ru,Secure_Electronic_Transaction,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
Social_inequality,Social_inequality,ru,Социальное_неравенство,0,0,0,0,0,0,0,...,3,0,0,1,0,2,1,0,0,0
Ubiquitous_computing,Ubiquitous_computing,ru,Повсеместные_вычисления,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,1,0


## Data Visualization


In [6]:
# Plot to compare different articles in a given language
# Palette of line colours: plot_metrics picks color[i] for the i-th plotted article.
# The base list of hex colours is repeated five times (`*5`), presumably so that
# long article lists do not run past the end of the palette.
# NOTE(review): the palette contains "#FFFFFF" (21st entry); a white line is
# presumably invisible on the default plot background - confirm and consider replacing it.
color =["#e6194b","#3cb44b","#ffe119","#0082c8","#f58231","#911eb4","#46f0f0","#f032e6","#d2f53c","#fabebe","#008080","#e6beff","#aa6e28","#fffac8","#800000","#aaffc3","#808000","#ffd8b1","#000080","#808080","#FFFFFF","#000000","#1F77B4", "#B85A0D", '#878787', "#E377C2", '#66ccff', '#cc0066', '#cccc00', '#98DF8A', "#82853B","#92a8d1","#034f84","#f7cac9","#f7786b","#d5f4e6","#80ced6","#fefbd8","#618685","#ffef96","#50394c","#b2b2b2","#f4e1d2","#deeaee","#b1cbbb","#eea29a","#c94c4c","#3e4444","#82b74b","#405d27","#c1946a","#b9936c","#dac292",
"#e6e2d3","#c4b7a6","#6b5b95","#feb236","#d64161","#ff7b25","#eaece5","#b2c2bf","#c0ded9","#3b3a30","#c8c3cc","#563f46","#8ca3a3","#484f4f","#686256",
"#c1502e","#587e76","#a96e5b","#bccad6","#8d9db6","#667292","#f1e3dd","#cfe0e8","#b7d7e8","#87bdd8","#daebe8","#fbefcc","#f9ccac","#f4a688","#e0876a",
"#f9d5e5","#eeac99","#e06377","#c83349","#5b9aa0","#d6d4e0","#b8a9c9","#622569","#96ceb4","#ffeead","#ffcc5c","#ff6f69","#588c7e","#f2e394","#f2ae72","#d96459"]*5

def plot_metrics(arts, lang='en', log=False, topic='tech', metric='month_edits', last_x_months=1000):
    """Build a Bokeh tab with one line per article for a single metric.

    Reads ``./results/<topic>/<metric>.csv`` (article name as index), keeps the
    rows whose index is in ``arts`` and whose ``lang`` column equals ``lang``,
    and plots their values over the most recent months.

    Parameters
    ----------
    arts : iterable of str
        Article names (index values of the CSV) to plot; lines are drawn in
        this order.
    lang : str
        Value of the ``lang`` column to keep.
    log : bool
        Use a logarithmic y axis when true, a linear one otherwise.
    topic, metric : str
        Select the CSV file to read.
    last_x_months : int or None
        Number of trailing month columns to plot. ``1000`` (the historical
        "all months" sentinel), ``None``, or any value larger than the number
        of available months plots every month.

    Returns
    -------
    Panel
        A tab titled ``metric`` that wraps the figure.
    """
    df = pd.read_csv('./results/'+topic+'/'+metric+'.csv', index_col=0)
    axis_type = 'log' if log else 'linear'

    # The first three columns (name.1, lang, name_lang) are metadata; every
    # remaining column is a month labelled YYYYMM.
    n_meta_cols = 3
    n_months = max(len(df.columns) - n_meta_cols, 0)
    # Clamp the window: without this, a request for more months than exist
    # produced a negative slice start and plotted the wrong columns.
    if last_x_months is None or last_x_months == 1000 or last_x_months > n_months:
        last_x_months = n_months
    last_x_months = max(last_x_months, 0)

    month_cols = list(df.columns[len(df.columns) - last_x_months:])
    # 'YYYYMM' -> 'YYYY_MM' for the categorical x axis.
    month_labels = [col[0:4] + '_' + col[4:6] for col in month_cols]

    # Select the requested articles in the caller's order with a single concat.
    lang_rows = df[df['lang'] == lang]
    frames = [lang_rows[lang_rows.index == art] for art in arts]
    selected = pd.concat(frames) if frames else lang_rows.iloc[0:0]

    p = figure(x_range=month_labels, plot_width=970,
               title=topic + ' ' + metric + ' in \'' + lang + '\' language',
               y_axis_type=axis_type)
    for i, (name, row) in enumerate(selected[month_cols].iterrows()):
        # NOTE(review): newer Bokeh releases spell this keyword `legend_label=`;
        # `legend=value(...)` is kept to match the Bokeh API the notebook imports target.
        p.line(month_labels, row.tolist(), color=color[i % len(color)],
               legend=value(name), line_width=1.5)

    p.xaxis.major_label_orientation = math.pi/3
    p.grid.grid_line_alpha = 1
    p.x_range.range_padding = 0.01
    p.legend.location = "top_left"
    # Clicking a legend entry toggles the visibility of its line.
    p.legend.click_policy = "hide"

    return Panel(child=p, title=metric)

def plot_multiple_metrics(arts, lang='en', log=False, topic='tech', last_x_months=1000, metrics=None):
    """Display a tabbed Bokeh layout with one tab per metric.

    Each tab is produced by ``plot_metrics`` with the same articles, language,
    axis type, topic and month window.

    Parameters
    ----------
    arts : iterable of str
        Article names to plot.
    lang : str
        Language code forwarded to ``plot_metrics``.
    log : bool
        Use a logarithmic y axis when true.
    topic : str
        Topic folder forwarded to ``plot_metrics``.
    last_x_months : int
        Number of trailing months to plot (1000 plots all available months).
    metrics : iterable of str, optional
        Metrics to show, one tab each. Defaults to the eight metrics that were
        previously hard-coded here.

    Returns
    -------
    None
        The layout is displayed with ``show``.
    """
    if metrics is None:
        metrics = ["pageviews", "month_edits", "month_reverts", "month_mutual_reverts",
                   "month_users", "month_users_reg", "month_reverting_users",
                   "month_mutual_reverting_users"]
    # Materialise once so a generator argument is not exhausted after the first metric.
    arts = list(arts)
    panels = [
        plot_metrics(arts, lang=lang, log=log, topic=topic, metric=metric, last_x_months=last_x_months)
        for metric in metrics
    ]
    show(Tabs(tabs=panels))


In [10]:
# Articles to visualize.
#   * all the social articles:        arts = social_list
#   * all the technological articles: arts = tech_list
#   * otherwise use one of the predefined lists below, or write your own.
selected_tech = [
    'Blockchain',
    'Cryptocurrency',
    'Ethereum',
    'Artificial_intelligence',
    'Algorithm',
    'Machine_learning',
    'Quantum_computing',
]
top_social = [
    'Fake_news',
    'General_Data_Protection_Regulation',
    'Net_neutrality',
    'Cyber_sovereignty',
]
selected_social = [
    'Algorithmic_bias',
    'Cyber_sovereignty',
    'Data_Sovereignty',
    'Data_literacy',
    'Data_ownership',
    'Fake_news',
    'General_Data_Protection_Regulation',
    'Green_computing',
    'Hyperconnectivity',
    'Information_privacy',
    'Internet_privacy',
    'Net_neutrality',
    'Open-source_model',
    'Privacy-enhancing_technologies',
    'Psychological_effects_of_Internet_use',
    'Right_to_Internet_access',
    'Right_to_be_forgotten',
    'Robot_ethics',
    'Robot_tax',
    'Secure_by_design',
]
filtered_social = [
    'Algorithmic_bias',
    'Echo_chamber_(media)',
    'Explainable_Artificial_Intelligence',
    'Filter_bubble',
    'Green_computing',
    'Internet_governance',
    'Internet_privacy',
    'Open-source_model',
    'Privacy-enhancing_technologies',
    'Privacy_by_design',
    'Psychological_effects_of_Internet_use',
    'Right_to_Internet_access',
    'Right_to_be_forgotten',
    'Robot_ethics',
    'Secure_by_design',
    'Web_accessibility',
]

# Choose the predefined or personalized list of articles to be visualized.
arts = top_social

# Language options: ['en', 'de', 'fr', 'es', 'it', 'sv', 'nl', 'pl', 'ru']
# (there is no 'pageviews' data for 'pl' and 'ru' before July 2015).
lang = 'en'

# Topic options: "tech", "social"
topic = "social"

# Number of months to plot (last_x_months=1000 plots all the available months).
last_x_months = 36

plot_multiple_metrics(
    arts,
    lang=lang,
    log=False,
    topic=topic,
    last_x_months=last_x_months,
)