In [1]:
# -*- coding: utf-8 -*-
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 20)

import numpy as np

from scipy import stats

from bokeh.io import show, output_file, export_png
from bokeh.plotting import figure
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20b
from bokeh.models import LabelSet
import math

output_notebook()



In [2]:
sheet_id = "1R-8IiE7ZjDWMQ2olysYYPChvw-aarJzs"
sheet_name = "Final_papers"
url = "https://docs.google.com/spreadsheets/d/{0}/gviz/tq?tqx=out:csv&sheet={1}".format(sheet_id, sheet_name)
df = pd.read_csv(url)

In [3]:
df.head()

Unnamed: 0,Title,Authors,Year,DOI,"Type of publication (Journal, Book, Conference proceedings, etc)",Source (name of publication),Keywords,Variability factors (factors that affect the dashboard composition),Application domain,Method,...,Generative approach? (applicable to those solutions that need a pre-configuration),First level classification,Detailed classification,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25
0,Integrated model-driven dashboard development,"Palpanas, T.; Chowdhary, P.; Mihaila, G.; Pine...",2007,10.1007/s10796-007-9032-9,Article,Information Systems Frontiers,"Model-driven development, Dashboard, Business ...",Business process,BI,Model Driven,...,Yes,Personalized,Personalized (model-driven),,,,,,,
1,Exploration views: Understanding dashboard cre...,"Elias, M.; Bezerianos, A.",2011,10.1007/978-3-642-23768-3_23,Conference Paper,Lecture Notes in Computer Science (including s...,"synchronized views, interface customization, n...",User preferences,BI,Configuration wizard + visual mapping,...,,Customizable w/ system support,Assisted customization + personalized recommen...,,,,,,,
2,The TDAQ Analytics Dashboard: a real-time web ...,"Miotto, G.L.; Magnoni, L.; Sloper, J.E.",2011,10.1088/1742-6596/331/2/022019,Conference Paper,INTERNATIONAL CONFERENCE ON COMPUTING IN HIGH ...,-,User preferences,Physics,Configuration wizard,...,,Customizable,Customizable (user-driven),,,,,,,
3,A Semantic Dashboard Description Language for ...,"Kintz, M.",2012,,Conference Paper,CEUR Workshop Proceedings,"Dashboards, business processes, monitoring, co...","Business process, goals",BI,Model Driven,...,Yes,Personalized,Personalized (goal-driven) (with customization...,,,,,,,
4,A customisable dashboard display for environme...,"Filonik D., Medland R., Foth M., Rittenbruch M.",2013,10.1007/978-3-642-37157-8_8,Conference Paper,Lecture Notes in Computer Science (including s...,"energy monitoring, environmental sustainabilit...",User preferences,Energy monitoring,Configuration wizard,...,,Customizable,Customizable (user-driven),,,,,,,


In [4]:
print("Number of papers resulting the SLR: ", len(df))

Number of papers resulting the SLR:  30


In [5]:
keywords = df["Keywords"].dropna().astype('str').values
keywords_list = []
for keyword in keywords:
    [keywords_list.append(x.strip().lower()) for x in keyword.split(';')]
    
keywords_list = [x for x in keywords_list if x != '']
dfKeywords = pd.DataFrame(keywords_list)

In [6]:
dfKeywords

Unnamed: 0,0
0,"model-driven development, dashboard, business ..."
1,"synchronized views, interface customization, n..."
2,-
3,"dashboards, business processes, monitoring, co..."
4,"energy monitoring, environmental sustainabilit..."
5,"internet of things, device cloud, remote user ..."
6,"on-line analytical processing, adaptive dashbo..."
7,"variability, data visualisation, sensors, mode..."
8,"emergency management, information visualizatio..."
9,"web apis, semantic annotation, monitoring envi..."


In [7]:
dfKeywords.columns = ['Term']

In [8]:
resultsKeywords = pd.DataFrame(dfKeywords['Term'].value_counts())
resultsKeywords

Unnamed: 0,Term
-,4
data analytics,1
star schema,1
"internet of things, device cloud, remote user interface, device dashboard, tr-069",1
"aesthetics, dashboard, generator, usability guidelines, user testing",1
semantic web of things,1
interactive data summarization and exploration,1
"model-driven development, dashboard, business performance management",1
"on-line analytical processing, adaptive dashboards systems, olap personalization, private data clouds, and multi-agent systems",1
"monitoring dashboard, dashboard generation, cloud monitoring, sos monitoring.",1


In [9]:
resultsKeywords.reset_index(inplace=True)
resultsKeywords.columns = ['Term', 'Count']

resultsKeywords

Unnamed: 0,Term,Count
0,-,4
1,data analytics,1
2,star schema,1
3,"internet of things, device cloud, remote user ...",1
4,"aesthetics, dashboard, generator, usability gu...",1
5,semantic web of things,1
6,interactive data summarization and exploration,1
7,"model-driven development, dashboard, business ...",1
8,"on-line analytical processing, adaptive dashbo...",1
9,"monitoring dashboard, dashboard generation, cl...",1


In [10]:
# output_file('vbar.html')

p = figure(x_range=resultsKeywords['Term'].tolist(), plot_width=1250, toolbar_location=None, title="Keywords Counts")
p.vbar(x=resultsKeywords['Term'].tolist(), top=resultsKeywords['Count'].tolist(), width=0.3, color='#4BACC5')
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.xaxis.major_label_orientation = math.pi/2

show(p)

In [11]:
authors = df["Authors"].values
authors_list = []
for author in authors:
    [authors_list.append(x.strip()) for x in author.split(';')]
    
authors_list = [x for x in authors_list if x != '']

dfAuthors = pd.DataFrame(authors_list)
dfAuthors.columns = ['Name']

In [12]:
resultsAuthors = pd.DataFrame(dfAuthors['Name'].value_counts())
resultsAuthors.reset_index(inplace=True)
resultsAuthors.columns = ['Name', 'Count']
resultsAuthors

Unnamed: 0,Name,Count
0,"Vázquez-Ingelmo, A.",2
1,"García-Peñalvo, F.J.",2
2,"Theron, R.",2
3,"Van Hoecke, S.",2
4,"Kintz, M.",2
5,"Noonpakdee, W.",1
6,"de Walle, R.V.",1
7,R. Weinreich,1
8,"Riveill, M.",1
9,"Elias, M.",1


In [13]:
p = figure(x_range=resultsAuthors['Name'].tolist(), plot_width=990, toolbar_location=None, title="Authors Counts")
p.vbar(x=resultsAuthors['Name'].tolist(), top=resultsAuthors['Count'].tolist(), width=0.3, color='#900E3F')
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 4
p.xaxis.major_label_orientation = math.pi/2

show(p)

In [14]:
resultsYears = pd.DataFrame(df['Year'].value_counts())
resultsYears.reset_index(inplace=True)
resultsYears.columns = ['Year', 'Count']
resultsYears.sort_values(['Year', 'Count'], ascending=[False, False], inplace=True)
resultsYears

Unnamed: 0,Year,Count
2,2021,4
4,2020,3
6,2019,2
1,2018,5
0,2017,6
7,2016,2
3,2014,3
8,2013,1
9,2012,1
5,2011,2


In [15]:
x = resultsYears['Year'].astype('str').tolist() + ['2015', '2010', '2009', '2008']
x.sort()
p = figure(x_range=x, plot_width=600, plot_height=400, toolbar_location=None, title="Papers per year")
p.vbar(x=resultsYears['Year'].astype('str').tolist(), top=resultsYears['Count'].astype('int').tolist(), color='#900E3F', width=0.9)
p.xgrid.grid_line_color = None
p.yaxis[0].ticker.desired_num_ticks = 7
p.y_range.start = 0
p.y_range.end = 6.5

show(p)

In [16]:
resultsType = pd.DataFrame(df['Type of publication (Journal, Book, Conference proceedings, etc)'].value_counts())
resultsType.reset_index(inplace=True)
resultsType.columns = ['Type', 'Count']
resultsType.sort_values(['Type', 'Count'], ascending=[False, False], inplace=True)
resultsType

Unnamed: 0,Type,Count
0,Conference Paper,21
1,Article,9


In [17]:
p = figure(x_range=resultsType['Type'].tolist(), plot_width=600, plot_height=600, title="Publication type")
p.vbar(x=resultsType['Type'].tolist(), top=resultsType['Count'].tolist(), width=0.9, color='#900E3F')
p.xgrid.grid_line_color = None
p.yaxis[0].ticker.desired_num_ticks = 10
p.y_range.start = 0

show(p)

In [18]:
variability = df["Variability factors (factors that affect the dashboard composition)"].values
variability_list = []
for v in variability:
    print(v)
    [variability_list.append(x.strip().lower().capitalize()) for x in v.split(',')]
    
variability_list = [x for x in variability_list if x != '']

dfVariability = pd.DataFrame(variability_list)
dfVariability.columns = ['Name']

resultsVariability = pd.DataFrame(dfVariability['Name'].value_counts())
resultsVariability.reset_index(inplace=True)
resultsVariability.columns = ['Name', 'Count']
resultsVariability.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsVariability

Business process
User preferences
User preferences
Business process, goals
User preferences
Data source
Usage profiles
User preferences
User preferences
User profile
User preferences
User preferences
User role
Data structure, user preferences
User preferences
User profile, data structure, analysis scenario
User preferences, guidelines
User preferences
Data structure, user preferences
User abilities
Data structure
User preferences
User preferences
User preferences
User preferences
User preferences
Data structure
Data structure, user preferences
User preferences, guidelines
User preferences


Unnamed: 0,Name,Count
7,Analysis scenario,1
3,Business process,2
8,Data source,1
1,Data structure,6
9,Goals,1
2,Guidelines,2
5,Usage profiles,1
6,User abilities,1
0,User preferences,20
4,User profile,2


In [19]:
p = figure(x_range=resultsVariability['Name'].astype('str').tolist(), plot_width=800, plot_height=500, title="Variability factors")
p.vbar(x=resultsVariability['Name'].astype('str').tolist(), top=resultsVariability['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/3.8
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [20]:
resultsDomain = pd.DataFrame(df['Application domain'].value_counts())
resultsDomain.reset_index(inplace=True)
resultsDomain.columns = ['Name', 'Count']
resultsDomain.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsDomain

Unnamed: 0,Name,Count
0,BI,9
11,Communication,1
7,Disaster situations,1
12,Economics,1
6,Emergency management,1
9,Energy monitoring,1
3,Generic,2
10,Interface evaluation,1
1,IoT,4
4,Learning Analytics,2


In [21]:
p = figure(x_range=resultsDomain['Name'].astype('str').tolist(), plot_width=600, plot_height=500, title="Application domain")
p.vbar(x=resultsDomain['Name'].astype('str').tolist(), top=resultsDomain['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/4
p.xaxis.major_label_text_font_size = "9pt"
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [22]:
method = df["Method"].values
method_list = []
for m in method:
    [method_list.append(x.strip().lower().capitalize()) for x in m.split(',')]
    
method_list = [x for x in method_list if x != '']

dfMethod = pd.DataFrame(method_list)
dfMethod.columns = ['Name']

resultsMethod = pd.DataFrame(dfMethod['Name'].value_counts())
resultsMethod.reset_index(inplace=True)
resultsMethod.columns = ['Name', 'Count']
resultsMethod.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsMethod

Unnamed: 0,Name,Count
5,Agents,2
2,Configuration files,4
0,Configuration wizard,6
3,Configuration wizard + visual mapping,4
9,Generator with models as input,1
8,Inclusive user modelling,1
12,Knowledge graphs and indicator ontology,1
10,Machine learning,1
1,Model driven,5
11,Pre-defined templates,1


In [23]:
p = figure(x_range=resultsMethod['Name'].astype('str').tolist(), plot_width=600, plot_height=500, title="Variability methods")
p.vbar(x=resultsMethod['Name'].astype('str').tolist(), top=resultsMethod['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/4
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [24]:
target = df["Target of the variability process"].values
target_list = []
for t in target:
    [target_list.append(x.strip().lower().capitalize()) for x in t.split(',')]
    
target_list = [x for x in target_list if x != '']

dfTarget = pd.DataFrame(target_list)
dfTarget.columns = ['Name']

resultsTarget = pd.DataFrame(dfTarget['Name'].value_counts())
resultsTarget.reset_index(inplace=True)
resultsTarget.columns = ['Name', 'Count']
resultsTarget.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsTarget

Unnamed: 0,Name,Count
2,Data sources,13
4,Functionality,1
3,Kpis,13
1,Layout,20
5,Visual design,1
0,Visualization components,27


In [25]:
p = figure(x_range=resultsTarget['Name'].astype('str').tolist(), plot_width=600, plot_height=500, title="Target of the variability process")
p.vbar(x=resultsTarget['Name'].astype('str').tolist(), top=resultsTarget['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/4
p.xaxis.major_label_text_font_size = "10pt"
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [26]:
stage = df["Stage at which the variation / customization / personalization is performed"].values
stage_list = []
for s in stage:
    [stage_list.append(x.strip().lower().capitalize()) for x in s.split(',')]
    
stage_list = [x for x in stage_list if x != '']

dfStage = pd.DataFrame(stage_list)
dfStage.columns = ['Name']

resultsStage = pd.DataFrame(dfStage['Name'].value_counts())
resultsStage.reset_index(inplace=True)
resultsStage.columns = ['Name', 'Count']
resultsStage.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsStage

Unnamed: 0,Name,Count
3,Compile-time,1
0,Pre-configuration,13
2,Run-time,8
1,User-configuration,9


In [27]:
p = figure(x_range=resultsStage['Name'].astype('str').tolist(), plot_width=600, plot_height=500, title="Stage at which the variability process is applied")
p.vbar(x=resultsStage['Name'].astype('str').tolist(), top=resultsStage['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/4
p.xaxis.major_label_text_font_size = "12pt"
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [28]:
resultsTest = pd.DataFrame(df['Tested in real-world scenario?'].value_counts())
resultsTest.reset_index(inplace=True)
resultsTest.columns = ['Name', 'Count']
resultsTest.sort_values(['Name', 'Count'], ascending=[True, False], inplace=True)
resultsTest

Unnamed: 0,Name,Count
2,No,6
1,Partial,11
0,Yes,13


In [29]:
p = figure(x_range=resultsTest['Name'].astype('str').tolist(), plot_width=600, plot_height=500, title="Solution tested in real-world scenario?")
p.vbar(x=resultsTest['Name'].astype('str').tolist(), top=resultsTest['Count'].astype('int').tolist(), width=0.5, color='#900E3F')
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = math.pi/4
p.xaxis.major_label_text_font_size = "12pt"
p.yaxis[0].ticker.desired_num_ticks = 12
p.y_range.start = 0

show(p)

In [30]:
resultsClassification = pd.DataFrame(df['First level classification'].value_counts())
resultsClassification.reset_index(inplace=True)
resultsClassification.columns = ['Name', 'Count']
resultsClassification.sort_values(['Count', 'Name'], ascending=[False, True], inplace=True)
resultsClassification

Unnamed: 0,Name,Count
0,Customizable,14
1,Customizable w/ system support,5
2,Hybrid,4
3,Personalized,4
4,Adaptive,3


In [31]:
p = figure(x_range=resultsClassification['Name'].astype('str').tolist(), plot_height=500, plot_width=800, title="Solutions' classification")
p.vbar(x=resultsClassification['Name'].astype('str').tolist(), top=resultsClassification['Count'].astype('int').tolist(), width=0.9, color='#900E3F')
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 15
p.x_range.range_padding = 0.1

show(p)