In [301]:
# -*- coding: utf-8 -*-
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 20)


import numpy as np

from scipy import stats


from bokeh.io import show, output_file, export_png
from bokeh.plotting import figure

 
# use creds to create a client to interact with the Google Drive API



# OAuth scope requested for the service account (Google Drive API).
scope = ['https://www.googleapis.com/auth/drive']
# Service-account key is read from 'client.json', resolved relative to the
# current working directory.
creds = ServiceAccountCredentials.from_json_keyfile_name('client.json', scope)
# Authorised gspread client; later cells use it to list and open spreadsheets.
client = gspread.authorize(creds)

# NOTE(review): `figure` and `show` are already imported further up in this
# cell, and factor_cmap / Category20b / LabelSet are not referenced by any
# cell visible here — confirm before pruning.
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20b
from bokeh.models import LabelSet
import math

# Send Bokeh output to the notebook so that show(p) renders inline.
output_notebook()

In [311]:
# List the title and id of every spreadsheet returned by client.openall(),
# as a quick check that the credentials can see the expected file.
print("The following sheets are available")
for sheet in client.openall():
    print("{} - {}".format(sheet.title, sheet.id))


# Open the review spreadsheet by its key (the id printed for "SLR" above).
spreadsheet = client.open_by_key("1Dt2M32K0WDfhLCAadtp1uEVW0fLG5htNu_G2tK_OQdU")
# NOTE(review): worksheet is selected by position (index 4), so reordering the
# tabs in the spreadsheet silently changes which data is analysed — presumably
# this is the "final results" tab; confirm.
sheetFinalResults = spreadsheet.get_worksheet(4)

The following sheets are available
SLR - 1Dt2M32K0WDfhLCAadtp1uEVW0fLG5htNu_G2tK_OQdU


In [312]:
# Load the worksheet records into a DataFrame; every later cell derives its
# tables from `df`.
df = pd.DataFrame(sheetFinalResults.get_all_records())

In [313]:
# Preview the first rows to check that the columns were loaded as expected.
df.head()

Unnamed: 0,Application domain,Authors,Conference or journal name,DOI,Detailed classification,First level classification,Generative approach? (applicable to those solutions that need a pre-configuration),Keywords,Method,Source (name of publication),Stage at which the variation / customization / personalization is performed,Support provided by a software solution?,Target of the variability process,Tested in real-world scenario?,The solution outcome is a fully functional dashboard?,Title,"Type of publication (Journal, Book, Conference proceedings, etc)",Variability factors (factors that affect the dashboard composition),Year
0,Energy monitoring,Filonik D.;Medland R.;Foth M.;Rittenbruch M.,Lecture Notes in Computer Science (including s...,10.1007/978-3-642-37157-8_8,Customizable (user-driven),Customizable,,energy monitoring;environmental sustainability...,Configuration wizard,Lecture Notes in Computer Science (including s...,User-configuration,Yes,"Displayed data, visualization type, layout",Yes,Yes,A customisable dashboard display for environme...,Conference Paper,User preferences,2013
1,Microservices monitoring,Mayer B.;Weinreich R.,2017 IEEE International Conference on Software...,10.1109/ICSAW.2017.44,Customizable (user-driven),Customizable,,Microservices;microservice monitoring;microser...,Configuration wizard,2017 IEEE International Conference on Software...,User-configuration,Yes,"Displayed data, visualization type, layout",No,Yes,A Dashboard for Microservice Monitoring and Ma...,Conference Paper,User preferences,2017
2,Emergency management,Nascimento B. S.; Vivacqua A. S.;Borges M. R. S.,"2016 IEEE International Conference on Systems,...",10.1109/SMC.2016.7844746,Assisted customization,Customizable w/ system support,,Emergency management;information visualization...,"Configuration wizard, visual mapping","2016 IEEE International Conference on Systems,...",User-configuration,Yes,"Displayed data, visualization type, layout",Yes,Yes,A flexible architecture for selection and visu...,Conference Paper,User preferences,2016
3,BI,Noonpakdee W.;Khunkornsiri T.;Phothichai A.;Da...,2018 5th International Conference on Industria...,10.1109/IEA.2018.8387148,Customizable (designer-driven and user-driven),Customizable,Partial,Dashboard design;Bussiness Intelligence;Small ...,Pre-defined templates,2018 5th International Conference on Industria...,Pre-configuration,Yes,"Displayed data, visualization type, layout",Yes,Yes,A framework for analyzing and developing dashb...,Conference Paper,"User preferences, guidelines",2018
4,Services monitoring,Kumar K.;Bose J.;Soni S. K.,2017 14th IEEE India Council International Con...,10.1109/INDICON.2017.8487236,Customizable (user-driven),Customizable,,Data analytics;Mobile applications analytics;s...,Configuration files,2017 14th IEEE India Council International Con...,User-configuration,Yes,"Displayed data, visualization type, layout",No,Yes,A Generic Visualization Framework based on a D...,Conference Paper,User preferences,2017


In [314]:
# Report how many rows (papers) the loaded sheet contains.
print("Number of papers resulting the SLR: ", len(df))

Number of papers resulting the SLR:  25


In [315]:
# Flatten the ';'-separated "Keywords" cells into one lower-cased,
# whitespace-trimmed term per row of dfKeywords.
keywords = df["Keywords"].dropna().astype('str').values
keywords_list = []
for keyword in keywords:
    keywords_list.extend(term.strip().lower() for term in keyword.split(';'))

# Empty fragments (blank cells, trailing separators) carry no information.
keywords_list = [term for term in keywords_list if term]
dfKeywords = pd.DataFrame(keywords_list)

In [316]:
# Display the flattened keyword table (single, still unnamed column).
dfKeywords

Unnamed: 0,0
0,energy monitoring
1,environmental sustainability
2,persuasive technology
3,domestic environments
4,households
5,urban informatics
6,microservices
7,microservice monitoring
8,microservice management
9,microservice dashboard


In [317]:
# Name the single keyword column so later cells can refer to it as 'Term'.
dfKeywords.columns = ['Term']

In [318]:
# Occurrences of each distinct keyword, indexed by the keyword itself.
resultsKeywords = dfKeywords['Term'].value_counts().to_frame()
resultsKeywords

Unnamed: 0,Term
data visualization,3
dashboards,3
dashboard,3
monitoring,2
multi-agent systems,2
business processes,2
model-driven development,2
on-line analytical processing,2
internet of things,1
olap personalization,1


In [319]:
# Two-column keyword frequency table: the keyword ('Term') and how many times
# it occurs ('Count').
# Built from dfKeywords rather than by mutating resultsKeywords in place, so
# the cell can be re-run safely: an in-place reset_index() would add one more
# index column on every execution and the two-name column assignment below
# would then fail with a length mismatch.
resultsKeywords = dfKeywords['Term'].value_counts().to_frame().reset_index()
resultsKeywords.columns = ['Term', 'Count']

resultsKeywords

Unnamed: 0,Term,Count
0,data visualization,3
1,dashboards,3
2,dashboard,3
3,monitoring,2
4,multi-agent systems,2
5,business processes,2
6,model-driven development,2
7,on-line analytical processing,2
8,internet of things,1
9,olap personalization,1


In [320]:
# Bar chart of keyword frequencies, one bar per distinct keyword.
keyword_terms = resultsKeywords['Term'].tolist()
keyword_counts = resultsKeywords['Count'].tolist()

p = figure(
    x_range=keyword_terms,
    plot_width=1250,
    toolbar_location=None,
    title="Keywords Counts",
)
p.vbar(x=keyword_terms, top=keyword_counts, width=0.3, color='#4BACC5')

# Vertical tick labels, no vertical grid lines, fixed 0-9 count axis.
p.xaxis.major_label_orientation = math.pi/2
p.xgrid.grid_line_color = None
p.y_range.end = 9
p.y_range.start = 0

show(p)

In [321]:
# Split the ';'-separated "Authors" cells into one trimmed name per row of
# dfAuthors.
# dropna()/astype('str') mirror the keyword handling so that a missing or
# non-text cell cannot break the .split() call.
authors = df["Authors"].dropna().astype('str').values
authors_list = [name.strip() for author in authors for name in author.split(';')]

# Empty fragments (blank cells, trailing separators) are not author names.
authors_list = [name for name in authors_list if name != '']

# Naming the column in the constructor also works when no author was found;
# assigning .columns on an empty, column-less frame raises a length mismatch.
dfAuthors = pd.DataFrame(authors_list, columns=['Name'])

In [322]:
# Number of papers per author as a two-column table.
resultsAuthors = dfAuthors['Name'].value_counts().to_frame().reset_index()
resultsAuthors.columns = ['Name', 'Count']

# Most prolific authors first; ties are ordered alphabetically by name.
resultsAuthors = resultsAuthors.sort_values(by=['Count', 'Name'], ascending=[False, True])
resultsAuthors

Unnamed: 0,Name,Count
2,Barros R.,2
1,Belo O.,2
4,Correia H.,2
7,García-Peñalvo F. J.,2
6,Kintz M.,2
3,Rodrigues P.,2
5,Therón R.,2
0,Vázquez-Ingelmo A.,2
16,Arjun S.,1
39,Bederson B. B.,1


In [323]:
# Bar chart of papers per author, in the order of resultsAuthors.
author_names = resultsAuthors['Name'].tolist()
author_counts = resultsAuthors['Count'].tolist()

p = figure(
    x_range=author_names,
    plot_width=990,
    toolbar_location=None,
    title="Authors Counts",
)
p.vbar(x=author_names, top=author_counts, width=0.3, color='#4BACC5')

# Vertical tick labels, no vertical grid lines, fixed 0-4 count axis.
p.xaxis.major_label_orientation = math.pi/2
p.xgrid.grid_line_color = None
p.y_range.end = 4
p.y_range.start = 0

show(p)

In [324]:
# Number of papers per publication year, most recent year first.
resultsYears = df['Year'].value_counts().to_frame().reset_index()
resultsYears.columns = ['Year', 'Count']
resultsYears = resultsYears.sort_values(by=['Year', 'Count'], ascending=[False, False])
resultsYears

Unnamed: 0,Year,Count
0,2018,8
1,2017,6
3,2016,3
2,2014,3
5,2013,1
6,2012,1
4,2011,2
7,2007,1


In [325]:
# Bar chart of papers per year on a categorical axis that covers every year
# between the earliest and the latest publication, so years without papers
# show up as gaps.
# The axis is derived from the data instead of appending a fixed list of
# "missing" years: a fixed list yields duplicate factors as soon as one of
# those years appears in the sheet, and omits any other gap year.
year_labels = resultsYears['Year'].astype('str').tolist()
year_numbers = pd.to_numeric(resultsYears['Year'], errors='coerce').dropna().astype(int)

if len(year_numbers):
    x = [str(year) for year in range(int(year_numbers.min()), int(year_numbers.max()) + 1)]
else:
    x = []
# Keep any label that is not a plain year (e.g. a blank cell) on the axis so
# that its bar is still drawn.
x += sorted(set(year_labels) - set(x))

p = figure(x_range=x, plot_width=600, plot_height=400, toolbar_location=None, title="Papers per year")
p.vbar(x=year_labels, top=resultsYears['Count'].astype('int').tolist(), color='#4BACC5', width=0.9)
p.xgrid.grid_line_color = None
p.yaxis[0].ticker.desired_num_ticks = 10
p.y_range.start = 0

show(p)

In [326]:
# Number of papers per publication type.
publication_type_column = 'Type of publication (Journal, Book, Conference proceedings, etc)'
resultsType = df[publication_type_column].value_counts().to_frame().reset_index()
resultsType.columns = ['Type', 'Count']
resultsType = resultsType.sort_values(by=['Type', 'Count'], ascending=[False, False])
resultsType

Unnamed: 0,Type,Count
0,Conference Paper,21
1,Article,4


In [327]:
# Bar chart of papers per publication type.
type_names = resultsType['Type'].tolist()
type_counts = resultsType['Count'].tolist()

p = figure(
    x_range=type_names,
    plot_width=600,
    plot_height=600,
    title="Publication type",
)
p.vbar(x=type_names, top=type_counts, width=0.9, color='#4BACC5')

# Count axis starts at zero with roughly ten ticks; no vertical grid lines.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 10
p.xgrid.grid_line_color = None

show(p)

In [328]:
# Split the comma-separated "Variability factors" cells into one normalised
# label (trimmed, first letter upper-case, rest lower-case) per row.
# dropna()/astype('str') mirror the keyword handling so that a missing or
# non-text cell cannot break the .split() call.
variability = df["Variability factors (factors that affect the dashboard composition)"].dropna().astype('str').values
variability_list = [
    factor.strip().lower().capitalize()
    for v in variability
    for factor in v.split(',')
]

# Empty fragments (blank cells, trailing separators) are not factors.
variability_list = [factor for factor in variability_list if factor != '']

# Naming the column in the constructor also works when the list is empty;
# assigning .columns on an empty, column-less frame raises a length mismatch.
dfVariability = pd.DataFrame(variability_list, columns=['Name'])

# Occurrences per factor, listed alphabetically by factor name.
resultsVariability = dfVariability['Name'].value_counts().to_frame().reset_index()
resultsVariability.columns = ['Name', 'Count']
resultsVariability = resultsVariability.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsVariability

Unnamed: 0,Name,Count
9,Analysis scenario,1
1,Business process,3
6,Data sources,2
2,Data structure,3
5,Goals,2
4,Guidelines,2
3,Usage profiles,2
7,User abilities,1
10,User description data structure,1
0,User preferences,16


In [329]:
# Bar chart of how often each variability factor is reported.
factor_names = resultsVariability['Name'].astype('str').tolist()
factor_counts = resultsVariability['Count'].astype('int').tolist()

p = figure(
    x_range=factor_names,
    plot_width=800,
    plot_height=500,
    title="Variability factors",
)
p.vbar(x=factor_names, top=factor_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted tick labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_orientation = math.pi/3.8
p.xgrid.grid_line_color = None

show(p)

In [330]:
# Number of papers per application domain, listed alphabetically by domain.
resultsDomain = df['Application domain'].value_counts().to_frame().reset_index()
resultsDomain.columns = ['Name', 'Count']
resultsDomain = resultsDomain.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsDomain

Unnamed: 0,Name,Count
0,BI,8
11,Disaster situations,1
6,Economics,1
9,Emergency management,1
13,Energy monitoring,1
8,Generic,1
7,Interface evaluation,1
1,IoT,2
4,Learning Analytics,2
10,Microservices monitoring,1


In [331]:
# Bar chart of papers per application domain.
domain_names = resultsDomain['Name'].astype('str').tolist()
domain_counts = resultsDomain['Count'].astype('int').tolist()

p = figure(
    x_range=domain_names,
    plot_width=600,
    plot_height=500,
    title="Application domain",
)
p.vbar(x=domain_names, top=domain_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; small slanted labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_text_font_size = "9pt"
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

show(p)

In [332]:
# Split the comma-separated "Method" cells into one normalised label
# (trimmed, first letter upper-case, rest lower-case) per row.
# dropna()/astype('str') mirror the keyword handling so that a missing or
# non-text cell cannot break the .split() call.
method = df["Method"].dropna().astype('str').values
method_list = [
    label.strip().lower().capitalize()
    for m in method
    for label in m.split(',')
]

# Empty fragments (blank cells, trailing separators) are not methods.
method_list = [label for label in method_list if label != '']

# Naming the column in the constructor also works when the list is empty;
# assigning .columns on an empty, column-less frame raises a length mismatch.
dfMethod = pd.DataFrame(method_list, columns=['Name'])

# Occurrences per method, listed alphabetically by method name.
resultsMethod = dfMethod['Name'].value_counts().to_frame().reset_index()
resultsMethod.columns = ['Name', 'Count']
resultsMethod = resultsMethod.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsMethod

Unnamed: 0,Name,Count
3,Agents,3
2,Configuration files,3
0,Configuration wizard,8
8,Context-sensitive generator,1
7,Inclusive user modelling,1
9,Indicator ontology,1
11,Knowledge graphs,1
5,Model driven,3
6,Pre-defined templates,2
10,Semantic reasoner,1


In [333]:
# Bar chart of how often each variability method is used.
method_names = resultsMethod['Name'].astype('str').tolist()
method_counts = resultsMethod['Count'].astype('int').tolist()

p = figure(
    x_range=method_names,
    plot_width=600,
    plot_height=500,
    title="Variability methods",
)
p.vbar(x=method_names, top=method_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted tick labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

show(p)

In [334]:
# Split the comma-separated "Target of the variability process" cells into
# one normalised label (trimmed, first letter upper-case, rest lower-case)
# per row.
# dropna()/astype('str') mirror the keyword handling so that a missing or
# non-text cell cannot break the .split() call.
target = df["Target of the variability process"].dropna().astype('str').values
target_list = [
    label.strip().lower().capitalize()
    for t in target
    for label in t.split(',')
]

# Empty fragments (blank cells, trailing separators) are not targets.
target_list = [label for label in target_list if label != '']

# Naming the column in the constructor also works when the list is empty;
# assigning .columns on an empty, column-less frame raises a length mismatch.
dfTarget = pd.DataFrame(target_list, columns=['Name'])

# Occurrences per target, listed alphabetically by target name.
resultsTarget = dfTarget['Name'].value_counts().to_frame().reset_index()
resultsTarget.columns = ['Name', 'Count']
resultsTarget = resultsTarget.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsTarget

Unnamed: 0,Name,Count
0,Displayed data,24
3,Functionalities,2
4,Interaction,2
2,Layout,22
5,Visual design,2
1,Visualization type,23


In [335]:
# Bar chart of how often each dashboard element is the target of variability.
target_names = resultsTarget['Name'].astype('str').tolist()
target_counts = resultsTarget['Count'].astype('int').tolist()

p = figure(
    x_range=target_names,
    plot_width=600,
    plot_height=500,
    title="Target of the variability process",
)
p.vbar(x=target_names, top=target_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted 10pt labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_text_font_size = "10pt"
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

show(p)

In [336]:
# Split the comma-separated "Stage ..." cells into one normalised label
# (trimmed, first letter upper-case, rest lower-case) per row.
# dropna()/astype('str') mirror the keyword handling so that a missing or
# non-text cell cannot break the .split() call.
stage = df["Stage at which the variation / customization / personalization is performed"].dropna().astype('str').values
stage_list = [
    label.strip().lower().capitalize()
    for s in stage
    for label in s.split(',')
]

# Empty fragments (blank cells, trailing separators) are not stages.
stage_list = [label for label in stage_list if label != '']

# Naming the column in the constructor also works when the list is empty;
# assigning .columns on an empty, column-less frame raises a length mismatch.
dfStage = pd.DataFrame(stage_list, columns=['Name'])

# Occurrences per stage, listed alphabetically by stage name.
resultsStage = dfStage['Name'].value_counts().to_frame().reset_index()
resultsStage.columns = ['Name', 'Count']
resultsStage = resultsStage.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsStage

Unnamed: 0,Name,Count
3,Compile-time,1
0,Pre-configuration,10
2,Run-time,7
1,User-configuration,8


In [337]:
# Bar chart of the stage at which variability is applied; the figure is also
# written to "Figure 4.png" before being shown.
stage_names = resultsStage['Name'].astype('str').tolist()
stage_counts = resultsStage['Count'].astype('int').tolist()

p = figure(
    x_range=stage_names,
    plot_width=600,
    plot_height=500,
    title="Stage at which the variability process is applied",
)
p.vbar(x=stage_names, top=stage_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted 12pt labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_text_font_size = "12pt"
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

export_png(p, filename="Figure 4.png")
show(p)

In [338]:
# Number of papers per answer to "Tested in real-world scenario?",
# listed alphabetically by answer.
resultsTest = df['Tested in real-world scenario?'].value_counts().to_frame().reset_index()
resultsTest.columns = ['Name', 'Count']
resultsTest = resultsTest.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsTest

Unnamed: 0,Name,Count
1,No,7
2,Partial,5
0,Yes,13


In [339]:
# Bar chart of whether the solutions were tested in a real-world scenario.
answer_names = resultsTest['Name'].astype('str').tolist()
answer_counts = resultsTest['Count'].astype('int').tolist()

p = figure(
    x_range=answer_names,
    plot_width=600,
    plot_height=500,
    title="Solution tested in real-world scenario?",
)
p.vbar(x=answer_names, top=answer_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted 12pt labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_text_font_size = "12pt"
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

show(p)

In [340]:
# Number of papers per first-level classification, listed alphabetically.
resultsClassification = df['First level classification'].value_counts().to_frame().reset_index()
resultsClassification.columns = ['Name', 'Count']
resultsClassification = resultsClassification.sort_values(by=['Name', 'Count'], ascending=[True, False])
resultsClassification

Unnamed: 0,Name,Count
2,Adaptive,4
0,Customizable,11
3,Customizable w/ system support,3
4,Hybrid,2
1,Personalized,5


In [341]:
# Bar chart of papers per first-level classification.
classification_names = resultsClassification['Name'].astype('str').tolist()
classification_counts = resultsClassification['Count'].astype('int').tolist()

p = figure(
    x_range=classification_names,
    plot_width=600,
    plot_height=500,
    title="Solutions' classification",
)
p.vbar(x=classification_names, top=classification_counts, width=0.5, color='#4BACC5')

# Count axis starts at zero with roughly twelve ticks; slanted tick labels.
p.y_range.start = 0
p.yaxis[0].ticker.desired_num_ticks = 12
p.xaxis.major_label_orientation = math.pi/4
p.xgrid.grid_line_color = None

show(p)