In [1]:
import requests
import datetime
import pandas as pd
import numpy as np
import random
from orkg import ORKG
from math import pi
from bokeh.models import ColumnDataSource, HoverTool, WheelZoomTool, ResetTool, SaveTool, PanTool, DatetimeTickFormatter, Whisker
# from bokeh.models.widgets import DataTable, TableColumn, HTMLTemplateFormatter
from bokeh.plotting import figure, show, output_notebook

from bokeh.io import show
from bokeh.models import LinearColorMapper, BasicTicker, ColorBar
# from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
# from bokeh.plotting import figure
from bokeh.models.glyphs import Text, Rect

# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()
# Background colours for highlighted cells: indexed by usage bucket in
# usage_to_color and reused as the palette of the figure's colour bar.
# (The identifier's spelling is kept because other cells reference it.)
custom_pallete = ['#00ccff','#00ffff', '#66ffcc', '#66ff99', '#66ff66']

In [2]:
def usage_to_color(uses, totals, palette=None):
    """Map a usage count to a (background, text) colour pair for one cell.

    The share ``uses / totals`` is sorted into ``len(palette)`` equally wide
    buckets covering [0, 1); a share in the k-th bucket (0-based) gets
    ``palette[k]`` as background together with white text.

    Parameters
    ----------
    uses : int
        Usage count of the cell. ``-1`` is the marker for a cell that is not
        counted and is rendered black-on-white.
    totals : int
        Sum of the counted uses the cell is compared against. ``0`` is
        treated as a share of 0 instead of dividing by zero.
    palette : sequence of str, optional
        Background colours ordered from the lowest to the highest bucket.
        Defaults to the notebook-level ``custom_pallete``.

    Returns
    -------
    tuple of (str, str)
        ``(background_color, text_color)``: ``('white', 'black')`` when
        ``uses == -1``, a palette colour with white text for shares below
        1.0, and ``('black', 'white')`` as fallback for shares of 1.0 or more.

    Raises
    ------
    AssertionError
        If ``uses`` is smaller than -1.
    """
    assert uses >= -1

    if uses == -1:
        return ('white', 'black')

    if palette is None:
        palette = custom_pallete

    # A property whose counted uses sum to zero has no meaningful share;
    # treat it as 0 so it lands in the lowest bucket.
    usage = float(uses / totals) if totals else 0.0
    threshold = float(1 / len(palette))

    # Bucket i (1-based) has the upper bound i * threshold and belongs to the
    # (i - 1)-th palette entry; enumerate pairs the two without index maths.
    for i, bg_color in enumerate(palette, start=1):
        if usage < i * threshold:
            return (bg_color, 'white')

    return ('black', 'white')

In [3]:
# Connect to the ORKG service and fetch the comparison of the selected
# contributions as a table. The code below treats its rows as properties and
# its columns as papers.
orkg = ORKG(host='https://orkg.org/orkg', simcomp_host='https://orkg.org/orkg/simcomp')

contribution_ids = ['R38371','R38323','R38392','R38296','R38344','R38266']
df = orkg.contributions.compare_dataframe(contributions=contribution_ids)

# Empty or whitespace-only cells count as missing; keep only the properties
# that have a value in every column.
df = df.replace(r'^\s*$', np.nan, regex=True).dropna()

# Remember the wide-format labels; they define the figure's axes later on.
papers, properties = df.columns, df.index

# Name the row index 'properties' (round trip through a regular column).
df = (
    df.reset_index()
      .rename(columns={'index': 'properties'})
      .set_index('properties')
)

# Reshape wide -> long: one row per (property, paper) cell with its value.
df = (
    pd.DataFrame(df.stack(), columns=['values'])
      .reset_index()
      .rename(columns={'level_1': 'papers'})
)

# Placeholder data: random highlight flags and random usage counts.
# Both are drawn from one dedicated, seeded generator so that
# "Restart & Run All" reproduces the same figure, without touching the
# global random state.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

# Decide per property whether its cells are highlighted.
FORCED_KEYWORDS = ('participant', 'assay method', 'detection instrument')
coloring = {}
max_set = 3  # no further random flags are switched on once this many are set
num_set = 0
for prop in properties:
    if any(keyword in prop for keyword in FORCED_KEYWORDS):
        # Force colouring for the interesting properties; they also count
        # towards max_set.
        coloring[prop] = True
        num_set += 1
    elif num_set < max_set:
        coloring[prop] = bool(rng.getrandbits(1))
        if coloring[prop]:
            num_set += 1
    else:
        coloring[prop] = False
colorings = pd.Series([coloring[prop] for prop in df['properties']])

# Random uses per cell, starting from -1 which is not counted towards totals.
uses = pd.Series([rng.randrange(-1, 8, 1) for _ in df.index])

# Append the helper columns after the existing ones. Colours, totals and
# short labels start with defaults and are filled in by the next step.
new_columns = [
    ('uses', uses),
    ('color', '#FFFFFF'),
    ('txtcolor', '#000000'),
    ('totals', 0),
    ('colorings', colorings),
    ('short_papers', ''),
]
for name, default in new_columns:
    # Inserting at len(df.columns) appends, independent of how many columns
    # the frame currently has.
    df.insert(len(df.columns), name, default)

# Derived columns: short paper labels, per-property totals and cell colours.

# Shorten the paper names (position prefix + first 12 characters); the full
# names cause problems when used as labels in the figure.
short_papers = ['%s_%.12s' % (i, paper) for i, paper in enumerate(papers)]
df['short_papers'] = df['papers'].map(dict(zip(papers, short_papers)))

# Sum the uses per property; the -1 markers are replaced by 0 so they do not
# count towards the total.
counted_uses = df['uses'].where(df['uses'] > -1, 0)
df['totals'] = counted_uses.groupby(df['properties']).transform('sum')

# Colour every flagged cell from its own uses/totals. Rows are addressed by
# their index label: selecting rows by equal cell text would also repaint
# every other row that happens to show the same value, including rows that
# are not flagged for colouring.
for idx, row in df.iterrows():
    if row['colorings']:
        bgcolor, txtcolor = usage_to_color(row['uses'], row['totals'])
        df.at[idx, 'color'] = bgcolor
        df.at[idx, 'txtcolor'] = txtcolor
        
# Build the comparison grid: one categorical cell per (paper, property) pair.
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
fontsize = "5px"

figure_title = "Comparison# ({0} - {1})".format(short_papers[0], short_papers[-1])
hover_fields = [('uses/totals', '@uses/@totals'), ('value', '@values'),  ('coloring', '@colorings')]

# x range: the shortened paper labels (axis drawn above the plot);
# y range: the property labels in reversed order.
# NOTE(review): newer Bokeh releases appear to expect width/height instead of
# plot_width/plot_height - confirm against the installed version.
p = figure(
    title=figure_title,
    x_range=short_papers,
    y_range=list(reversed(properties)),
    x_axis_location="above",
    plot_width=1000,
    plot_height=500,
    tools=TOOLS,
    toolbar_location='below',
    tooltips=hover_fields,
)

# Remove grid lines, axis lines and ticks; keep small, tight axis labels.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

source = ColumnDataSource(df)

# Background rectangles are added first, the cell text second.
rects = Rect(x="short_papers", y="properties", width=1, height=1,
             fill_color="color", line_color=None)
p.add_glyph(source, rects)

glyph = Text(x="short_papers", y="properties", text="values",
             text_align='center', text_color="txtcolor",
             text_font="monospace", text_font_size=fontsize)
p.add_glyph(source, glyph)

# Colour bar over the same palette the cell backgrounds are taken from,
# scaled from 0 to 1.
color_mapper = LinearColorMapper(palette=custom_pallete, low=0, high=1)
color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(), location=(0, 0))
p.add_layout(color_bar, 'right')

show(p)  # show the plot