In [1]:
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource, LinearColorMapper
from bokeh.palettes import Reds
import re
import numpy as np
import pandas as pd

In [2]:
# Log file holding the per-journal counts that the parsing cell reads.
input_file = "journal_collections_data.log"
# The plot is written to a standalone HTML page. To render inline in the
# notebook instead, enable output_notebook() and drop the output_file() call.
# output_notebook()
output_file(filename="Verification_Heatmap2.html")

In [3]:
# One record per journal: 54069 = 269 x 201, the size of the heat-map grid that
# the later cells fill in.
#
# The storage is zero-filled on purpose.  ``np.recarray(shape, ...)`` behaves
# like ``np.empty`` and leaves every field holding arbitrary memory; the
# "x", "y" and "color" fields are only assigned *after* the records are
# sorted, and NumPy consults every field of a structured array as a sort
# tie-breaker, so uninitialised values could make the layout differ from run
# to run.  Viewing the zeroed array as a recarray keeps the same field names,
# formats and attribute-style access as before.
data = np.zeros(
    54069,
    dtype=[
        ("Title", "U50"),         # journal title (at most 50 characters are kept)
        ("Num_Verified", "f8"),   # number of verified names
        ("Num_Names", "f8"),      # number of names
        ("Ratios", "f8"),         # verification ratio
        ("x", "i4"),              # heat-map column
        ("y", "i4"),              # heat-map row
        ("color", "U7"),          # "#rrggbb" fill colour
    ],
).view(np.recarray)

In [4]:
# Parse the log.  Every journal occupies four consecutive lines: its title,
# then three lines whose last whitespace-separated token is, in order, the
# number of verified names, the number of names and the verification ratio.
with open(input_file, "r") as f:
    for i in range(len(data)):
        data[i][0] = f.readline().strip()  # The title
        for field in (1, 2, 3):  # Num_Verified, Num_Names, Ratios
            data[i][field] = float(f.readline().split()[-1])

# Order the journals by increasing number of names and, among journals with the
# same number of names, by increasing verification ratio.
#
# This has to be a single multi-key sort.  Sorting by "Ratios" and then
# "stably" by "Num_Names" does not work on a structured array: NumPy breaks
# ties on every field that is not listed in ``order`` (in dtype order), so the
# second sort would order equal-"Num_Names" records by "Title" and discard the
# ratio ordering produced by the first sort.
data.sort(kind = "stable", order = ["Num_Names", "Ratios"])

In [5]:
# Lay the records out on a 269 x 201 grid, one record per cell.
# Record number 201 * c + r goes to x = c + 1, y = r + 1, so y varies fastest
# and both coordinates are 1-based.
grid_column, grid_row = np.divmod(np.arange(269 * 201), 201)
data["x"][:269 * 201] = grid_column + 1
data["y"][:269 * 201] = grid_row + 1

In [6]:
# Colour each journal's cell.
# Journals with no names at all are drawn black; every other journal gets one
# of the eight shades of bokeh's Reds palette, chosen from its verification
# ratio so that cells with higher ratio values are a deeper red.
black = "#000000"
shades = Reds[8]
# Lower edges of the ratio bands that follow the first band (0, 0.167).
band_edges = (0.167, 0.33, 0.5, 0.67, 0.833)
for i in range(54069):
    ratio = data[i][3]
    if data[i][2] == 0:
        shade = black
    elif ratio == 0:
        shade = shades[7]
    elif 0 < ratio < 1:
        # Each band edge the ratio has reached moves it one shade deeper.
        bands_passed = sum(1 for edge in band_edges if ratio >= edge)
        shade = shades[6 - bands_passed]
    else:
        # Everything else (in practice a ratio of exactly 1) gets the deepest shade.
        shade = shades[0]
    data[i][6] = shade

In [7]:
# Per the original author's note, ColumnDataSource does not recognise record
# arrays, so the records are first converted to a DataFrame (one column per
# field) and the data source is built from that.
df = pd.DataFrame(data=data)
source = ColumnDataSource(data=df)

In [8]:
# Interactive tools shown in the toolbar; "hover" drives the tooltips below.
TOOLS = "hover, save, pan, box_zoom, reset, wheel_zoom"

# The heat-map cells are 1 x 1 rectangles centred on the integer coordinates
# x = 1..269 and y = 1..201, so together they cover 0.5..269.5 horizontally and
# 0.5..201.5 vertically.  The initial ranges match that extent exactly; ranges
# of (0, 269) / (0, 201) would cut the last column and the top row in half and
# leave an empty half-cell strip along the left and bottom edges.
#
# NOTE(review): plot_width / plot_height are the pre-3.0 Bokeh spellings
# (newer releases use width / height) -- confirm against the installed version.
fig = figure(title = "Journals to Verification Ratio",
             x_range = (0.5, 269.5),
             y_range = (0.5, 201.5),
             plot_width = 1200,
             plot_height = 800,
             tools = TOOLS,
             toolbar_location = "below",
             # Hover text: "@Name" is filled in from the column of that name
             # in the data source attached to the hovered glyph.
             tooltips = [("Journal", "@Title"),
                         ("Number of Names", "@Num_Names"),
                         ("Number of Verified Names", "@Num_Verified"),
                         ("Verification Ratio", "@Ratios")])

In [9]:
# Draw one unit square per journal, centred on its (x, y) grid position and
# filled with the colour stored in the "color" column.  A thin, mostly
# transparent white outline separates neighbouring cells.
cell_outline = dict(line_color = "White", line_alpha = 0.25, line_width = 0.2)
fig.rect(x = "x", y = "y", width = 1, height = 1,
         source = source,
         fill_color = "color",
         **cell_outline)

In [10]:
# Render the finished heat map.  Output was directed to an HTML file by the
# output_file(...) call near the top of the notebook, so the plot ends up in
# that file rather than inline.
show(fig)