In [1]:
import pandas as pd
import numpy as np

# NumPy array printing: three decimals, fixed-point instead of scientific notation.
np.set_printoptions(precision=3, suppress=True)

#import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
from __future__ import division
from numpy.random import seed

pd.options.display.float_format = '{:,.2f}'.format
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Seaborn appearance: 'whitegrid' style with the 'notebook' context.
sns.set_style('whitegrid')
sns.set_context('notebook')

# Twenty hex colours installed as the default seaborn palette.
flatui = [
    '#1ABC9C', '#16A085', '#2ECC71', '#27AE60',
    '#3498DB', '#2980B9', '#9B59B6', '#8E44AD',
    '#34495E', '#2C3E50', '#F1C40F', '#F39C12',
    '#E67E22', '#D35400', '#E74C3C', '#C0392B',
    '#ECF0F1', '#BDC3C7', '#95A5A6', '#7F8C8D',
]
sns.set_palette(flatui)
%config InlineBackend.figure_format = 'svg'
%matplotlib inline
from bokeh.io import output_notebook, show

from bokeh.plotting import figure, output_file, show, save
from bokeh.models import ColumnDataSource
from bokeh.layouts import column, row

from bokeh.models import LabelSet, Label, FactorRange
from bokeh.models.glyphs import VBar
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    BasicTicker,
    PrintfTickFormatter,
    ColorBar,
    Div
)
from bokeh.models import Text,Label, Range1d
from bokeh.palettes import brewer
from bokeh.models.widgets import Panel, Tabs
from bokeh.models import NumeralTickFormatter
from bokeh.layouts import gridplot
from bokeh.palettes import Category20_20
from bokeh.core.properties import value
from bokeh.transform import CategoricalColorMapper

In [2]:
from bokeh.resources import CDN
from bokeh.embed import components

In [6]:
# Configure Bokeh so that later show() calls render inside the notebook.
output_notebook()

In [5]:
import squarify
# import colorlover as cl
from bokeh.palettes import Paired12

In [5]:
# Read worksheet 'Sheet1' of the CVE workbook into `df` and preview it.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
xlsx_path = '/home/jdoz/Downloads/CVE_Top_3.xlsx'
df = pd.read_excel(xlsx_path, 'Sheet1')
df.head()

Unnamed: 0,Vendor,Vul_Type,Count
0,Microsoft,Denial of Service,1463
1,Microsoft,Execute Code,2567
2,Microsoft,Overflow,1495
3,Microsoft,Bypass Something,369
4,Microsoft,Gain Information,486


In [6]:
# Sum of Count per Vendor, ordered from the largest total to the smallest.
vendor_level = (
    df.groupby('Vendor')['Count']
      .sum()
      .reset_index()
      .sort_values('Count', ascending=False)
)
vendor_level

Unnamed: 0,Vendor,Count
1,Microsoft,8495
0,Apple,7954
2,Oracle,1153


In [7]:
# Scale the vendor totals to a 15 x 10 canvas and ask squarify for the
# treemap layout; it returns a list of rectangles.
canvas_w, canvas_h = 15, 10
values = squarify.normalize_sizes(vendor_level['Count'].values, canvas_w, canvas_h)
rects = squarify.squarify(values, x=0, y=0, dx=canvas_w, dy=canvas_h)

# Three Paired12 entries, paired with the vendors by position.
colors = [Paired12[i] for i in (1, 3, 5)]
cmap = dict(zip(vendor_level['Vendor'].unique(), colors))

data = pd.DataFrame(rects)

In [8]:
# Add the far edges of every rectangle, label each row with its vendor
# (assigned by position) and that vendor's colour, then merge in the
# per-vendor Count from `vendor_level`.
data = (
    data
    .assign(top=data['y'] + data['dy'])
    .assign(right=data['x'] + data['dx'])
    .assign(Vendor=vendor_level['Vendor'].unique())
    .assign(color=lambda frame: frame['Vendor'].map(cmap))
    .merge(vendor_level)
)

In [13]:
# Vendor-level treemap: one rectangle per row of `data`, with a hover read-out.
hover = HoverTool(tooltips=[("Vendor", "@Vendor"), ("Count", "@Count")])
source = ColumnDataSource(data=data)

p = figure(tools=[hover], toolbar_location=None, plot_width=900)

# Filled rectangles with a white outline between neighbours.
p.quad(left='x', right='right', bottom='y', top='top',
       color='color', line_color='white', line_width=2,
       source=source)

# Vendor name positioned from each rectangle's (x, top), shifted by the offsets.
p.text(x='x', y='top', text='Vendor',
       x_offset=10, y_offset=30, text_font_size='16pt',
       source=source)

# Hide grid, axes and outline so only the rectangles and labels remain.
p.grid.visible = False
p.axis.visible = False
p.outline_line_color = None
show(p)
# For embeddable markup instead of inline display: script, div = components(p)

In [20]:
def squares(df, group, val, x_dim, y_dim):
    """Compute a two-level treemap layout for the rows of ``df``.

    The canvas is first split into one rectangle per distinct ``group`` value,
    sized by the group's summed ``val``. Each group rectangle is then split
    again into one rectangle per row of that group, sized by the row's ``val``.

    The layout is requested from squarify with a height of ``-y_dim``, so the
    lower edge of a rectangle is ``y + dy`` (stored as ``bottom``) and ``y``
    itself is the upper edge.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain the ``group`` and ``val`` columns.
    group : str
        Column whose distinct values form the outer rectangles.
    val : str
        Numeric column that determines rectangle area.
    x_dim, y_dim : number
        Width and height of the canvas.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows of ``df`` in their original order (index
        reset to 0..n-1) plus the columns ``dx``, ``dy``, ``x``, ``y``,
        ``bottom`` and ``right``. ``df`` itself is not modified.
    """
    coord_cols = ['dx', 'dy', 'x', 'y']

    # Positional index: lets the per-row rectangles be joined back by label
    # even when the caller's index contains duplicates.
    out = df.reset_index(drop=True)

    # Group totals, largest first. The index is reset after sorting so that
    # position i in `totals` corresponds to rectangle i returned by squarify;
    # without it, index alignment would pair rectangles with the wrong group.
    totals = (out.groupby(group)[val].sum()
                 .reset_index()
                 .sort_values(val, ascending=False)
                 .reset_index(drop=True))

    outer_sizes = squarify.normalize_sizes(totals[val].values, x_dim, -y_dim)
    outer_rects = squarify.squarify(outer_sizes, x=0, y=0, dx=x_dim, dy=-y_dim)

    pieces = []
    for key, outer in zip(totals[group], outer_rects):
        members = out[out[group] == key].sort_values(val, ascending=False)

        inner_sizes = squarify.normalize_sizes(members[val].values,
                                               outer['dx'], outer['dy'])
        inner_rects = squarify.squarify(inner_sizes,
                                        x=outer['x'], y=outer['y'],
                                        dx=outer['dx'], dy=outer['dy'])

        # Rectangles come back in the order of `members`, so reuse its index.
        # This ties each rectangle to exactly one source row, unlike matching
        # on (group, val), which duplicates rows when values repeat.
        pieces.append(pd.DataFrame(inner_rects, index=members.index,
                                   columns=coord_cols))

    if pieces:
        coords = pd.concat(pieces)
    else:
        # Empty input: still return the coordinate columns.
        coords = pd.DataFrame(columns=coord_cols, dtype=float)

    out = out.join(coords)
    out['bottom'] = out['y'] + out['dy']
    out['right'] = out['x'] + out['dx']
    return out

In [21]:
# Reorder rows by Vendor, then Count, both descending.
df = df.sort_values(by=['Vendor', 'Count'], ascending=[False, False])

In [22]:
# Nested treemap geometry on a 10 x 10 canvas: Vendor regions split by Count.
sq = squares(df, group='Vendor', val='Count', x_dim=10, y_dim=10)

In [23]:
# Give each row its vendor's colour from `cmap`, then order rows by Vendor.
sq = sq.assign(Color=sq['Vendor'].map(cmap)).sort_values('Vendor')

In [25]:
# Nested treemap: one rectangle per row of `sq`, coloured by vendor.
source = ColumnDataSource(data=sq)
hover = HoverTool(tooltips=[
    ("Vendor", "@Vendor"),
    ("Count", "@Count"),
    ('Vulnerability', '@Vul_Type'),
])

p = figure(tools=[hover], toolbar_location=None, plot_width=900)

# `y` is drawn as the top edge and `bottom` as the lower edge of each quad.
p.quad(left='x', right='right', top='y', bottom='bottom',
       color='Color', line_color='white', line_width=2,
       source=source)

# One label per vendor, placed at the smallest x and largest y among that
# vendor's rectangles. The aggregations are named ('min'/'max') rather than
# passed as the Python builtins, which recent pandas releases deprecate
# inside .agg(); the result is the same.
text_source = ColumnDataSource(
    data=sq.groupby('Vendor').agg({'x': 'min', 'y': 'max'}).reset_index())

p.text(x='x', y='y', text='Vendor',
       x_offset=5, y_offset=10,
       text_font_size='10pt', text_color='white',
       text_align='left', text_baseline='middle',
       source=text_source)

# Chart title placed with a Label at x=5, y=0.
title = Label(x=5, y=0, text='Vulnerabilities by Vendor',
              text_align='center', text_font_size='14pt',
              text_font_style='bold')
p.add_layout(title)

# Hide grid, axes, border padding and outline.
p.grid.visible = False
p.axis.visible = False
p.min_border = 0
p.outline_line_color = None
show(p);

# Print the <script>/<div> pair returned by components() for embedding.
script, div = components(p)
print(script)
print(div)


<script type="text/javascript">
    (function() {
  var fn = function() {
    Bokeh.safely(function() {
      (function(root) {
        function embed_document(root) {
          var docs_json = {"33350ee5-52ce-456e-afe2-14403e3ef87f":{"roots":{"references":[{"attributes":{"formatter":{"id":"712fc0c1-ec69-4889-908e-4ca3090759d5","type":"BasicTickFormatter"},"plot":{"id":"be2f0644-f1d9-49d6-b2d0-7cc91ef1d606","subtype":"Figure","type":"Plot"},"ticker":{"id":"4ce38b2d-a2bd-43ae-9f7a-8d40a73b9c14","type":"BasicTicker"},"visible":false},"id":"efe6fc58-ada6-4775-83db-b0a016961185","type":"LinearAxis"},{"attributes":{"plot":{"id":"be2f0644-f1d9-49d6-b2d0-7cc91ef1d606","subtype":"Figure","type":"Plot"},"text":"Vulnerabilities by Vendor","text_align":"center","text_font_size":{"value":"14pt"},"text_font_style":"bold","x":5,"y":0},"id":"dcac20a9-4eef-4ecd-acac-8512b373edaa","type":"Label"},{"attributes":{"bottom":{"field":"bottom"},"fill_alpha":{"value":0.1},"fill_color":{"value":"#1f77b4"},"le