In [29]:
from pathlib import Path
import pandas as pd

import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')

In [78]:
# Sankey node labels. A label's position in this list is the integer id that
# the edge tuples use for their source/target fields.
nodes = [
    "Soil Data",           # 0
    "STATSGO2",            # 1
    "SSURGO",              # 2
    "Raster Soil Survey",  # 3
    "rvindicator: yes",    # 4
    "rvindicator: no",     # 5
    "rvindicator: NULL",   # 6
    "NITA Processed",      # 7
    "Accepted",            # 8
    "Discarded",           # 9
]


# Flows as (source node index, target node index, record count); the indices
# refer to positions in `nodes`.
# NOTE(review): the three STATSGO2 outflows (1 -> 4, 1 -> 5, 1 -> 6) add up to
# 595_103, but the inflow (0 -> 1) is 592_103 and n_tot below is built on that
# figure. Every other node balances. Confirm which number is the typo.
edges_tot = [
    (0, 1, 592_103),   (0, 2, 7_704_916), (0, 3, 10_537), # Distribution of Different Sources
    (1, 4, 175_374),   (1, 5, 418_858),   (1, 6, 871), # STATSGO2 Distribution
    (2, 4, 3_538_803), (2, 5, 3_926_405), (2, 6, 239_708), # SSURGO Distribution
    (3, 4, 3_679),     (3, 5, 6_810),     (3, 6, 48), # RSS Distribution
    (4, 7, 3_717_856), # rvindicator = yes are NITA processed
    (7, 8, 3_580_123), (7, 9, 137_733), # NITA accepted and discarded
    (5, 9, 4_352_073), (6, 9, 240_627) # rvindicator no and NULL are automatically discarded
]

# Denominator for the percentage view; equal to the sum of the three
# "Soil Data" outflows (0 -> 1, 0 -> 2, 0 -> 3) listed above.
n_tot = 8_307_556

# The same flows expressed as a percentage of n_tot. Derived from edges_tot
# instead of repeating every literal, so the two lists cannot drift apart.
edges_per = [
    (source, target, count / n_tot * 100)
    for source, target, count in edges_tot
]

def make_sankey_diagram(edges, nodes, dimension='Percentage', unit='%', *,
                        width=2000, height=800, cmap='Set1',
                        label_font_size='13pt'):
    """Build a styled HoloViews Sankey diagram from an edge list and node labels.

    Parameters
    ----------
    edges : iterable of (source, target, value)
        Flows between nodes; ``source`` and ``target`` are integer positions
        in ``nodes`` and ``value`` is the flow magnitude.
    nodes : sequence of str
        Node labels. Each label is paired with its position via ``enumerate``
        to form the node table (columns ``'index'`` and ``'label'``).
    dimension : str, default 'Percentage'
        Name of the value dimension attached to each edge.
    unit : str or None, default '%'
        Unit passed to the value dimension.
    width, height : int, keyword-only
        Plot size passed to the Sankey options (defaults 2000 x 800).
    cmap : str, keyword-only
        Colormap name passed to the Sankey options (default 'Set1').
    label_font_size : str, keyword-only
        Font size assigned to the ``'text_1_glyph'`` plot handle
        (default '13pt').

    Returns
    -------
    hv.Sankey
        The Sankey element with the styling options applied.
    """
    # Keep the caller's label sequence intact; build the node table separately.
    node_table = hv.Dataset(enumerate(nodes), 'index', 'label')

    def set_label_font_size(plot, element):
        # Presumably 'text_1_glyph' is the glyph drawing the node labels;
        # the hook adjusts its font size after the plot has been built.
        plot.handles['text_1_glyph'].text_font_size = label_font_size

    value_dim = hv.Dimension(dimension, unit=unit)
    sankey = hv.Sankey((edges, node_table), ['From', 'To'], vdims=value_dim)

    # Edges are coloured by their target node, nodes by their own index.
    sankey.opts(
        opts.Sankey(labels='label', label_position='right',
                    width=width, height=height, cmap=cmap,
                    toolbar=None,
                    hooks=[set_label_font_size],
                    edge_color=dim('To').str(), node_color=dim('index').str()))

    return sankey

# Two views of the same flows: one as a percentage of the total, one as raw counts.
soil_distribution_per = make_sankey_diagram(
    edges_per, nodes, dimension='Percentage', unit='%'
)
soil_distribution_tot = make_sankey_diagram(
    edges_tot, nodes, dimension='Count', unit=None
)

In [82]:
# Display the Sankey diagram built from edges_per (value dimension 'Percentage', unit '%').
soil_distribution_per

In [83]:
# Display the Sankey diagram built from edges_tot (value dimension 'Count', no unit).
soil_distribution_tot