In [5]:
from ipysankeywidget import SankeyWidget
from floweaver import *
import pandas as pd

In [6]:
# Empty containers that the following cells fill in.
# flow_df receives one row per link (columns: source, target, type, value).
flow_df = pd.DataFrame()
# One label list per diagram layer; each is a distinct, initially empty list.
mutations, genomic_feats, genetic_feats, operons, regulons = [], [], [], [], []

In [7]:
# Links of the diagram, one dict per row of flow_df. Two chains are described:
#   regulon C -> operon C -> gene A -> feature A -> feature A DEL   (value 1.0)
#   regulon C -> operon C -> gene B -> feature B -> feature B SNP   (value 2.0)
#
# DataFrame.append was removed in pandas 2.0, so the rows are collected as plain
# records and converted to a frame in a single step instead of being appended
# one at a time.
flow_records = [
    # Chain ending in the deletion on feature A.
    {"source": "*3 regulon C", "target": "*3 operon C", "type": "unknown", "value": 1.0},
    {"source": "*3 operon C", "target": "1 gene A", "type": "unknown", "value": 1.0},
    {"source": "1 gene A", "target": "1 feature A", "type": "unknown", "value": 1.0},
    {"source": "1 feature A", "target": "1 feature A DEL", "type": "unknown", "value": 1.0},
    # Chain ending in the SNP on feature B.
    {"source": "*3 regulon C", "target": "*3 operon C", "type": "unknown", "value": 2.0},
    {"source": "*3 operon C", "target": "2 gene B", "type": "unknown", "value": 2.0},
    {"source": "2 gene B", "target": "*2 feature B", "type": "unknown", "value": 2.0},
    {"source": "*2 feature B", "target": "*2 feature B SNP", "type": "unknown", "value": 2.0},
]
new_flows = pd.DataFrame(flow_records)

# Keep the original "add to whatever flow_df already holds" behaviour, but skip
# the concat when flow_df is still empty so no empty frame takes part in it.
if flow_df.empty:
    flow_df = new_flows
else:
    flow_df = pd.concat([flow_df, new_flows], ignore_index=True)

# Register the node labels of each layer, in the same order as before.
mutations.extend(["1 feature A DEL", "*2 feature B SNP"])
genomic_feats.extend(["1 feature A", "*2 feature B"])
genetic_feats.extend(["1 gene A", "2 gene B"])
operons.append("*3 operon C")
# NOTE(review): no flow row above references "1 regulon A"; it is kept here
# because the original cell registered it - confirm whether it is still wanted.
regulons.extend(["1 regulon A", "*3 regulon C"])

In [9]:
# A simple partition on the "process" dimension for each layer's labels.
mutation_partition = Partition.Simple("process", mutations)
genomic_feat_partition = Partition.Simple("process", genomic_feats)
genetic_feat_partition = Partition.Simple("process", genetic_feats)
operon_partition = Partition.Simple("process", operons)
regulon_partition = Partition.Simple("process", regulons)

# Process groups keyed by layer name; bundles and ordering refer to these keys.
# Each group is created with its partition already attached.
nodes = {
    "mutation": ProcessGroup(
        mutations, partition=mutation_partition, title="mutation"),
    "genomic feature": ProcessGroup(
        genomic_feats, partition=genomic_feat_partition, title="genomic feature"),
    "genetic feature": ProcessGroup(
        genetic_feats, partition=genetic_feat_partition, title="genetic feature"),
    "operon": ProcessGroup(
        operons, partition=operon_partition, title="operon"),
    "regulon": ProcessGroup(
        regulons, partition=regulon_partition, title="regulon"),
}

# Layer order handed to SankeyDefinition: regulon first, mutation last.
ordering = [[layer] for layer in (
    "regulon",
    "operon",
    "genetic feature",
    "genomic feature",
    "mutation",
)]

# The order in which labels are passed to a Bundle is significant (source, then
# target), so it must not be chosen arbitrarily. Each bundle connects one layer
# to the next layer of `ordering`.
bundles = [
    Bundle(upstream[0], downstream[0])
    for upstream, downstream in zip(ordering, ordering[1:])
]

sdd = SankeyDefinition(nodes, bundles, ordering)

# Every flow type currently maps to the same colour.
p_d = dict.fromkeys(("disrupt", "unknown", "repaired"), "#6BAAD3")

size = {"width": 1200, "height": 300}

results = weave(sdd, flow_df, palette=p_d).to_widget(
    margins={"left": 400, "right": 350},
    **size
)
results.auto_save_svg("mutflow.svg")
results

SankeyWidget(groups=[{'id': 'regulon', 'type': 'process', 'title': 'regulon', 'nodes': ['regulon^*3 regulon C'…