In [97]:
from ipysankeywidget import SankeyWidget
from floweaver import *
import pandas as pd

In [98]:
# Flows whose "source" is a genomic feature and whose "target" is the
# mutation label attached to it (one unit-valued flow per mutation).
#
# The frame is built from a list of records in a single constructor call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# and growing a frame row by row copies it on every step.
flow1_df = pd.DataFrame(
    [
        {"source": "1 gene2", "target": "1 gene2 SNP", "type": "unknown", "value": 1.0},
        {"source": "1 RBS2", "target": "1 RBS2 DEL", "type": "unknown", "value": 1.0},
        {"source": "1 TF1 BS", "target": "1 TF1 BS INS", "type": "unknown", "value": 1.0},
        {"source": "1 TF2 BS", "target": "1 TF2 BS INS", "type": "unknown", "value": 1.0},
    ],
    # Pin the column order explicitly so it does not depend on dict key order.
    columns=["source", "target", "type", "value"],
)

flow1_df

Unnamed: 0,source,target,type,value
0,1 gene2,1 gene2 SNP,unknown,1.0
1,1 RBS2,1 RBS2 DEL,unknown,1.0
2,1 TF1 BS,1 TF1 BS INS,unknown,1.0
3,1 TF2 BS,1 TF2 BS INS,unknown,1.0


In [99]:
# Flows whose "source" is a genetic feature and whose "target" is a genomic
# feature (one unit-valued flow per genomic feature).
#
# The frame is built from a list of records in a single constructor call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
flow2_df = pd.DataFrame(
    [
        # NOTE(review): source and target are the same id here, so this row is
        # a self-loop on "1 gene2" -- confirm that sharing the label between
        # the two levels is intended.
        {"source": "1 gene2", "target": "1 gene2", "type": "unknown", "value": 1.0},
        {"source": "1 gene1/gene2", "target": "1 RBS2", "type": "unknown", "value": 1.0},
        {"source": "2 gene0/gene1", "target": "1 TF1 BS", "type": "unknown", "value": 1.0},
        {"source": "2 gene0/gene1", "target": "1 TF2 BS", "type": "unknown", "value": 1.0},
    ],
    # Pin the column order explicitly so it does not depend on dict key order.
    columns=["source", "target", "type", "value"],
)

flow2_df

Unnamed: 0,source,target,type,value
0,1 gene2,1 gene2,unknown,1.0
1,1 gene1/gene2,1 RBS2,unknown,1.0
2,2 gene0/gene1,1 TF1 BS,unknown,1.0
3,2 gene0/gene1,1 TF2 BS,unknown,1.0


In [100]:
# Flows from the single operon node ("source") to each genetic feature
# ("target"); the value of each flow is the number of unit flows leaving
# that genetic feature in the genetic -> genomic table.
#
# The frame is built from a list of records in a single constructor call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
flow3_df = pd.DataFrame(
    [
        {"source": "*4 operon", "target": "1 gene2", "type": "unknown", "value": 1.0},
        {"source": "*4 operon", "target": "1 gene1/gene2", "type": "unknown", "value": 1.0},
        {"source": "*4 operon", "target": "2 gene0/gene1", "type": "unknown", "value": 2.0},
    ],
    # Pin the column order explicitly so it does not depend on dict key order.
    columns=["source", "target", "type", "value"],
)

flow3_df

Unnamed: 0,source,target,type,value
0,*4 operon,1 gene2,unknown,1.0
1,*4 operon,1 gene1/gene2,unknown,1.0
2,*4 operon,2 gene0/gene1,unknown,2.0


In [101]:
# Single flow from the regulon node ("source") to the operon node ("target"),
# carrying the full value of 4.0.
#
# The frame is built from a list of records in a single constructor call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
flow4_df = pd.DataFrame(
    [
        {"source": "*4 regulon", "target": "*4 operon", "type": "unknown", "value": 4.0},
    ],
    # Pin the column order explicitly so it does not depend on dict key order.
    columns=["source", "target", "type", "value"],
)
flow4_df

Unnamed: 0,source,target,type,value
0,*4 regulon,*4 operon,unknown,4.0


In [102]:
# Stack the four per-level flow tables into the single table handed to weave().
#
# pd.concat replaces the chained DataFrame.append calls (deprecated in
# pandas 1.4, removed in 2.0) and copies the data once instead of once per
# table. As with the original append chain, the row labels of each input frame
# are kept, so index labels repeat across the stacked tables.
flow_df = pd.concat([flow1_df, flow2_df, flow3_df, flow4_df])
flow_df

Unnamed: 0,source,target,type,value
0,1 gene2,1 gene2 SNP,unknown,1.0
1,1 RBS2,1 RBS2 DEL,unknown,1.0
2,1 TF1 BS,1 TF1 BS INS,unknown,1.0
3,1 TF2 BS,1 TF2 BS INS,unknown,1.0
0,1 gene2,1 gene2,unknown,1.0
1,1 gene1/gene2,1 RBS2,unknown,1.0
2,2 gene0/gene1,1 TF1 BS,unknown,1.0
3,2 gene0/gene1,1 TF2 BS,unknown,1.0
0,*4 operon,1 gene2,unknown,1.0
1,*4 operon,1 gene1/gene2,unknown,1.0


In [103]:
# Process ids for each layer of the diagram. Every id must match a
# "source"/"target" string used in the flow tables exactly.
mutations = [
    "1 gene2 SNP",
    "1 RBS2 DEL",
    "1 TF1 BS INS",
    "1 TF2 BS INS",
]
genomic_feats = [
    "1 gene2",
    "1 RBS2",
    "1 TF1 BS",
    "1 TF2 BS",
]
# NOTE(review): "1 gene2" is listed both here and in genomic_feats.
genetic_feats = [
    "1 gene2",
    "1 gene1/gene2",
    "2 gene0/gene1",
]
operons = ["*4 operon"]
regulons = ["*4 regulon"]

In [106]:
# One partition per layer, splitting on the "process" dimension so that every
# listed process id is drawn as its own node.
mutation_partition = Partition.Simple("process", mutations)
genomic_feat_partition = Partition.Simple("process", genomic_feats)
genetic_feat_partition = Partition.Simple("process", genetic_feats)
operon_partition = Partition.Simple("process", operons)
regulon_partition = Partition.Simple("process", regulons)

# Process groups keyed by layer name; each group receives its partition at
# construction time.
nodes = {
    "mutation": ProcessGroup(
        mutations, partition=mutation_partition, title="mutation"),
    "genomic feature": ProcessGroup(
        genomic_feats, partition=genomic_feat_partition, title="genomic feature"),
    "genetic feature": ProcessGroup(
        genetic_feats, partition=genetic_feat_partition, title="genetic feature"),
    "operon": ProcessGroup(
        operons, partition=operon_partition, title="operon"),
    "regulon": ProcessGroup(
        regulons, partition=regulon_partition, title="regulon"),
}

# Layers from left to right. The sequence is deliberate: it fixes both the
# direction of every bundle and the column order of the diagram, so do not
# reorder it arbitrarily.
layer_names = [
    "regulon",
    "operon",
    "genetic feature",
    "genomic feature",
    "mutation",
]

# One bundle per pair of adjacent layers (left layer -> right layer).
bundles = [
    Bundle(left, right)
    for left, right in zip(layer_names, layer_names[1:])
]

# One column per layer, each holding a single process group.
ordering = [[name] for name in layer_names]

sdd = SankeyDefinition(nodes, bundles, ordering)

# Every flow type is mapped to the same colour.
p_d = {"disrupt": "#6BAAD3", "unknown": "#6BAAD3", "repaired": "#6BAAD3"}

size = {"width": 1200, "height": 150}

sankey_data = weave(sdd, flow_df, palette=p_d)
results = sankey_data.to_widget(
    margins={"left": 400, "right": 350},
    **size
)
results.auto_save_svg("mutflow.svg")
results

SankeyWidget(groups=[{'id': 'regulon', 'type': 'process', 'title': 'regulon', 'nodes': ['regulon^*4 regulon']}…