Run this notebook inside the SAMap Docker container.

In [None]:
! pip install plotly==5.8.0
! pip install nodejs
! pip install ipywidgets>=7.6

In [None]:
# Install python-kaleido from conda-forge (-y answers the confirmation prompt).
# NOTE(review): presumably needed for the `plot.write_image(...)` SVG exports
# later in this notebook - confirm.
! conda install -y -c conda-forge python-kaleido

In [None]:
import matplotlib.cm
import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Cluster level mappings

In [None]:
# Read in the mapping scores and reshape the table to one row per
# (column label, row label) pair.
R = pd.read_csv( 'Metadata/GroupFigure_SAMap_scores.csv', index_col=0 )
R = R.unstack().reset_index(drop=False)
R.columns = ['source','target','Value']
# Species prefix = text before the first underscore, e.g. 'Ml_Muscle' -> 'Ml'
R['source_sp'] = R['source'].str.split('_').str[0]
R['target_sp'] = R['target'].str.split('_').str[0]
# Cut the mappings that are below 0.1
thresh = 0.1
indThresh = R.index[R['Value'] > thresh]
# Also keeping edges that represent the max mapping
# score for each cluster just so we plot everything
# these low scores are manually trimmed out in Illustrator.
# idxmax is called on the grouped Value column directly instead of going
# through groupby.apply, which also operates on the grouping columns.
indSourceMax = R.groupby(['source','target_sp'])['Value'].idxmax()
indTargetMax = R.groupby(['target','source_sp'])['Value'].idxmax()
keep = set(indThresh) | set(indSourceMax) | set(indTargetMax)
# The species helper columns were only needed for the grouping above
R = R.loc[R.index.isin(keep)].drop(columns=['source_sp','target_sp'])
R

In [None]:
# Remove this one tiny cluster that's not well annotated.
# regex=False matches the literal text '???-1'; the previous pattern relied on
# the invalid escape sequence '\?' inside a non-raw string literal.
R = R.loc[~R.source.str.contains('???-1', regex=False)&
          ~R.target.str.contains('???-1', regex=False),:].reset_index(drop=True)
# Put species in the order we want: Ml -> Pl, Pl -> Sm, Sm -> Ml
R = R.loc[ ((R.source.str.startswith('Ml')&R.target.str.startswith('Pl'))|
            (R.source.str.startswith('Pl')&R.target.str.startswith('Sm'))|
            (R.source.str.startswith('Sm')&R.target.str.startswith('Ml'))), : ]\
     .reset_index(drop=True)
# Rename the Mlig clusters on the right so we can plot separately.
# The pattern is anchored on the 'Ml_' prefix so that re-running this cell
# leaves already-renamed 'Ml2_' targets alone instead of producing 'Ml22_'.
R.loc[:,'target'] = R.target.str.replace( r'^Ml_', 'Ml2_', regex=True )
# Add a little bit to the ones that aren't connected so they plot
# (again, removed in Illustrator)
R.loc[R.Value==0,'Value'] = 1e-4
R

In [1]:
# Show every edge at or below the score threshold: these are the ones
# that will need to be removed by hand later
R.loc[R['Value'] <= thresh]

In [None]:
# Hand-curated top-to-bottom plotting order of the clusters in each of the
# four node columns (Ml, Pl, Sm, and Ml again under the 'Ml2' prefix).
label_1 = [
    'Ml_Cathepsin', 'Ml_Intestine-1', 'Ml_Intestine-2',
    'Ml_GSC', 'Ml_Female Germline', 'Ml_Male Germline',
    'Ml_Neoblast',
    'Ml_Neural Progenitors', 'Ml_Neural-1', 'Ml_Neural-2', 'Ml_Neural-3',
    'Ml_Neural-4', 'Ml_Neural-POU4',
    'Ml_Muscle',
    'Ml_Epidermal Progenitors', 'Ml_Epidermal-1', 'Ml_Epidermal-2',
    'Ml_Parenchymal-1', 'Ml_Parenchymal-2', 'Ml_Parenchymal-3',
    'Ml_Parenchymal-4', 'Ml_Parenchymal-5',
    'Ml_Protonephridia',
    'Ml_???-2',
]
label_2 = [
    'Pl_Cathepsin', 'Pl_Intestine-1', 'Pl_Intestine-2',
    'Pl_Ophis',
    'Pl_GSC', 'Pl_GSC progeny/diff germline',
    'Pl_Neoblast-1', 'Pl_Neoblast-2',
    'Pl_Neural Progenitors', 'Pl_Neural-1', 'Pl_Neural-2', 'Pl_Neural-POU4',
    'Pl_Muscle-1', 'Pl_Muscle-2', 'Pl_Muscle-3',
    'Pl_Epidermal-1', 'Pl_Epidermal-2',
    'Pl_Pharynx',
    'Pl_Parenchymal',
    'Pl_Protonephridia',
]
label_3 = [
    'Sm_Cathepsin', 'Sm_Intestine',
    'Sm_S1', 'Sm_GSC', 'Sm_Neoblast',
    'Sm_Neural Progenitors', 'Sm_Neural-1', 'Sm_Neural-2', 'Sm_Neural-POU4',
    'Sm_Neural-3', 'Sm_Neural-4', 'Sm_Neural-5',
    'Sm_Muscle Progenitors', 'Sm_Muscle-1', 'Sm_Muscle-2', 'Sm_Muscle-3',
    'Sm_Tegument Progenitors', 'Sm_Tegument-1', 'Sm_Tegument-2',
    'Sm_Vitellocytes', 'Sm_Oesophageal Gland',
    'Sm_Protonephridia',
]
# The right-hand column repeats the Ml clusters in the same order, under the
# 'Ml2' prefix
label_4 = [ 'Ml2_' + name[len('Ml_'):] for name in label_1 ]

In [None]:
# One edge table per pair of neighbouring columns, selected by which
# column's label list the edge's source belongs to (1->2, 2->3, 3->4)
R12, R23, R34 = (
    R.loc[R['source'].isin(column_labels)].reset_index(drop=True)
    for column_labels in (label_1, label_2, label_3)
)

In [None]:
# Translate each edge's cluster names into positions within the label list of
# its column. The names are read column-wise rather than through a row-wise
# DataFrame.apply(axis=1), which built a Series per row just to read one field
# and did not cope with an empty edge table.
# list.index raises ValueError when a cluster is missing from the label lists.
for edges, left_labels, right_labels in ( (R12, label_1, label_2),
                                         (R23, label_2, label_3),
                                         (R34, label_3, label_4) ):
    edges['source_idx'] = [ left_labels.index(name) for name in edges['source'] ]
    edges['target_idx'] = [ right_labels.index(name) for name in edges['target'] ]

In [None]:
# node_gap: spacing added between consecutive nodes of a column.
# node_width_cof: factor that turns summed link values into a node's extent.
node_gap, node_width_cof = 0.005, 0.05

In [None]:
# Every node of a column shares that column's horizontal position
x1, x2, x3, x4 = (
    [x_pos] * len(column_labels)
    for x_pos, column_labels in zip( (0.05, 0.35, 0.65, 0.95),
                                     (label_1, label_2, label_3, label_4) )
)

In [None]:
# Extent of each node = summed value of its links, scaled by node_width_cof.
# Outer columns have links on one side only; the inner columns take the
# larger of their incoming (left) and outgoing (right) totals.
width1 = [ np.sum(R12.loc[R12['source']==name, 'Value'])*node_width_cof
           for name in label_1 ]

width2 = [ max( np.sum(R12.loc[R12['target']==name, 'Value']),
                np.sum(R23.loc[R23['source']==name, 'Value']) )*node_width_cof
           for name in label_2 ]

width3 = [ max( np.sum(R23.loc[R23['target']==name, 'Value']),
                np.sum(R34.loc[R34['source']==name, 'Value']) )*node_width_cof
           for name in label_3 ]

width4 = [ np.sum(R34.loc[R34['target']==name, 'Value'])*node_width_cof
           for name in label_4 ]

In [None]:
# Vertical centre of each node: the extents of all nodes above it, one
# node_gap per preceding node, plus half of its own extent. For the first
# node the sum and the gap term are both zero, leaving just half its extent.
y1, y2, y3, y4 = (
    [ np.sum(widths[:i]) + i*node_gap + widths[i]/2 for i in range(len(widths)) ]
    for widths in (width1, width2, width3, width4)
)

In [None]:
# The figure takes one flat node list (label_1+label_2+label_3+label_4), so
# the per-column indices are shifted by the number of nodes in the columns
# that come before.
offset_2 = len(label_1)
offset_3 = offset_2 + len(label_2)
offset_4 = offset_3 + len(label_3)

source12, target12 = R12['source_idx'],            R12['target_idx'] + offset_2
source23, target23 = R23['source_idx'] + offset_2, R23['target_idx'] + offset_3
source34, target34 = R34['source_idx'] + offset_3, R34['target_idx'] + offset_4

values12, values23, values34 = R12['Value'], R23['Value'], R34['Value']

In [None]:
tissues = [ 'Cathepsin', 'Protonephridia', 'Intestine', 'Germline', 'Neoblast',
            'Neural', 'Muscle', 'Epidermal', 'Pharynx', 'Parenchymal', 'Other' ]

# One hex colour per tissue, sampled from 'tab20' at evenly spaced positions.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('tab20')
tcmap = { tissue: matplotlib.colors.to_hex( cmap(i/(len(tissues)-1)) )
          for i, tissue in enumerate(tissues) }

# Default tissue of a cluster: the text between the species prefix and the
# first '-' of its name, e.g. 'Pl_Muscle-1' -> 'Muscle'
gtmap = { g: g.split('_')[1].split('-')[0] for g in set(R.source) | set(R.target) }
# Explicit assignments: clusters whose name is not simply '<tissue>-<n>', plus
# entries that guarantee a key exists even if the cluster has no edge in R
gtmap.update({
    'Sm_Muscle Progenitors': 'Muscle',
    'Sm_Neural Progenitors': 'Neural',
    'Ml_Neural Progenitors': 'Neural',
    'Pl_Neural Progenitors': 'Neural',
    'Ml2_Neural Progenitors': 'Neural',
    'Ml_Female Germline': 'Germline',
    'Ml2_Female Germline': 'Germline',
    'Ml_Male Germline': 'Germline',
    'Ml2_Male Germline': 'Germline',
    'Ml_GSC': 'Germline',
    'Ml2_GSC': 'Germline',
    'Sm_GSC': 'Germline',
    'Sm_S1': 'Neoblast',
    'Sm_Vitellocytes': 'Other',
    'Pl_GSC': 'Germline',
    'Pl_Ophis': 'Other',
    'Pl_GSC progeny/diff germline': 'Germline',
    'Sm_Tegument Progenitors': 'Epidermal',
    'Sm_Tegument-1': 'Epidermal',
    'Sm_Tegument-2': 'Epidermal',
    'Ml_Epidermal Progenitors': 'Epidermal',
    'Ml2_Epidermal Progenitors': 'Epidermal',
    'Ml_???-2': 'Epidermal',
    'Ml2_???-2': 'Epidermal',
    'Sm_Oesophageal Gland': 'Other',
    'Sm_Cathepsin': 'Cathepsin',
    'Ml_Cathepsin': 'Cathepsin',
    'Ml2_Cathepsin': 'Cathepsin',
    'Pl_Cathepsin': 'Cathepsin',
})

# Cluster -> hex colour, via the cluster's tissue
gcmap = { g: tcmap[gtmap[g]] for g in gtmap }
gcmap

In [None]:
# Colour of every node, column by column, in plotting order
color1, color2, color3, color4 = (
    [ gcmap[cluster] for cluster in column_labels ]
    for column_labels in (label_1, label_2, label_3, label_4)
)

In [None]:
# Nodes of the four columns are concatenated into one flat list; the node
# label is the cluster name without its species prefix, in bold.
node_spec = dict(
    label=[ '<b>'+x.split('_')[1]+'</b>' for x in label_1+label_2+label_3+label_4 ],
    x=x1+x2+x3+x4,
    y=y1+y2+y3+y4,
    pad=10,
    color=color1+color2+color3+color4,
)
# Links of the three column pairs, stacked in the same 12, 23, 34 order
link_spec = dict(
    source=pd.concat([source12, source23, source34]).values,
    target=pd.concat([target12, target23, target34]).values,
    value=pd.concat([values12, values23, values34]).values,
)
plot = go.Figure(go.Sankey(arrangement='snap', node=node_spec, link=link_spec))

plot.update_layout(width=1000, height=500, font_size=12,
                   font=dict(family='Arial', color='black'))
# Export the panel as SVG, then render it inline
plot.write_image( 'Plots/FigS2/PanelS2c.svg' )
plot.show()

# Family level mappings

In [None]:
# Load the family-level scores and reshape to one row per pair of labels
R = ( pd.read_csv( 'Metadata/Family_SAMap_scores.csv', index_col=0 )
        .unstack()
        .reset_index(drop=False) )
R.columns = ['Source','Target','Value']
# Keep scores of at least 0.3
R = R[R['Value'] >= 0.3]
# Keep only the Ml -> Pl, Pl -> Sm and Sm -> Ml pairs
in_order = pd.Series(False, index=R.index)
for left_sp, right_sp in ( ('Ml','Pl'), ('Pl','Sm'), ('Sm','Ml') ):
    in_order |= R['Source'].str.startswith(left_sp) & R['Target'].str.startswith(right_sp)
R = R[in_order].reset_index(drop=True)

R

In [None]:
# Give the Ml targets the 'Ml2' prefix so they form their own column.
# The pattern is anchored on the 'Ml_' prefix: re-running this cell leaves
# already-renamed 'Ml2_' targets alone instead of turning them into 'Ml22_'.
R['Target'] = R['Target'].str.replace( r'^Ml_', 'Ml2_', regex=True )
R

In [None]:
# Add back in the protonephridia: one link of value 0.1 for each
# Ml -> Pl, Pl -> Sm and Sm -> Ml2 step
toAdd = pd.DataFrame({ 'Source':['Ml_Protonephridia','Pl_Protonephridia','Sm_Protonephridia'],
                       'Target':['Pl_Protonephridia','Sm_Protonephridia','Ml2_Protonephridia'],
                       'Value':[0.1,0.1,0.1] })
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
R = pd.concat( [R, toAdd], ignore_index=True )

In [None]:
# Display the edge table, which now includes the appended protonephridia links
R

In [None]:
# List the distinct source labels present in the edge table
# (presumably used as a reference when writing the label lists below)
R.Source.unique()

In [None]:
# Top-to-bottom plotting order of the tissue families; each column lists the
# families in this order under its own species prefix.
families = [ 'Cathepsin', 'Intestine', 'Germline', 'Neoblast', 'Neural',
             'Muscle', 'Epidermal', 'Parenchymal', 'Protonephridia' ]
label_1 = [ 'Ml_' + family for family in families ]
label_2 = [ 'Pl_' + family for family in families ]
# The Sm column has no Parenchymal node
label_3 = [ 'Sm_' + family for family in families if family != 'Parenchymal' ]
# The right-hand Ml column has no Germline or Parenchymal node
label_4 = [ 'Ml2_' + family for family in families
            if family not in ('Germline', 'Parenchymal') ]

In [None]:
# One edge table per pair of neighbouring columns, selected by which
# column's label list the edge's Source belongs to (1->2, 2->3, 3->4)
R12, R23, R34 = (
    R.loc[R['Source'].isin(column_labels)].reset_index(drop=True)
    for column_labels in (label_1, label_2, label_3)
)

In [None]:
# Translate each edge's labels into positions within the label list of its
# column. The names are read column-wise rather than through a row-wise
# DataFrame.apply(axis=1), which built a Series per row just to read one field
# and did not cope with an empty edge table.
# list.index raises ValueError when a label is missing from the label lists.
for edges, left_labels, right_labels in ( (R12, label_1, label_2),
                                         (R23, label_2, label_3),
                                         (R34, label_3, label_4) ):
    edges['source_idx'] = [ left_labels.index(name) for name in edges['Source'] ]
    edges['target_idx'] = [ right_labels.index(name) for name in edges['Target'] ]

In [None]:
# node_gap: spacing added between consecutive nodes of a column.
# node_width_cof: factor that turns summed link values into a node's extent.
node_gap, node_width_cof = 0.005, 0.05

In [None]:
# Every node of a column shares that column's horizontal position
x1, x2, x3, x4 = (
    [x_pos] * len(column_labels)
    for x_pos, column_labels in zip( (0.05, 0.35, 0.65, 0.95),
                                     (label_1, label_2, label_3, label_4) )
)

In [None]:
# Extent of each node = summed value of its links, scaled by node_width_cof.
# Outer columns have links on one side only; the inner columns take the
# larger of their incoming (left) and outgoing (right) totals.
width1 = [ np.sum(R12.loc[R12['Source']==name, 'Value'])*node_width_cof
           for name in label_1 ]

width2 = [ max( np.sum(R12.loc[R12['Target']==name, 'Value']),
                np.sum(R23.loc[R23['Source']==name, 'Value']) )*node_width_cof
           for name in label_2 ]

width3 = [ max( np.sum(R23.loc[R23['Target']==name, 'Value']),
                np.sum(R34.loc[R34['Source']==name, 'Value']) )*node_width_cof
           for name in label_3 ]

width4 = [ np.sum(R34.loc[R34['Target']==name, 'Value'])*node_width_cof
           for name in label_4 ]

In [None]:
# Vertical centre of each node: the extents of all nodes above it, one
# node_gap per preceding node, plus half of its own extent. For the first
# node the sum and the gap term are both zero, leaving just half its extent.
y1, y2, y3, y4 = (
    [ np.sum(widths[:i]) + i*node_gap + widths[i]/2 for i in range(len(widths)) ]
    for widths in (width1, width2, width3, width4)
)

In [None]:
# The figure takes one flat node list (label_1+label_2+label_3+label_4), so
# the per-column indices are shifted by the number of nodes in the columns
# that come before.
offset_2 = len(label_1)
offset_3 = offset_2 + len(label_2)
offset_4 = offset_3 + len(label_3)

source12, target12 = R12['source_idx'],            R12['target_idx'] + offset_2
source23, target23 = R23['source_idx'] + offset_2, R23['target_idx'] + offset_3
source34, target34 = R34['source_idx'] + offset_3, R34['target_idx'] + offset_4

values12, values23, values34 = R12['Value'], R23['Value'], R34['Value']

In [None]:
# Fixed colour per tissue family.
# NOTE(review): 'Epidermal' and 'Protonephridia' are both '#F07000', so those
# nodes cannot be told apart by colour - confirm whether that is intended.
cmap = {
    'Cathepsin': '#1A70A4',
    'Epidermal': '#F07000',
    'Germline': '#188015',
    'Intestine': '#D62728',
    'Muscle': '#8050B0',
    'Neoblast': '#8C564B',
    'Neural': '#E070C0',
    'Parenchymal': '#BCBD22',
    'Protonephridia': '#F07000',
}

# Node colours per column: look up the family name that follows the
# species prefix of each label
color1, color2, color3, color4 = (
    [ cmap[name.split('_')[1]] for name in column_labels ]
    for column_labels in (label_1, label_2, label_3, label_4)
)

In [None]:
# Nodes of the four columns are concatenated into one flat list
node_spec = dict(
    label=label_1+label_2+label_3+label_4,
    x=x1+x2+x3+x4,
    y=y1+y2+y3+y4,
    pad=15,
    thickness=20,
    line=dict(color="black", width=1.0),
    color=color1+color2+color3+color4,
)
# Links of the three column pairs, stacked in the same 12, 23, 34 order
link_spec = dict(
    source=pd.concat([source12, source23, source34]).values,
    target=pd.concat([target12, target23, target34]).values,
    value=pd.concat([values12, values23, values34]).values,
)
plot = go.Figure(go.Sankey(arrangement='snap', node=node_spec, link=link_spec))

plot.update_layout(width=750, height=550, font_size=10,
                   font=dict(family='Arial', color='black'))
# Export the panel as SVG, then render it inline
plot.write_image("Plots/Fig1/Panel1e.svg", format="svg")

plot.show()

# Neural mappings

In [None]:
# Load the neural scores and reshape to one row per pair of labels
R = ( pd.read_csv( 'Metadata/Neural_SAMap_scores.csv', index_col=0 )
        .unstack()
        .reset_index(drop=False) )
R.columns = ['Source','Target','Value']
# Keep scores of at least 0.2
R = R[R['Value'] >= 0.2]
# Keep only the Ml -> Pl, Pl -> Sm and Sm -> Ml pairs
in_order = pd.Series(False, index=R.index)
for left_sp, right_sp in ( ('Ml','Pl'), ('Pl','Sm'), ('Sm','Ml') ):
    in_order |= R['Source'].str.startswith(left_sp) & R['Target'].str.startswith(right_sp)
R = R[in_order].reset_index(drop=True)

R

In [None]:
# Give the Ml targets the 'Ml2' prefix so they form their own column.
# The pattern is anchored on the 'Ml_' prefix: re-running this cell leaves
# already-renamed 'Ml2_' targets alone instead of turning them into 'Ml22_'.
R['Target'] = R['Target'].str.replace( r'^Ml_', 'Ml2_', regex=True )
R

In [None]:
# Top-to-bottom plotting order of the numbered clusters in each column
label_1 = [ 'Ml_%d' % k for k in (0, 1, 2, 4, 5, 6, 8, 9, 11) ]
label_2 = [ 'Pl_%d' % k for k in range(9) ]
label_3 = [ 'Sm_%d' % k for k in (0, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                  12, 13, 14, 15, 16, 17) ]
label_4 = [ 'Ml2_%d' % k for k in (0, 1, 2, 3, 4, 5, 6, 8, 9, 11) ]

In [None]:
# One edge table per pair of neighbouring columns, selected by which
# column's label list the edge's Source belongs to (1->2, 2->3, 3->4)
R12, R23, R34 = (
    R.loc[R['Source'].isin(column_labels)].reset_index(drop=True)
    for column_labels in (label_1, label_2, label_3)
)

In [None]:
# Translate each edge's labels into positions within the label list of its
# column. The names are read column-wise rather than through a row-wise
# DataFrame.apply(axis=1), which built a Series per row just to read one field
# and did not cope with an empty edge table.
# list.index raises ValueError when a label is missing from the label lists.
for edges, left_labels, right_labels in ( (R12, label_1, label_2),
                                         (R23, label_2, label_3),
                                         (R34, label_3, label_4) ):
    edges['source_idx'] = [ left_labels.index(name) for name in edges['Source'] ]
    edges['target_idx'] = [ right_labels.index(name) for name in edges['Target'] ]

In [None]:
# node_gap: spacing added between consecutive nodes of a column.
# node_width_cof: factor that turns summed link values into a node's extent.
node_gap, node_width_cof = 0.005, 0.05

In [None]:
# Every node of a column shares that column's horizontal position
x1, x2, x3, x4 = (
    [x_pos] * len(column_labels)
    for x_pos, column_labels in zip( (0.05, 0.35, 0.65, 0.95),
                                     (label_1, label_2, label_3, label_4) )
)

In [None]:
# Extent of each node = summed value of its links, scaled by node_width_cof.
# Outer columns have links on one side only; the inner columns take the
# larger of their incoming (left) and outgoing (right) totals.
width1 = [ np.sum(R12.loc[R12['Source']==name, 'Value'])*node_width_cof
           for name in label_1 ]

width2 = [ max( np.sum(R12.loc[R12['Target']==name, 'Value']),
                np.sum(R23.loc[R23['Source']==name, 'Value']) )*node_width_cof
           for name in label_2 ]

width3 = [ max( np.sum(R23.loc[R23['Target']==name, 'Value']),
                np.sum(R34.loc[R34['Source']==name, 'Value']) )*node_width_cof
           for name in label_3 ]

width4 = [ np.sum(R34.loc[R34['Target']==name, 'Value'])*node_width_cof
           for name in label_4 ]

In [None]:
# Vertical centre of each node: the extents of all nodes above it, one
# node_gap per preceding node, plus half of its own extent. For the first
# node the sum and the gap term are both zero, leaving just half its extent.
y1, y2, y3, y4 = (
    [ np.sum(widths[:i]) + i*node_gap + widths[i]/2 for i in range(len(widths)) ]
    for widths in (width1, width2, width3, width4)
)

In [None]:
# Colour per cluster number: position k in the palette is the colour of
# cluster 'k' (keys are the numbers as strings, '0' .. '17')
palette = [ '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
            '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
            '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d' ]
cmap = { str(k): shade for k, shade in enumerate(palette) }

# Node colours per column: look up the cluster number that follows the
# species prefix of each label
color1, color2, color3, color4 = (
    [ cmap[name.split('_')[1]] for name in column_labels ]
    for column_labels in (label_1, label_2, label_3, label_4)
)

In [None]:
# Build the link arrays for THIS section. No earlier cell of the neural
# section defines source12/target12/values12 (etc.), so on a top-to-bottom run
# the figure would silently reuse the arrays left over from the family-level
# section. Node indices are positions in label_1+label_2+label_3+label_4,
# hence the per-column offsets.
source12 = R12['source_idx']
source23 = R23['source_idx']+len(label_1)
source34 = R34['source_idx']+len(label_1)+len(label_2)

target12 = R12['target_idx']+len(label_1)
target23 = R23['target_idx']+len(label_1)+len(label_2)
target34 = R34['target_idx']+len(label_1)+len(label_2)+len(label_3)

values12 = R12['Value']
values23 = R23['Value']
values34 = R34['Value']

plot = go.Figure(go.Sankey(arrangement='snap',
                           node = {
                               "label":  label_1+label_2+label_3+label_4,
                               "x": x1+x2+x3+x4,
                               "y": y1+y2+y3+y4,
                               'pad': 15,
                               'thickness': 20,
                               'line': dict(color = "black", width = 1.0),
                               'color':color1+color2+color3+color4},
                           link = {
                               "source": pd.concat([source12, source23, source34]).values,
                               "target": pd.concat([target12, target23, target34]).values,
                               "value": pd.concat([values12, values23, values34]).values}))

plot.update_layout(width=850, height=550, font_size=10, font = dict(family='Arial', color='black'))
# Export the panel as SVG, then render it inline
plot.write_image("Plots/Fig1/Panel1f.svg", format="svg")
plot.show()

# Muscle mappings

In [None]:
# Load the muscle scores and reshape to one row per pair of labels
R = ( pd.read_csv( 'Metadata/Muscle_SAMap_scores.csv', index_col=0 )
        .unstack()
        .reset_index(drop=False) )
R.columns = ['Source','Target','Value']
# Keep scores of at least 0.2
R = R[R['Value'] >= 0.2]
# Keep only the Ml -> Pl, Pl -> Sm and Sm -> Ml pairs
in_order = pd.Series(False, index=R.index)
for left_sp, right_sp in ( ('Ml','Pl'), ('Pl','Sm'), ('Sm','Ml') ):
    in_order |= R['Source'].str.startswith(left_sp) & R['Target'].str.startswith(right_sp)
R = R[in_order].reset_index(drop=True)

R

In [None]:
# Give the Ml targets the 'Ml2' prefix so they form their own column.
# The pattern is anchored on the 'Ml_' prefix: re-running this cell leaves
# already-renamed 'Ml2_' targets alone instead of turning them into 'Ml22_'.
R['Target'] = R['Target'].str.replace( r'^Ml_', 'Ml2_', regex=True )
R

In [None]:
# Labels correspond to the leiden clusters from the muscle_subclustering notebook
# We re-label them in the illustrator just to go from 0 to N.
# The tuples give the top-to-bottom plotting order of each column.
label_1 = [ 'Ml_%d' % k for k in (0, 2, 3, 5, 8, 1) ]
label_2 = [ 'Pl_%d' % k for k in (0, 1, 2, 4, 7, 3, 6) ]
label_3 = [ 'Sm_%d' % k for k in (2, 0, 3, 6, 10, 5, 4, 8) ]
label_4 = [ 'Ml2_%d' % k for k in (2, 3, 8, 6, 9) ]

In [None]:
# One edge table per pair of neighbouring columns, selected by which
# column's label list the edge's Source belongs to (1->2, 2->3, 3->4)
R12, R23, R34 = (
    R.loc[R['Source'].isin(column_labels)].reset_index(drop=True)
    for column_labels in (label_1, label_2, label_3)
)

In [None]:
# Translate each edge's labels into positions within the label list of its
# column. The names are read column-wise rather than through a row-wise
# DataFrame.apply(axis=1), which built a Series per row just to read one field
# and did not cope with an empty edge table.
# list.index raises ValueError when a label is missing from the label lists.
for edges, left_labels, right_labels in ( (R12, label_1, label_2),
                                         (R23, label_2, label_3),
                                         (R34, label_3, label_4) ):
    edges['source_idx'] = [ left_labels.index(name) for name in edges['Source'] ]
    edges['target_idx'] = [ right_labels.index(name) for name in edges['Target'] ]

In [None]:
# node_gap: spacing added between consecutive nodes of a column.
# node_width_cof: factor that turns summed link values into a node's extent.
node_gap, node_width_cof = 0.005, 0.05

In [None]:
# Every node of a column shares that column's horizontal position
x1, x2, x3, x4 = (
    [x_pos] * len(column_labels)
    for x_pos, column_labels in zip( (0.05, 0.35, 0.65, 0.95),
                                     (label_1, label_2, label_3, label_4) )
)

In [None]:
# Extent of each node = summed value of its links, scaled by node_width_cof.
# Outer columns have links on one side only; the inner columns take the
# larger of their incoming (left) and outgoing (right) totals.
width1 = [ np.sum(R12.loc[R12['Source']==name, 'Value'])*node_width_cof
           for name in label_1 ]

width2 = [ max( np.sum(R12.loc[R12['Target']==name, 'Value']),
                np.sum(R23.loc[R23['Source']==name, 'Value']) )*node_width_cof
           for name in label_2 ]

width3 = [ max( np.sum(R23.loc[R23['Target']==name, 'Value']),
                np.sum(R34.loc[R34['Source']==name, 'Value']) )*node_width_cof
           for name in label_3 ]

width4 = [ np.sum(R34.loc[R34['Target']==name, 'Value'])*node_width_cof
           for name in label_4 ]

In [None]:
# Vertical centre of each node: the extents of all nodes above it, one
# node_gap per preceding node, plus half of its own extent. For the first
# node the sum and the gap term are both zero, leaving just half its extent.
y1, y2, y3, y4 = (
    [ np.sum(widths[:i]) + i*node_gap + widths[i]/2 for i in range(len(widths)) ]
    for widths in (width1, width2, width3, width4)
)

In [None]:
# The figure takes one flat node list (label_1+label_2+label_3+label_4), so
# the per-column indices are shifted by the number of nodes in the columns
# that come before.
offset_2 = len(label_1)
offset_3 = offset_2 + len(label_2)
offset_4 = offset_3 + len(label_3)

source12, target12 = R12['source_idx'],            R12['target_idx'] + offset_2
source23, target23 = R23['source_idx'] + offset_2, R23['target_idx'] + offset_3
source34, target34 = R34['source_idx'] + offset_3, R34['target_idx'] + offset_4

values12, values23, values34 = R12['Value'], R23['Value'], R34['Value']

In [None]:
# Colour per cluster number: the k-th hex code is the colour of cluster 'k'
# (keys are the numbers as strings, '0' .. '17')
cmap = dict(zip(
    map(str, range(18)),
    [ '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
      '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
      '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d' ],
))

# Node colours per column: look up the cluster number that follows the
# species prefix of each label
color1, color2, color3, color4 = (
    [ cmap[name.split('_')[1]] for name in column_labels ]
    for column_labels in (label_1, label_2, label_3, label_4)
)

In [None]:
# Nodes of the four columns are concatenated into one flat list
node_spec = dict(
    label=label_1+label_2+label_3+label_4,
    x=x1+x2+x3+x4,
    y=y1+y2+y3+y4,
    pad=15,
    thickness=20,
    line=dict(color="black", width=1.0),
    color=color1+color2+color3+color4,
)
# Links of the three column pairs, stacked in the same 12, 23, 34 order
link_spec = dict(
    source=pd.concat([source12, source23, source34]).values,
    target=pd.concat([target12, target23, target34]).values,
    value=pd.concat([values12, values23, values34]).values,
)
plot = go.Figure(go.Sankey(arrangement='snap', node=node_spec, link=link_spec))

plot.update_layout(width=850, height=550, font_size=10,
                   font=dict(family='Arial', color='black'))
# Export the panel as SVG, then render it inline
plot.write_image("Plots/FigS2/PanelS2d.svg", format="svg")

plot.show()