Using the results from the X2K pipeline, this notebook produces network visualizations for the pathways enriched in a specified cohort.

In [1]:
import pandas as pd
import numpy as np
import math


Specify file location:

In [2]:
# Folder holding the X2K result CSVs; later cells build every input path as
# f"{results_folder}/<file>.csv", so this relative path is resolved against the
# notebook's working directory.
results_folder = "results" # folder containing X2K results


In [None]:
import ipycytoscape
import json

# Locations of the "up" z-score tables inside the results folder.
x2k_up_path = f'{results_folder}/up_x2k_zscores.csv'
chea_up_path = f'{results_folder}/up_chea_zscores.csv'

# The first CSV column becomes the row index of each table.
x2k_up = pd.read_csv(x2k_up_path, index_col=0)
chea_up = pd.read_csv(chea_up_path, index_col=0)

# The row labels of the two tables serve as the reference lists used later to
# classify network nodes (presumably gene symbols - confirm against the CSVs).
KINASES = x2k_up.index.tolist()
TRANSCRIPTION_FACTORS = chea_up.index.tolist()

In [3]:
def get_freq(dataframe):
    """Summarise how often, and how early, each value occurs across the columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose columns are ordered lists of values (missing cells allowed).

    Returns
    -------
    pandas.DataFrame
        Indexed by the distinct non-missing values (sorted, as returned by
        ``np.unique``) with two columns:

        * ``counts`` - number of cells in the whole table holding the value.
        * ``mean_rank`` - mean, over the columns that contain the value, of the
          0-based position of its first occurrence in that column.
    """
    # Frequency
    flattened = dataframe.to_numpy().flatten()
    # pd.notna also drops None / pd.NA, which the string comparison against 'nan' missed
    present = [x for x in flattened if pd.notna(x)]
    values, counts = np.unique(present, return_counts=True)
    ret = pd.DataFrame({'counts': counts}, index=values)

    # Mean rank
    # Build one value -> first-position lookup per column up front, instead of
    # converting and scanning every column again for every value.
    first_positions = []
    for col in dataframe.columns:
        positions = {}
        for pos, item in enumerate(dataframe[col].to_list()):
            # setdefault keeps the earliest position, matching list.index()
            positions.setdefault(item, pos)
        first_positions.append(positions)

    mean_ranks = []
    for g in ret.index:
        ranks = [positions[g] for positions in first_positions if g in positions]
        mean_ranks.append(np.mean(ranks))
    ret['mean_rank'] = mean_ranks
    return ret

# X2K pathway visualization

Set the plotting parameters:

In [None]:
# Cohort: sample IDs are taken from the index column of samples.csv
samples = pd.read_csv("samples.csv", index_col=0).index.tolist()

# Per-sample cut-off: only the first `rank_threshold` rows of the kinase table are kept
rank_threshold = 30
# Fraction of the cohort size used as the minimum edge count; raising it shrinks the network
graph_thresh = 0.4

print(f"Total N: {len(samples)}")
# Turn the fraction into an absolute count, rounded up
thresh = math.ceil(graph_thresh * len(samples))
print(f"Edge weight threshold: {thresh}")

# Enrichment direction to visualize: "up" or "dn" (used as a file-name prefix later on)
dir = "up"
# Node fill colors by type
kinase_col, tf_col = "#FFD700", "#F08080"

In [243]:
# Build network
#
# Inputs: one column per sample in each of the three tables below. For every
# sample an edge is created from each intermediate to each TF and from each
# kinase to each intermediate; identical (target, source) pairs are then counted
# across the cohort.
TFs = pd.read_csv(f"{results_folder}/{dir}_TFs.csv")
intermediates = pd.read_csv(f"{results_folder}/filtered_{dir}_intermediates.csv")
# Only the first `rank_threshold` rows of the kinase table are considered
kinases = pd.read_csv(f"{results_folder}/{dir}_x2k_kinases.csv").iloc[:rank_threshold]

# Collect all (target, source) pairs in a plain list and build the frame once;
# concatenating inside the loop re-copies the growing frame on every iteration.
edge_pairs = []
for sample_id in samples:
    sample_tfs = TFs[sample_id].dropna()
    sample_intermediates = intermediates[sample_id].dropna()
    sample_kinases = kinases[sample_id].dropna()
    # intermediate -> TF
    edge_pairs.extend((tf, p) for p in sample_intermediates for tf in sample_tfs)
    # kinase -> intermediate
    edge_pairs.extend((p, kinase) for kinase in sample_kinases for p in sample_intermediates)
edges_df = pd.DataFrame(edge_pairs, columns=['target', 'source'])

# Distinct node ids in order of first appearance (TFs, then intermediates, then
# kinases). pd.unique is given a Series because passing a plain list is deprecated.
all_ids = np.concatenate(
    [table[samples].to_numpy().ravel() for table in (TFs, intermediates, kinases)]
)
nodes = pd.unique(pd.Series(all_ids, dtype=object).dropna())

nodes_df = pd.DataFrame({'id': nodes})
nodes_df['type'] = 'intermediate'
nodes_df.loc[nodes_df['id'].isin(TRANSCRIPTION_FACTORS), 'type'] = 'TF'
# Assigned last, so an id present in both reference lists ends up as a kinase
nodes_df.loc[nodes_df['id'].isin(KINASES), 'type'] = 'kinase'
nodes_df['color'] = "#DCDCDC"
nodes_df.loc[nodes_df['type'] == 'TF', 'color'] = tf_col
nodes_df.loc[nodes_df['type'] == 'kinase', 'color'] = kinase_col

# Edge weight = number of times the same (target, source) pair was generated
edges_df = edges_df.groupby(['target', 'source']).size().reset_index(name='count')
edges_df['label'] = edges_df['count']
edges_df['color'] = 'black'
# Drop self-loops
edges_df = edges_df.loc[edges_df.source != edges_df.target]

nodes_df['label'] = nodes_df['id']

In [248]:
# Keep only the edges whose count reaches the cohort-derived threshold
is_frequent = edges_df['count'].ge(thresh)
edges_show = edges_df[is_frequent]
edges_show

Unnamed: 0,target,source,count,label,color
184,AC018755.18,BTK,7,7,black
188,AC018755.18,CSK,6,6,black
195,AC018755.18,FGR,7,7,black
198,AC018755.18,FYN,6,6,black
203,AC018755.18,HCK,7,7,black
...,...,...,...,...,...
52169,WAS,PRKCD,8,8,black
52179,WAS,SYK,9,9,black
52181,WAS,TEK,6,6,black
52186,WAS,TYK2,7,7,black


In [249]:
# Keep only the nodes that are an endpoint of at least one retained edge
connected_ids = set(edges_show['source']).union(edges_show['target'])
nodes_show = nodes_df[nodes_df['id'].isin(connected_ids)]
nodes_show

Unnamed: 0,id,type,color,label
61,RUNX3,TF,#F08080,RUNX3
178,MYO1F,intermediate,#DCDCDC,MYO1F
182,PRKCB,kinase,#FFD700,PRKCB
183,APOBR,intermediate,#DCDCDC,APOBR
185,MAP4K1,kinase,#FFD700,MAP4K1
...,...,...,...,...
906,FYN,kinase,#FFD700,FYN
911,JAK1,kinase,#FFD700,JAK1
919,PRKCQ,kinase,#FFD700,PRKCQ
924,TEK,kinase,#FFD700,TEK


In [None]:
# Convert the filtered node / edge tables into Cytoscape JSON elements, load them
# into an ipycytoscape widget, apply the stylesheet and display the widget.
nodes_dict = nodes_show.to_dict('records')
edges_dict = edges_show.to_dict('records')

# building nodes
node_data_keys = ['id', 'label', 'type', 'color']  # always copied into the element's `data`
position_keys = ['position_x', 'position_y']
# optional element-level fields, copied only when the record has them
node_rest_keys = ['score', 'idInt', 'name', 'group', 'removed', 'selected',
                  'selectable', 'locked', 'grabbed', 'grabbable']

nodes_graph_list = []
for node in nodes_dict:
    dict_node = {'data': {key: node[key] for key in node_data_keys}}
    dict_node.update({key: value for key, value in node.items() if key in node_rest_keys})
    # a position is attached only when both coordinates are present
    if 'position_x' in node and 'position_y' in node:
        dict_node['position'] = {key: node[key] for key in position_keys}
    nodes_graph_list.append(dict_node)

# building edges
edge_required_keys = ['source', 'target']
# 'color' is forwarded so the stylesheet's data(color) mapping can resolve for edges
edge_optional_keys = ['label', 'count', 'color']
edge_rest_keys = ['score', 'weight', 'group', 'networkId', 'networkGroupId', 'intn',
                  'rIntnId', 'removed', 'selected', 'selectable', 'locked', 'grabbed',
                  'grabbable', 'classes']

edges_graph_list = []
for edge in edges_dict:
    edge_data = {key: edge[key] for key in edge_required_keys}
    edge_data.update({key: value for key, value in edge.items() if key in edge_optional_keys})
    # guarantee the field exists even if the edge table carries no color column
    edge_data.setdefault('color', 'black')
    dict_edge = {'data': edge_data}
    dict_edge.update({key: value for key, value in edge.items() if key in edge_rest_keys})
    edges_graph_list.append(dict_edge)

total_graph_dict = {'nodes': nodes_graph_list, 'edges': edges_graph_list}

cytoscapeobj = ipycytoscape.CytoscapeWidget()
# Round-trip through JSON: json.dumps raises if any value is not JSON-serializable,
# and the widget then receives plain JSON-compatible Python objects.
data_graph = json.dumps(total_graph_dict)
json_to_python = json.loads(data_graph)
cytoscapeobj.graph.add_graph_from_json(json_to_python)

# Stylesheet. Every data(...) mapper below refers to a field that the element
# builders above actually emit. The previous mappers on borderColor, borderWidth,
# lineColor and directed pointed at fields no element carried, so they never
# resolved; they are replaced by data(color) for edges or dropped.
style = [
    {
        "selector": 'node',
        "style": {
            'background-color': 'data(color)',
            'label': 'data(label)',
            "text-valign": "center",
            "text-halign": "center",
            'width': "50",
            'height': "50",
        }
    },
    {
        "selector": 'edge',
        "style": {
            'curve-style': 'straight',
            'line-color': 'data(color)',
            'width': '1',
            'label': '',
            "text-rotation": "autorotate",
            "text-margin-x": "0px",
            "text-margin-y": "0px",
            'font-size': '12px',
            'target-arrow-shape': 'none',
            'target-endpoint': 'outside-to-node',
            'source-endpoint': 'outside-to-node',
            'target-arrow-color': 'data(color)',
        }
    },
    {
        "selector": 'node.highlight',
        "style": {
            'border-color': 'gray',
            'border-width': '2px',
            'font-weight': 'bold',
            'font-size': '18px',
            'width': "90",
            'height': "90",
        }
    },
    {
        "selector": 'node.focused',
        "style": {
            'border-color': 'gray',
            'border-width': '2px',
            'font-weight': 'bold',
            'font-size': '18px',
            'width': "90",
            'height': "90",
        }
    },
    {
        "selector": 'edge.focusedColored',
        "style": {
            'line-color': '#F8333C',
            'width': '6'
        }
    },
    {
        "selector": 'node.semitransp',
        "style": {'opacity': '0.5'}
    },
    {
        "selector": 'node.focusedSemitransp',
        "style": {'opacity': '0.5'}
    },
    {
        "selector": 'edge.colored',
        "style": {
            'line-color': '#F8333C',
            'target-arrow-color': '#F8333C',
            'width': '6'
        }
    },
    {
        "selector": 'edge.semitransp',
        "style": {'opacity': '0.5'}
    },
    {
        "selector": 'edge.focusedSemitransp',
        "style": {'opacity': '0.5'}
    },
]
cytoscapeobj.set_style(style)
display(cytoscapeobj)