In [None]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown
# Kinase - Transcription Factor Module Pairwise Analysis

This Appyter predicts shared modules of kinases, inferred from phosphoproteomic data through KEA3, and transcription factors, inferred from transcriptomic data from ChEA3, that are differentially active across two groups of samples. It first ranks the kinases and transcription factors and then scores pairs of kinases and transcription factors on an individual sample level per each group. These scores are binarized based on an additional threshold which only takes into account the top percentage of pair scores and with which clustermaps are created. Clusters or modules of kinases and transcription factors in these groups are then compared and those that appear across multiple of the heatmaps, representing different directions of activation, are retained and their relationship is displayed as a bipartite network.

In [None]:
%%appyter hide_code

{% do SectionField(
    name='primary',
    title='Kinase - Transcription Factor Pairwise Module Analysis',
    img='k-tf-logo.png'
) %}


{% set chea_up_file = FileField(
    name='chea_up',
    label='ChEA3 Enrichment Results from Upregulated RNA-seq expression vectors',
    description='Transcription Factors as the rows, samples as the columns',
    default='ChEA3_pancan_top500_baseline_normalized.tsv',
    required=True,
    examples={
        'ChEA3_pancan_top500_baseline_normalized.tsv': 'https://appyters.maayanlab.cloud/storage/Kinase_TF_Modules/ChEA3_10_cancer_type_meanRank_df_top500_baseline_normalized.tsv',
    },
    section='primary',
) %}

{% set chea_down_file = FileField(
    name='chea_down',
    label='ChEA3 Enrichment Results from Downregulated RNA-seq expression vectors',
    description='Transcription Factors as the rows, samples as the columns',
    default='ChEA3_pancan_bot500_baseline_normalized.tsv',
    required=True,
    examples={
        'ChEA3_pancan_bot500_baseline_normalized.tsv': 'https://appyters.maayanlab.cloud/storage/Kinase_TF_Modules/ChEA3_10_cancer_type_meanRank_df_bot500_baseline_normalized.tsv',
    },
    section='primary',
) %}


{% set kea_up_file = FileField(
    name='kea_up',
    label='KEA3 Enrichment Results from upregulated phosphorylated proteins',
    description='Kinases as the rows, samples as the columns',
    default='KEA3_pancan_top500_baseline_normalized.tsv',
    required=True,
    examples={
        'KEA3_pancan_top500_baseline_normalized.tsv': 'https://appyters.maayanlab.cloud/storage/Kinase_TF_Modules/KEA3_V2_10_cancer_type_meanRank_df_top500_baseline_normalized.tsv',
    },
    section='primary',
) %}


{% set kea_down_file = FileField(
    name='kea_down',
    label='KEA3 Enrichment Results from downregulated phosphorylated proteins',
    description='Kinases as the rows, samples as the columns',
    default='KEA3_pancan_bot500_baseline_normalized.tsv',
    required=True,
    examples={
        'KEA3_pancan_bot500_baseline_normalized.tsv': 'https://appyters.maayanlab.cloud/storage/Kinase_TF_Modules/KEA3_V2_10_cancer_type_meanRank_df_bot500_baseline_normalized.tsv',
    },
    section='primary',
) %}

{# Sample annotations: the notebook reads the "group" column and matches samples through the table index. #}
{% set group_annotations_file = FileField(
    name='group_annotations_file',
    label='Pairwise group assignment for each sample. Column with annotations should be named group',
    description='Each row should be a sample and an annotation.',
    default='pancan_immune_subtypes.tsv',
    required=True,
    examples={
        'pancan_immune_subtypes.tsv': 'https://appyters.maayanlab.cloud/storage/Kinase_TF_Modules/pancan_immune_subtypes.tsv',
    },
    section='primary',
) %}

{% set ascending = BoolField(
    name='ascending',
    label='ChEA3 and KEA3 Enrichment results are in ascending order',
    default=False,
    section='primary',
) %}

{% set rank_threshold =
    IntField(
        name='rank_threshold', 
        label='Rank Threshold',
        description="Threshold for rank of kinase/TF for pair to be included in the analysis",
        default=30,
        section='primary'
    ) %}

{# Fraction of each group's scored pairs that is kept for the clustermaps. #}
{% set vis_threshold =
    FloatField(
        name='vis_threshold', 
        label='Visualization Threshold',
        description="Threshold for the top percentage of kinase-transcription factor pairs to be visualized and clustered, given as a fraction (e.g. 0.005 keeps the top 0.5%).",
        default=0.005,
        step=0.001,
        section='primary'
    ) %}

{% set cluster_threshold =
    IntField(
        name='cluster_threshold', 
        label='Cluster size threshold',
        description="Threshold for size of a cluster of kinases and transcription factors for it to be considered.",
        default=10,
        section='primary'
    ) %}

In [None]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import networkx as nx
import pyvis
from IPython.display import display, FileLink, HTML, Markdown, IFrame


def read_table(filename):
    """Load a delimited table into a DataFrame, using its first column as the index.

    The parser options are picked from the file extension (optionally followed
    by ``.gz``): ``.tsv`` is tab separated, ``.csv`` is comma separated and
    ``.gct`` is tab separated with its first two lines skipped. For any other
    extension the separator is sniffed by pandas' python engine.
    """
    known_formats = (
        (('.tsv', '.tsv.gz'), {'sep': '\t'}),
        (('.csv', '.csv.gz'), {'sep': ','}),
        (('.gct', '.gct.gz'), {'sep': '\t', 'skiprows': 2}),
    )
    for suffixes, parser_options in known_formats:
        if filename.endswith(suffixes):
            return pd.read_csv(filename, index_col=0, **parser_options)
    # Unknown extension: let pandas detect the delimiter.
    return pd.read_table(filename, sep=None, engine='python', index_col=0)

In [None]:
%%appyter markdown
## Load Files
Load the [ChEA3](https://maayanlab.cloud/chea3/) and [KEA3](https://maayanlab.cloud/kea3/) files and convert to transcription factor and kinase enrichment ranks, respectively. 
Additionally load annotation file which defines the pairwise groups which will be compared in the analysis.

In [None]:
%%appyter code_eval

# Each enrichment table is read with its first column as the index.
chea_up = read_table({{ chea_up_file }})
chea_down = read_table({{ chea_down_file }})
kea_up = read_table({{ kea_up_file }})
kea_down = read_table({{ kea_down_file }})

# Convert the values to ranks within each column, then transpose so that the
# original columns become the rows of the rank tables.
# NOTE(review): when the "ascending" form field is set, the largest value gets
# rank 1; otherwise the smallest value does - confirm this matches the wording
# of the field's label.
{% if ascending.raw_value %}
top_tf = chea_up.rank(ascending=False).T
bottom_tf = chea_down.rank(ascending=False).T

top_kinase = kea_up.rank(ascending=False).T
bottom_kinase = kea_down.rank(ascending=False).T
{% else %}
top_tf = chea_up.rank(ascending=True).T
bottom_tf = chea_down.rank(ascending=True).T

top_kinase = kea_up.rank(ascending=True).T
bottom_kinase = kea_down.rank(ascending=True).T
{% endif %}

# Annotation table; later cells read its "group" column and use its index as sample ids.
group_annotations = read_table({{ group_annotations_file }})

In [None]:
%%appyter markdown
Parse group annotation and identify the available samples from the transcription factor and kinase ranking files.

In [None]:
# The comparison is driven by the "group" column of the annotation table;
# fail with a readable message when that column is absent.
if "group" not in group_annotations.columns:
    raise RuntimeError('The annotations file must contain a column named "group".')

# Distinct, non-missing group labels in order of first appearance.
groups = list(group_annotations["group"].dropna().unique())
if len(groups) < 2:
    raise RuntimeError("Two groups were not recognized in the annotations file")
elif len(groups) > 2:
    print("More than two groups were identified in the provided annotations file. Proceeding with the first two appearing groups.")

# Only the first two labels are compared in the rest of the notebook.
label1, label2 = groups[0], groups[1]

In [None]:
# A sample is usable only when it is present in all four rank tables.
available_samples = (
    set(top_kinase.index)
    & set(top_tf.index)
    & set(bottom_tf.index)
    & set(bottom_kinase.index)
)


def _samples_of_group(label):
    """Return the usable samples annotated with `label`, in annotation-file order.

    Duplicated sample ids are kept once. Keeping the file order (instead of the
    arbitrary order of a set) makes repeated runs select rows in the same order.
    """
    annotated = group_annotations.index[group_annotations['group'] == label]
    return [sample for sample in dict.fromkeys(annotated) if sample in available_samples]


group1 = _samples_of_group(label1)
group2 = _samples_of_group(label2)

if len(group1) == 0 or len(group2) == 0:
    raise RuntimeError("No samples identified for the given groups. Please ensure samples are in the index of your annotations file.")

print(f'{label1}:', len(group1))
print(f'{label2}:', len(group2))

In [None]:
%%appyter markdown
## Score kinase - transcription factor pairs
On a patient and group level, score kinase - transcription factor pairs according to the input rank threshold. If a kinase or transcription factor does not meet the rank threshold, then the pair receives a score of 0; otherwise the score is the sum of the normalized ranks of the given kinase and transcription factor. If this process is running for an extended period of time, consider using a smaller (more stringent) rank threshold so that fewer pairs are scored.

In [None]:
def melt_tf_kinase(kinase_df, tf_df, rank_threshold, fname):
    """Score every kinase - transcription factor pair across a set of samples.

    Parameters
    ----------
    kinase_df : pandas.DataFrame
        Kinase ranks with samples as rows and kinases as columns.
    tf_df : pandas.DataFrame
        Transcription factor ranks with samples as rows and TFs as columns.
    rank_threshold : int
        Only ranks strictly below this value contribute; a pair is scored for a
        sample only when both its kinase and its TF pass.
    fname : str
        Path stem; the resulting matrix is written to ``f"{fname}.tsv"``.

    Returns
    -------
    pandas.DataFrame
        Kinase (rows) by TF (columns) matrix holding log10 of the score summed
        over samples. Pairs that never passed the threshold are NaN.
    """
    # Long format, still indexed by sample: |sample|kinase|rank_kinase|
    kinase_long = kinase_df.melt(var_name='kinase', value_name='rank_kinase', ignore_index=False)
    # |sample|tf|rank_tf|
    tf_long = tf_df.melt(var_name='tf', value_name='rank_tf', ignore_index=False)

    # Filter each side BEFORE joining. The join pairs every kinase with every
    # TF of the same sample, so joining first would materialize the complete
    # kinases x TFs x samples table only to discard most of it.
    kinase_long = kinase_long[kinase_long['rank_kinase'] < rank_threshold]
    tf_long = tf_long[tf_long['rank_tf'] < rank_threshold]

    # |sample|kinase|rank_kinase|tf|rank_tf| - one row per passing pair and sample
    pairs = pd.merge(left=kinase_long, left_index=True, right=tf_long, right_index=True)

    # Each rank is mapped so that rank 1 gives 1 and larger ranks give less;
    # the pair score is the sum of both terms. `assign` returns a new frame,
    # which avoids writing into a filtered slice.
    pairs = pairs.assign(
        score=((rank_threshold + 1) - pairs['rank_tf']) / rank_threshold
        + ((rank_threshold + 1) - pairs['rank_kinase']) / rank_threshold
    )

    # Aggregate the score of each pair across all samples.
    df_agg = pairs.groupby(['tf', 'kinase'])['score'].sum().reset_index()

    # log10-transform the aggregated scores.
    df_agg['score_norm'] = np.log10(df_agg['score'])

    # Reshape to a kinase by TF matrix and persist it.
    df_score_norm = df_agg.pivot(index='kinase', columns='tf', values='score_norm')
    df_score_norm.to_csv(f"{fname}.tsv", sep='\t')
    return df_score_norm

In [None]:
%%appyter code_exec
rank_threshold = {{ rank_threshold }}

In [None]:
# Score the four kinase / transcription factor direction combinations for the
# first group; every call also writes its matrix to "<name>_<label1>.tsv".
top_kinase_top_tf_gr1 = melt_tf_kinase(
    top_kinase.loc[group1], top_tf.loc[group1],
    rank_threshold, f'top_kinase_top_tf_{label1}',
)
bottom_kinase_bottom_tf_gr1 = melt_tf_kinase(
    bottom_kinase.loc[group1], bottom_tf.loc[group1],
    rank_threshold, f'bottom_kinase_bottom_tf_{label1}',
)
top_kinase_bottom_tf_gr1 = melt_tf_kinase(
    top_kinase.loc[group1], bottom_tf.loc[group1],
    rank_threshold, f'top_kinase_bottom_tf_{label1}',
)
bottom_kinase_top_tf_gr1 = melt_tf_kinase(
    bottom_kinase.loc[group1], top_tf.loc[group1],
    rank_threshold, f'bottom_kinase_top_tf_{label1}',
)

# One download link per written matrix, in the same order as above.
for file_stem, direction in (
    ('top_kinase_top_tf', 'up kinase - up transcription factor'),
    ('bottom_kinase_bottom_tf', 'down kinase - down transcription factor'),
    ('top_kinase_bottom_tf', 'up kinase - down transcription factor'),
    ('bottom_kinase_top_tf', 'down kinase - up transcription factor'),
):
    display(FileLink(f"{file_stem}_{label1}.tsv", result_html_prefix=f'{direction} {label1} scores matrix: '))

In [None]:
# Score the four kinase / transcription factor direction combinations for the
# second group; every call also writes its matrix to "<name>_<label2>.tsv".
top_kinase_top_tf_gr2 = melt_tf_kinase(
    top_kinase.loc[group2], top_tf.loc[group2],
    rank_threshold, f'top_kinase_top_tf_{label2}',
)
bottom_kinase_bottom_tf_gr2 = melt_tf_kinase(
    bottom_kinase.loc[group2], bottom_tf.loc[group2],
    rank_threshold, f'bottom_kinase_bottom_tf_{label2}',
)
top_kinase_bottom_tf_gr2 = melt_tf_kinase(
    top_kinase.loc[group2], bottom_tf.loc[group2],
    rank_threshold, f'top_kinase_bottom_tf_{label2}',
)
bottom_kinase_top_tf_gr2 = melt_tf_kinase(
    bottom_kinase.loc[group2], top_tf.loc[group2],
    rank_threshold, f'bottom_kinase_top_tf_{label2}',
)

# One download link per written matrix, in the same order as above.
for file_stem, direction in (
    ('top_kinase_top_tf', 'up kinase - up transcription factor'),
    ('bottom_kinase_bottom_tf', 'down kinase - down transcription factor'),
    ('top_kinase_bottom_tf', 'up kinase - down transcription factor'),
    ('bottom_kinase_top_tf', 'down kinase - up transcription factor'),
):
    display(FileLink(f"{file_stem}_{label2}.tsv", result_html_prefix=f'{direction} {label2} scores matrix: '))

In [None]:
%%appyter markdown
## Visualize kinase and transcription factor pairs and identify modules
Create a clustermap of the top percentage of kinase-transcription factor pairs appearing in one or both of the groups, based on the visualization threshold. Identify modules of kinases and transcription factors based on the threshold for cluster/module size.

In [None]:
def create_heatmap(gr2_df, gr1_df, threshold, fname):
    """Binarize the top scoring kinase-TF pairs of two groups and cluster them.

    Parameters
    ----------
    gr2_df, gr1_df : pandas.DataFrame
        Kinase (rows) by transcription factor (columns) score matrices of the
        second and the first group. NaN entries are ignored.
    threshold : float
        Fraction of each group's scored pairs to keep (the highest scoring ones).
    fname : str
        Path stem; the figure is saved to ``f"{fname}.png"``.

    Returns
    -------
    mat : pandas.DataFrame
        The clustered matrix as drawn (rows and columns in dendrogram order).
        Cells hold 2 (first group only), -2 (second group only), 1 (both
        groups) or 0 (pair not selected).
    kinases : list
        Row labels of ``mat``, in dendrogram order.
    tfs : list
        Column labels of ``mat``, in dendrogram order.

    Raises
    ------
    ValueError
        If no pair is selected for either group.

    Notes
    -----
    The colour bar labels are taken from the notebook-level ``label1`` and
    ``label2`` variables.
    """
    def top_scoring_pairs(score_df, group_tag):
        # Flatten the matrix to (score, (kinase, tf), tag) tuples and keep the
        # best `threshold` fraction; tuples sort by score first.
        scored = [
            (score, (kinase, tf), group_tag)
            for tf in score_df.columns.values
            for kinase, score in score_df[tf].items()
            if not pd.isna(score)
        ]
        n_keep = max(int(len(scored) * threshold), 0)
        return sorted(scored, reverse=True)[:n_keep]

    top_pairs = top_scoring_pairs(gr1_df, 'gr1') + top_scoring_pairs(gr2_df, 'gr2')
    if not top_pairs:
        raise ValueError(
            "No kinase - transcription factor pairs were selected; "
            "try a larger visualization threshold."
        )

    # Record in which group(s) each selected pair appears.
    groups_by_pair = {}
    for _score, pair, group_tag in top_pairs:
        groups_by_pair.setdefault(pair, []).append(group_tag)

    # Encode membership: 1 = both groups, 2 = first group only, -2 = second group only.
    records = []
    for (kinase, tf), tags in groups_by_pair.items():
        if len(tags) == 2:
            code = 1
        elif tags == ['gr1']:
            code = 2
        else:
            code = -2
        records.append((code, kinase, tf))

    new_df = pd.DataFrame(records, columns=['score', 'Kinases', 'Transcription Factors'])
    new_df_pivot = new_df.pivot_table(index='Kinases', columns='Transcription Factors', values='score')

    # Three colours over [-4, 4] give three equal bands, so -2 falls in the
    # blue band, 1 in the gray band and 2 in the red band; the colour bar
    # ticks sit at the centre of each band.
    cmap = mpl.colors.ListedColormap(['blue', 'gray', 'red'])
    kws = dict(cbar_kws=dict(ticks=[-2.666667, 0, 2.666667], orientation='horizontal'))
    sns.set(font_scale=1.0)

    # Unselected pairs are filled with 0 for clustering but masked in the drawing.
    g = sns.clustermap(new_df_pivot.fillna(0.),
                       mask=np.isnan(new_df_pivot),
                       method='complete',
                       cmap=cmap,
                       figsize=(18, 18),
                       xticklabels=True,
                       yticklabels=True,
                       vmin=-4,
                       vmax=4,
                       **kws)

    mat = g.data2d

    x0, _y0, _w, _h = g.cbar_pos
    g.ax_cbar.set_position([x0, 0.9, g.ax_row_dendrogram.get_position().width, 0.02])
    g.ax_cbar.tick_params(axis='x', length=10)
    # Legend order is [gr2, Both, gr1]; str() keeps this working for non-string labels.
    g.ax_cbar.set_xticklabels(
        [str(label2).replace('Mesenchymal', 'M'), 'Both', str(label1).replace('Mesenchymal', 'M')],
        fontsize=20,
    )

    ax = g.ax_heatmap
    ax.set_xlabel("Transcription Factors", fontsize=30, loc='center')
    ax.set_ylabel("Kinases", fontsize=30, loc='center')

    for a in g.ax_row_dendrogram.collections:
        a.set_linewidth(3)

    for a in g.ax_col_dendrogram.collections:
        a.set_linewidth(3)

    # Save only after the labels and colour bar are in place so that the file
    # matches the figure shown in the notebook.
    g.savefig(f"{fname}.png")

    # Labels in dendrogram order, i.e. aligned with the rows/columns of `mat`.
    kinases = list(new_df_pivot.index.values[g.dendrogram_row.reordered_ind])
    tfs = list(new_df_pivot.columns.values[g.dendrogram_col.reordered_ind])
    return mat, kinases, tfs

In [None]:
def is_valid_move(matrix, visited, row, col, value):
    """Return whether (row, col) lies inside `matrix`, holds `value` and is unvisited."""
    if not (0 <= row < len(matrix) and 0 <= col < len(matrix[0])):
        return False
    return matrix[row][col] == value and not visited[row][col]


def dfs(matrix, visited, row, col, island_indices):
    """Flood-fill the 4-connected region of equal values containing (row, col).

    Every reached cell is marked in `visited` and appended to `island_indices`
    as a ``(row, col)`` tuple. The traversal uses an explicit stack instead of
    recursion so that large regions cannot exceed Python's recursion limit;
    cells are still reported in depth-first order with neighbours tried as
    right, down, left, up.
    """
    directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]  # right, down, left, up
    value = matrix[row][col]

    stack = [(row, col)]
    is_start = True
    while stack:
        cur_row, cur_col = stack.pop()
        # A cell may be pushed more than once before it is processed; the
        # start cell is always processed.
        if visited[cur_row][cur_col] and not is_start:
            continue
        is_start = False
        visited[cur_row][cur_col] = True
        island_indices.append((cur_row, cur_col))

        # Push in reverse so that the first direction is explored first.
        for dr, dc in reversed(directions):
            new_row, new_col = cur_row + dr, cur_col + dc
            if is_valid_move(matrix, visited, new_row, new_col, value):
                stack.append((new_row, new_col))


def find_islands(matrix, value, min_island_size):
    """Find the 4-connected regions of `value` with at least `min_island_size` cells.

    Returns a list of regions, each a list of ``(row, col)`` tuples. Regions
    are listed in row-major order of their first cell. An empty matrix yields
    an empty list.
    """
    rows = len(matrix)
    cols = len(matrix[0]) if rows else 0
    visited = [[False] * cols for _ in range(rows)]
    islands = []
    for row in range(rows):
        for col in range(cols):
            if matrix[row][col] == value and not visited[row][col]:
                island_indices = []
                dfs(matrix, visited, row, col, island_indices)

                if len(island_indices) >= min_island_size:
                    islands.append(island_indices)
    return islands


def find_all_clusters(matrix, min_island_size, kinases, tfs):
    """Turn the connected regions of a binarized heatmap into kinase/TF modules.

    Parameters
    ----------
    matrix : 2D sequence
        Heatmap values; 2, -2 and 1 mark first-group, second-group and shared
        pairs respectively.
    min_island_size : int
        Minimum number of cells a region needs to be reported.
    kinases, tfs : sequence
        Row and column labels of `matrix`.

    Returns
    -------
    dict
        ``{label1: modules, label2: modules, 'both': modules}`` where each
        ``modules`` maps ``"<prefix>-<n>"`` to ``{'kinases': [...], 'tfs': [...]}``.
        The member lists are sorted and free of duplicates. ``label1`` and
        ``label2`` are the notebook-level group labels.
    """
    def modules_for(value, prefix):
        # One module per region; a module lists the distinct row and column
        # labels touched by the cells of its region.
        modules = {}
        for number, island in enumerate(find_islands(matrix, value, min_island_size), start=1):
            modules[f"{prefix}-{number}"] = {
                'kinases': sorted({kinases[row] for row, _col in island}),
                'tfs': sorted({tfs[col] for _row, col in island}),
            }
        return modules

    return {
        label1: modules_for(2, label1),
        label2: modules_for(-2, label2),
        # Shared modules carry their own prefix instead of the second group's.
        'both': modules_for(1, 'both'),
    }

In [None]:
%%appyter eval_code

vis_threshold = {{ vis_threshold }}
cluster_threshold = {{ cluster_threshold }}
identified_clusters = {}

In [None]:
%%appyter markdown
# Top ranked kinases - Top ranked transcription factors
Comparing top ranked kinases and transcription factors based on upregulated phosphosites and upregulated genes, respectively. Since their regulation mirrors each other, this is inferred as activation in the next step of the analysis.

In [None]:
# Cluster the up kinase - up TF pairs and extract modules using the cluster
# size chosen in the form rather than a fixed value.
mat, kinases, tfs = create_heatmap(top_kinase_top_tf_gr2, top_kinase_top_tf_gr1, vis_threshold, "top-kinase-top-tf")
toptopres = find_all_clusters(mat.fillna(0).values, cluster_threshold, kinases, tfs)
identified_clusters['top-top'] = toptopres
# The link must point at the file name that create_heatmap was asked to write.
display(FileLink("top-kinase-top-tf.png", result_html_prefix='Download Figure: '))


In [None]:
%%appyter markdown
# Bottom ranked kinases - Bottom ranked transcription factors
Comparing top ranked kinases and transcription factors based on downregulated phosphosites and downregulated genes, respectively. Since their regulation mirrors each other, this is inferred as activation in the next step of the analysis.

In [None]:
# Cluster the down kinase - down TF pairs and extract modules using the
# cluster size chosen in the form rather than a fixed value.
mat, kinases, tfs = create_heatmap(bottom_kinase_bottom_tf_gr2, bottom_kinase_bottom_tf_gr1, vis_threshold, "bottom_kinase_bottom_tf")
botbotres = find_all_clusters(mat.fillna(0).values, cluster_threshold, kinases, tfs)
identified_clusters['bot-bot'] = botbotres
display(FileLink("bottom_kinase_bottom_tf.png", result_html_prefix='Download Figure: '))

In [None]:
%%appyter markdown
# Bottom ranked kinases - Top ranked transcription factors
Comparing top ranked kinases and transcription factors based on downregulated phosphosites and upregulated genes, respectively. Since their regulation opposes each other, this is inferred as inhibition in the next step of the analysis.

In [None]:
# Cluster the down kinase - up TF pairs and extract modules using the cluster
# size chosen in the form rather than a fixed value.
mat, kinases, tfs = create_heatmap(bottom_kinase_top_tf_gr2, bottom_kinase_top_tf_gr1, vis_threshold, "bottom-kinase-top-tf")
bottopres = find_all_clusters(mat.fillna(0).values, cluster_threshold, kinases, tfs)
# Store this heatmap's own modules (not the bottom-bottom result).
identified_clusters['bot-top'] = bottopres
display(FileLink("bottom-kinase-top-tf.png", result_html_prefix='Download Figure: '))

In [None]:
%%appyter markdown
# Top ranked kinases - Bottom ranked transcription factors
Comparing top ranked kinases and transcription factors based on upregulated phosphosites and downregulated genes, respectively. Since their regulation opposes each other, this is inferred as inhibition in the next step of the analysis.

In [None]:
# Cluster the up kinase - down TF pairs and extract modules using the cluster
# size chosen in the form rather than a fixed value.
mat, kinases, tfs = create_heatmap(top_kinase_bottom_tf_gr2, top_kinase_bottom_tf_gr1, vis_threshold, "top-kinase-bottom-tf")
topbotres = find_all_clusters(mat.fillna(0).values, cluster_threshold, kinases, tfs)
# Store this heatmap's own modules (not the bottom-bottom result).
identified_clusters['top-bot'] = topbotres
display(FileLink("top-kinase-bottom-tf.png", result_html_prefix='Download Figure: '))

In [None]:
%%appyter markdown
## Identify modules appearing across heatmaps
Kinase and transcription factor groups are identified from those appearing in modules across the above four visualizations.

In [None]:
clusters = identified_clusters
# Position -> heatmap name, derived from the dictionary itself so that the
# numbering always follows the order in which the heatmaps were stored.
clus_dict = dict(enumerate(clusters))

def _find_overlaps(members_to_check, member_type, min_overlap=3):
    """Shared implementation of `find_overlaps_kinase` and `find_overlaps_tf`.

    Scans every module stored in the notebook-level `clusters` dictionary
    (heatmap -> group -> module) and keeps the intersection of the module's
    `member_type` list with `members_to_check` whenever that intersection has
    more than `min_overlap` members.

    Returns ``(shared, valid)``: `shared` is the frozenset of members common to
    all kept intersections (empty when none was kept) and `valid` is True when
    at least two modules contributed an intersection.
    """
    check_set = set(members_to_check)
    overlaps = {}
    for heatmap_name, heatmap_groups in clusters.items():
        for group_label, modules in heatmap_groups.items():
            for module_name, module in modules.items():
                overlap = check_set.intersection(module.get(member_type, ()))
                if len(overlap) > min_overlap:
                    # A tuple key is unique per module and works for any label type.
                    overlaps[(heatmap_name, group_label, module_name)] = overlap

    shared = set.intersection(*overlaps.values()) if overlaps else set()
    return frozenset(shared), len(overlaps) >= 2


def find_overlaps_kinase(kinases_to_check):
    """Find the kinases of `kinases_to_check` that recur across identified modules.

    Returns ``(overlap, valid)`` where `overlap` is a frozenset of kinases and
    `valid` tells whether at least two modules share more than three kinases
    with `kinases_to_check`.
    """
    return _find_overlaps(kinases_to_check, 'kinases')


def find_overlaps_tf(kinases_to_check):
    """Find the transcription factors of the given list that recur across modules.

    The parameter keeps its historical name but holds transcription factors.
    Returns ``(overlap, valid)`` with the same meaning as `find_overlaps_kinase`,
    computed on the modules' 'tfs' lists.
    """
    return _find_overlaps(kinases_to_check, 'tfs')

        
def sublist(lst1, lst2):
    """Return True when the distinct elements shared with `lst2` number ``len(lst1)``.

    This holds when every element of `lst1` occurs in `lst2` and `lst1` has no
    repeated elements.
    """
    shared = set(lst1) & set(lst2)
    return len(lst1) == len(shared)

In [None]:
def _consensus_groups(member_type, find_overlaps):
    """Collect the overlap sets of all modules whose members recur elsewhere.

    `find_overlaps` is called with the `member_type` list of every identified
    module; the overlap it returns is kept when it is flagged as valid.
    """
    groups = set()
    for heatmap_groups in clusters.values():
        for modules in heatmap_groups.values():
            for module in modules.values():
                overlap, valid = find_overlaps(module[member_type])
                if valid:
                    groups.add(overlap)
    return groups


def _size_then_members(group):
    """Sort key: smaller groups first, ties broken by the sorted member names."""
    return (len(group), sorted(group))


# Group names are built from sorted members so that node names (and therefore
# the exported network) are identical from run to run.
set_of_clusters_kinases = _consensus_groups('kinases', find_overlaps_kinase)
sorted_kinases = sorted(set_of_clusters_kinases, key=_size_then_members)
# Only groups with more than two members become nodes of the network.
kinase_list = [' '.join(sorted(kinase_gr)) for kinase_gr in sorted_kinases if len(kinase_gr) > 2]

print('Kinase Clusters:')
for kinase_gr_name in kinase_list:
    print(kinase_gr_name)

set_of_clusters_tfs = _consensus_groups('tfs', find_overlaps_tf)
sorted_tfs = sorted(set_of_clusters_tfs, key=_size_then_members)
tf_list = [' '.join(sorted(tf_gr)) for tf_gr in sorted_tfs if len(tf_gr) > 2]

print('\nTFs Clusters:')
for tf_gr_name in tf_list:
    print(tf_gr_name)

In [None]:
%%appyter markdown
## Create Bipartite Figure with identified kinase and transcription factor modules and their relationships
Kinase and transcription factor module relationships are drawn from heatmaps above where upregulated kinases and upregulated transcription factors as well as downregulated kinases and downregulated transcription factors are inferred as activation and upregulated transcription factors and downregulated kinases as well as downregulated transcription factors and upregulated kinases are inferred as inhibition.

In [None]:
# Undirected graph with one node per kinase module (bipartite=0) and one node
# per transcription factor module (bipartite=1). A node is identified by the
# space-separated member names built in the previous cell.
G = nx.Graph()

G.add_nodes_from(kinase_list, bipartite=0)
G.add_nodes_from(tf_list, bipartite=1)

In [None]:
# Edge colour depends on the group a module was found in. label1 is inserted
# last so that it takes precedence if two keys coincide.
edge_colors = {'both': 'grey', label2: 'blue', label1: 'red'}

for heatmap_name, heatmap_groups in identified_clusters.items():
    # Heatmaps pairing ranks of the same direction get the '-|>' arrow style,
    # the mixed-direction heatmaps get '|-|'.
    arrow_style = '-|>' if heatmap_name in ('top-top', 'bot-bot') else '|-|'

    for label, modules in heatmap_groups.items():
        edge_color = edge_colors.get(label)

        for module in modules.values():
            # Kinase nodes whose members are all contained in this module.
            module_kinases = module['kinases']
            kinase_nodes = [
                node for node in kinase_list
                if len(set(node.split()).intersection(module_kinases)) == len(node.split())
            ]
            # TF nodes whose members are all contained in this module.
            module_tfs = module['tfs']
            tf_nodes = [
                node for node in tf_list
                if len(set(node.split()).intersection(module_tfs)) == len(node.split())
            ]

            if edge_color is None:
                continue

            # Connect every matching kinase node with every matching TF node.
            for kinase_node in kinase_nodes:
                for tf_node in tf_nodes:
                    G.add_edge(kinase_node, tf_node, attr=f"{label}-{heatmap_name}", color=edge_color, shape=arrow_style)

# Drop nodes that ended up without any connection.
G.remove_nodes_from(list(nx.isolates(G)))

In [None]:
# Isolated nodes were removed from G, so only the kinase modules that are still
# part of the graph are handed to the layout; otherwise positions would be
# reserved for nodes that are never drawn.
kinase_nodes = [node for node in kinase_list if node in G]
if G.number_of_edges() == 0:
    print("No kinase - transcription factor module relationships were identified; the network is empty.")

fig = plt.figure(1, figsize=(20, 10), dpi=300)
pos = nx.drawing.layout.bipartite_layout(G, kinase_nodes, align='vertical')
nx.draw_networkx_nodes(G, pos, node_size=300, margins=[.2, .1], alpha=.5)
nx.draw_networkx_labels(G, pos, bbox=dict(facecolor="white", edgecolor='black', boxstyle='round,pad=1'))

# Edges are drawn one at a time because each needs its own arrow style, colour
# and target margin.
for source, target, attrs in G.edges(data=True):
    y_diff = np.abs(pos[source][1] - pos[target][1])
    x_diff = np.abs(pos[source][0] - pos[target][0])
    dist = np.sqrt(np.square(y_diff) + np.square(x_diff))

    # Heuristic margin so that the arrow head stops before the label box of
    # the target node; it depends on the edge length, its slope and the length
    # of the target's name.
    min_target_margin = dist * 64

    min_target_margin += np.log10(1 / (y_diff + .01)) * len(target) * .2

    if y_diff < .1:
        min_target_margin += len(target) * 1.5

    if y_diff > .5:
        min_target_margin -= y_diff * 50

    nx.draw_networkx_edges(G, pos, edgelist=[(source, target)], width=1.5, arrows=True, arrowstyle=attrs['shape'], edge_color=attrs['color'], min_source_margin=-150, min_target_margin=min_target_margin)

plt.tight_layout()
plt.axis("off")

# Export the network as node-link adjacency JSON and as an adjacency list.
json_adjacency = nx.adjacency_data(G)
with open(f'{label1}vs{label2}_adjacency.json', 'w') as f:
    json.dump(json_adjacency, f)
# The TSV file name keeps its historical spelling so that existing links to it stay valid.
nx.write_adjlist(G, f'{label1}vs{label2}_adjaceny.tsv', delimiter='\t')
plt.show()
display(FileLink(f"{label1}vs{label2}_adjacency.json", result_html_prefix='Download adjacency network data JSON: '))
display(FileLink(f"{label1}vs{label2}_adjaceny.tsv", result_html_prefix='Download adjacency network data TSV: '))

In [None]:
# Build an interactive pyvis network from the networkx graph, carrying over
# its nodes, edges and their attributes.
g = pyvis.network.Network(notebook=True,  cdn_resources='remote')
g.from_nx(G)
# Write the HTML file and offer it as a download.
g.save_graph(f'{label1}vs{label2}.html')
display(FileLink(f"{label1}vs{label2}.html", result_html_prefix=str('Download network html: ')))
# NOTE(review): show() receives the same path as save_graph() above, so it
# presumably rewrites that file before rendering it inline - confirm.
g.show(f'{label1}vs{label2}.html')