In [3]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

In [4]:
import csv
import plotly.offline as py
import plotly.graph_objects as go
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from networkx.algorithms import community
from collections import OrderedDict
import time
import requests

In [5]:
%%appyter hide_code

{% do SectionField(
    name="GENES",
    title="Submit a gene list",
) %}

gene_list = {{ TextField(
    name="gene_list",
    label="Gene list",
    description="Paste with a single gene on each line.",
    section="GENES",
    default="",
)}}

```python

gene_list = ''''''
```

In [7]:
# Load a sample gene list
sample_genes = []

with open("ULK4_293_coIP_hits.txt","r") as f_in:
    writer = csv.reader(f_in, lineterminator='\n')
    sample_genes = [item for sublist in writer for item in sublist if len(sublist) > 0]
    sample_genes = [*sample_genes]

sample_gene_list = [ x.upper() for x in sample_genes ]

In [8]:
# Load data
cloud_url = 'https://appyters.maayanlab.cloud/storage/Gene_Network_Analysis/'
cloud_url = "" # for reading locally

df_ppi_edges = pd.read_csv(cloud_url + 'ppi_edges_list.csv',header=None)
df_gene_edges = pd.read_csv(cloud_url + 'gene_edges_list.csv')

display(df_ppi_edges.head())
display(df_gene_edges.head())

Unnamed: 0,0,1
0,MAP2K4,UBA52
1,MAP2K4,NOD1
2,MAP2K4,MAPK3
3,MAP2K4,TAB1
4,MAP2K4,UBC


Unnamed: 0,A1BG,A1CF,A2M,A2ML1,A2MP1,A4GALT,A4GNT,AAAS,AACS,AACSP1,...,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
0,ITIH1,PAH,ITIH2,PPL,ARRDC5,ALPPL2,METTL7B,SNRPA,SLC35E3,TP53I13,...,CSE1L,MCM6,YBX1P4,CPS1,NSA2,CA5A,PPP1R3A,GNAI2,GAREM1,MAP2K6
1,AGXT,SLC2A2,AHSG,TGM1,RP11.927P21.9,KLRG2,SSTR5,RUVBL2,PEX26,RENBP,...,NPM1,TIMELESS,RP11.475I24.1,CKAP2P1,CICP27,FAM98C,TXLNB,TGFB1,DHRS12,HSBP1
2,CYP4A11,CPB2,IGFBP1,KLK13,CSPG4P11,FGF4,C2ORF82,PLK1,ZNF793,PRRT4,...,RAN,SAA3P,REM2,GLP2R,RPL10AP5,PCDHB1,TRDN,PXN,ATP5LP3,POT1
3,C8B,APOH,ITIH3,SCEL,SMIM2,NLRP7,KLHL4,CDC20,RPS6KA5,PPOX,...,LDHB,PSMC5,RP5.1147A1.1,RP11.91J3.1,RPL21P3,USHBP1,XIRP2,VASP,GPR162,SNRPGP14
4,SLC25A47,ALB,AGT,KRT78,RP11.123K3.4,DNMT3L,SPIC,KIF22,ZNF556,AQP7P1,...,PAICS,RFC3,SMCR8,MTNR1A,RPL37P1,ZBTB8B,PYGM,SPI1,RPL27A,GOLGA2P2Y


In [9]:
ppi_edges_dict = {}

for index, row in df_ppi_edges.iterrows():
    if row[0] in ppi_edges_dict:
        ppi_edges_dict[row[0]].append(row[1])
    else:
        ppi_edges_dict[row[0]] = [row[1]]

gene_edges_dict = df_gene_edges.to_dict('list')

In [10]:
def get_relevant_ppi_edges(gene_list):
    edges = []
    for gene in gene_list:
        if gene in ppi_edges_dict:  
            edges = [*edges,  *[(gene, x) for x in ppi_edges_dict[gene]]]
    return edges

def get_relevant_gene_edges(gene_list):
    edges = []
    for gene in gene_list:
        if gene in gene_edges_dict:  
            edges = [*edges,  *[(gene, x) for x in gene_edges_dict[gene]]]
    return edges

In [11]:
# create the Network object
ppi_edges = get_relevant_ppi_edges(sample_gene_list)
gene_edges = get_relevant_gene_edges(sample_gene_list)

def create_network(gene_list,ppi_edges,gene_edges):
    G = nx.Graph()
    G.add_nodes_from(gene_list)
    G.add_edges_from(ppi_edges)
    G.add_edges_from(gene_edges)
    return G

G = create_network(sample_gene_list,ppi_edges,gene_edges)
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 3031
Number of edges: 4703
Average degree:   3.1033


In [12]:
# Clustering
all_clusters = {}

In [13]:
# k_clique_communities
c = list(community.k_clique_communities(G, 3)) 
clusters = [ list(x) for x in c]
all_clusters["k_clique_communities"] = clusters

In [None]:
# Girvan newman communities
communities_generator = community.girvan_newman(G)
top_level_communities = next(communities_generator)
next_level_communities = next(communities_generator)
clusters = sorted(map(sorted, next_level_communities))
all_clusters["girvan_newman"] = clusters

In [14]:
# greedy_modularity_communities
c = list(community.greedy_modularity_communities(G))
clusters = [ list(x) for x in c]
all_clusters["greedy_modularity_communities"] = clusters

In [15]:
# asyn_fluid
k = 10

c = list(community.asyn_fluid.asyn_fluidc(G,k,seed=42))
clusters = [ list(x) for x in c]
all_clusters[f"asyn_fluid_k_{k}"] = clusters

NetworkXError: Fluid Communities require connected Graphs.

In [16]:
# Validation with Enrichr
enrichr_libraries = OrderedDict([
    ('Diseases/Drugs', ['GWAS_Catalog_2019']), 
    ('Ontologies', ['GO_Biological_Process_2018','MGI_Mammalian_Phenotype_Level_4_2019']),
    ('Pathways', ['KEGG_2019_Human','KEGG_2019_Mouse']),
    ('Transcription', ['ENCODE_TF_ChIP-seq_2015']),
])

# Util functions
def enrichr_link_from_genes(genes, description='', enrichr_link='https://amp.pharm.mssm.edu/Enrichr'):
    ''' Functional access to Enrichr API
    '''
    time.sleep(1)
    resp = requests.post(enrichr_link + '/addList', files={
    'list': (None, '\n'.join(genes)),
    'description': (None, description),
    })
    print(resp)
    if resp.status_code != 200:
        raise Exception('Enrichr failed with status {}: {}'.format(
          resp.status_code,
          resp.text,
        ))
    # wait a tinybit before returning link (backoff)
    time.sleep(3)
    result = resp.json()
    return dict(result, link=enrichr_link + '/enrich?dataset=' + resp.json()['shortId'])

def enrichr_get_top_results(userListId, bg, enrichr_link='https://amp.pharm.mssm.edu/Enrichr'):
    time.sleep(1)
    resp = requests.get(enrichr_link + '/enrich?userListId={}&backgroundType={}'.format(userListId, bg))
    print(resp)
    if resp.status_code != 200:
        raise Exception('Enrichr failed with status {}: {}'.format(
          resp.status_code,
          resp.text,
        ))
    time.sleep(3)
    return pd.DataFrame(resp.json()[bg], columns=['rank', 'term', 'pvalue', 'zscore', 'combinedscore', 'overlapping_genes', 'adjusted_pvalue', '', ''])


In [17]:
# Get Enrichr links for each cluster
enrichr_links = {}

for clustering_method, clusters in all_clusters.items():
    enrichr_links[clustering_method] = {}
    for index, genes in enumerate(clusters):
        try:
            link = enrichr_link_from_genes(genes, f'{clustering_method} cluster {index}')
        except:
            link = None
            print(f'Enrichr failed for {clustering_method}, cluster {index} genes')

        enrichr_links[clustering_method][index] = link

<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>
<Response [200

In [None]:
# Grab top results for each cluster
all_enrichr_results = []
for clustering_method, links in enrichr_links.items():
    for cluster, link in links.items():
        if link is None:
            continue
        for category, libraries in enrichr_libraries.items():
            for library in libraries:
                results = enrichr_get_top_results(link['userListId'], library).sort_values('pvalue').iloc[:top_n_results]
                results['link'] = link['link']
                results['library'] = library
                results['category'] = category
                results['direction'] = link_type
                results['cluster'] = cluster
                all_enrichr_results.append(results)
                #except:
                #    print('{}: {} {} {} cluster {} failed, continuing'.format(link, library, category, clustering_method, cluster))

df_enrichr_results = pd.concat(all_enrichr_results).reset_index()

display(df_enrichr_results)