In [104]:
# 0. Import

import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

import celloracle as co
co.__version__

'0.16.0'

In [105]:
# visualization settings
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 300

# My inference

In [297]:
#path_grn = "/cellar/users/aklie/data/datasets/paul15/analysis/celloracle/grn.csv"
path_grn = "/cellar/users/aklie/data/datasets/paul15/analysis/celloracle/grn_MEP_0_v6.csv"

In [298]:
grn = pd.read_csv(path_grn)
grn

Unnamed: 0,tf,gene,score,pval,cluster
0,0910001L09Rik,Spi1,-0.002040,3.042956e-02,MEP_0
1,1100001G20Rik,Irf8,0.002184,1.339544e-02,MEP_0
2,2310014H01Rik,Spi1,0.005813,1.328686e-04,MEP_0
3,5730469M10Rik,Klf1,0.021323,3.999462e-08,MEP_0
4,Abcd1,Spi1,-0.004447,6.231667e-05,MEP_0
...,...,...,...,...,...
73768,Zfp238,Lig1,-0.015066,4.030942e-07,MEP_0
73769,Zfp238,Igfbp4,-0.015569,1.163055e-06,MEP_0
73770,Zfp238,Sh3bgrl3,-0.015633,1.577574e-04,MEP_0
73771,Zfp238,Wbp2,-0.016524,5.294441e-06,MEP_0


In [299]:
# Create a dict of dataframes for each cluster
grn_dict = {}
for cluster in grn["cluster"].unique():
    curr_grn = grn[grn["cluster"] == cluster]
    curr_grn = curr_grn.rename(columns={"score": "coef_mean", "pval": "p", "tf": "source", "gene": "target"})
    curr_grn["coef_abs"] = np.abs(curr_grn["coef_mean"])
    grn_dict[cluster] = curr_grn
my_links = co.Links(name="louvain_annot", links_dict=grn_dict)

In [300]:
my_links.filter_links(p=0.001, weight="coef_abs", threshold_number=4000)

In [301]:
my_links.get_network_score()

In [302]:
my_links_scores = my_links.merged_score

# Tutorial links object

In [303]:
links = co.data.load_tutorial_links_object()

In [304]:
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)

In [305]:
links.get_network_score()

In [306]:
links_scores = links.merged_score

# Compare node scores

In [307]:
# For each cluster, take top 10 nodes, and print out the overlap of my_links and links
metric = "degree_all"
for cluster in links_scores["cluster"].unique():
    links_top10 = links_scores[links_scores["cluster"] == cluster].sort_values(metric, ascending=False).head(10)
    my_links_top10 = my_links_scores[my_links_scores["cluster"] == cluster].sort_values(metric, ascending=False).head(10)
    overlap = len(set(links_top10.index).intersection(set(my_links_top10.index)))
    print(f"{cluster} overlap: {overlap}")

Ery_0 overlap: 0
Ery_1 overlap: 0
Ery_2 overlap: 0
Ery_3 overlap: 0
Ery_4 overlap: 0
Ery_5 overlap: 0
Ery_6 overlap: 0
Ery_7 overlap: 0
Ery_8 overlap: 0
Ery_9 overlap: 0
GMP_0 overlap: 0
GMP_1 overlap: 0
GMP_2 overlap: 0
GMPl_0 overlap: 0
GMPl_1 overlap: 0
Gran_0 overlap: 0
Gran_1 overlap: 0
Gran_2 overlap: 0
Gran_3 overlap: 0
MEP_0 overlap: 7
Mk_0 overlap: 0
Mo_0 overlap: 0
Mo_1 overlap: 0
Mo_2 overlap: 0


# Compare actual links called

In [308]:
tf = "E2f4"
cluster = "MEP_0"

In [309]:
my_links_df = my_links.filtered_links[cluster]
my_regulon = my_links_df[my_links_df["source"] == tf]
my_regulon.sort_values("coef_abs", ascending=False).iloc[:10]

Unnamed: 0,source,target,coef_mean,p,cluster,coef_abs
7580,E2f4,Gnai2,0.072412,1.940608e-13,MEP_0,0.072412
7581,E2f4,Prdx6,0.070796,2.233662e-13,MEP_0,0.070796
7582,E2f4,H2-K1,0.062146,1.030789e-10,MEP_0,0.062146
7583,E2f4,Snrpb,0.060433,4.515115e-13,MEP_0,0.060433
7584,E2f4,Eif3c,0.057441,6.731859e-13,MEP_0,0.057441
7585,E2f4,Aqp1,0.056434,1.110846e-09,MEP_0,0.056434
9288,E2f4,Txnip,-0.056241,6.175535e-10,MEP_0,0.056241
7586,E2f4,Eif3b,0.056041,1.273394e-09,MEP_0,0.056041
7587,E2f4,Mt1,0.049267,3.153427e-10,MEP_0,0.049267
7588,E2f4,Rpl23,0.048255,5.540771e-07,MEP_0,0.048255


In [310]:
my_positively_regulated_genes = my_regulon[my_regulon["coef_mean"] > 0]
my_negatively_regulated_genes = my_regulon[my_regulon["coef_mean"] < 0]
print(f"Number of positively regulated genes: {len(my_positively_regulated_genes)}")
print(f"Number of negatively regulated genes: {len(my_negatively_regulated_genes)}")

Number of positively regulated genes: 142
Number of negatively regulated genes: 30


In [311]:
links_df = links.filtered_links[cluster]
regulon = links_df[links_df["source"] == tf]
regulon.sort_values("coef_abs", ascending=False).iloc[:10]

Unnamed: 0,source,target,coef_mean,coef_abs,p,-logp
51962,E2f4,Prdx6,0.110142,0.110142,8.462215e-07,6.072516
27924,E2f4,Gnai2,0.098185,0.098185,5.801543e-15,14.236456
29799,E2f4,H2-K1,0.097043,0.097043,1.326578e-09,8.877267
62869,E2f4,Snrpb,0.091894,0.091894,3.35493e-13,12.474317
5563,E2f4,Aqp1,0.091122,0.091122,2.044744e-08,7.689361
21037,E2f4,Eif3b,0.090344,0.090344,1.045782e-09,8.980559
44058,E2f4,Ndufc2,0.084907,0.084907,4.239423e-10,9.372693
70900,E2f4,Txnip,-0.083561,0.083561,1.750257e-09,8.756898
54855,E2f4,Rac2,0.080631,0.080631,4.375286e-10,9.358994
44043,E2f4,Ndufb9,0.08033,0.08033,3.559827e-13,12.448571


In [312]:
positively_regulated_genes = regulon[regulon["coef_mean"] > 0]
negatively_regulated_genes = regulon[regulon["coef_mean"] < 0]
print(f"Number of positively regulated genes: {len(positively_regulated_genes)}")
print(f"Number of negatively regulated genes: {len(negatively_regulated_genes)}")

Number of positively regulated genes: 130
Number of negatively regulated genes: 26


In [313]:
# Check the overlap of each
positively_regulated_genes["target"].isin(my_positively_regulated_genes["target"]).sum()

120

In [314]:
negatively_regulated_genes["target"].isin(my_negatively_regulated_genes["target"]).sum()

24

# DONE!

---