In [None]:
import celloracle as co

In [None]:
# Register the R executable path with celloracle.network_analysis (machine-specific path).
co.network_analysis.set_R_path("/opt/R/4.0.4/bin/R")

In [None]:
# Run CellOracle's check of the R library installation.
co.test_R_libraries_installation()

In [None]:
# Run CellOracle's check of the Python package requirements.
co.check_python_requirements()

# GRN Model Construction and Network Analysis

In [None]:
# 0. Import

import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

In [None]:
# Show the CellOracle version in use.
# (celloracle is already imported as `co` in the first cell; this import is a repeat.)
import celloracle as co
co.__version__

In [None]:
# visualization settings
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 300

In [None]:
# Repeat of the R library check already run near the top of the notebook.
co.test_R_libraries_installation()

In [None]:
# Output directory for the figures and CSV tables written later in this notebook.
save_folder = "/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle-STR/Figures"
# Create the directory (and any missing parents); no error if it already exists.
os.makedirs(save_folder, exist_ok=True)

In [None]:
# NOTE(review): commented-out seuratToAnndata command kept for reference — presumably how the
# .h5ad file loaded in the next cell was produced from the Seurat .Rds object; confirm.
#/media/Home_Raid1_Voyager/b1lake/anaconda3/envs/celloracle_env/bin/seuratToAnndata "/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle/aSTR_Seurat.Rds" "/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle/aSTRAnnData"

In [None]:
# Load the single-cell data (AnnData stored as .h5ad) with scanpy.
adata = sc.read_h5ad("/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle/aSTRAnnData.h5ad")

In [None]:
# Display the summary representation of the loaded AnnData object.
adata

In [None]:
# Report the dataset dimensions: rows are cells, columns are genes.
n_cells, n_genes = adata.shape
print(f"Cell number is :{n_cells}")
print(f"Gene number is :{n_genes}")

In [None]:
# Load the base GRN dataframe from parquet and preview its first rows.
base_GRN = pd.read_parquet("/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle2/base_GRN_dataframe.parquet")
base_GRN.head()

In [None]:
# Create an empty Oracle object; data is imported into it in later cells.
oracle = co.Oracle()

In [None]:
# List the per-cell metadata columns (adata.obs) and the stored embeddings (adata.obsm keys).
print("metadata columns :", list(adata.obs.columns))
print("dimensional reduction: ", list(adata.obsm.keys()))

In [None]:
# Display the metadata column index (same columns as printed in the previous cell).
adata.obs.columns

In [None]:
# Unique cluster labels present in the "active_ident" metadata column.
cluster_list = adata.obs["active_ident"].unique()
cluster_list

In [None]:
# Store the "X_umap" embedding under the additional key "umap" as well.
adata.obsm['umap']=adata.obsm['X_umap']
# Plot the UMAP with cells colored by the "active_ident" cluster label.
sc.pl.umap(adata, color="active_ident")

In [None]:
# In this notebook, raw mRNA counts are used as the input of the Oracle object:
# overwrite adata.X with a copy of the "raw_count" layer (this mutates adata).
adata.X = adata.layers["raw_count"].copy()

# Import the AnnData into the Oracle object created earlier, using
# "active_ident" as the cluster column and "X_umap" as the embedding.
oracle.import_anndata_as_raw_count(adata=adata,
                                   cluster_column_name="active_ident",
                                   embedding_name="X_umap")

In [None]:
# Pass the base GRN dataframe to the Oracle object as its TF info matrix.
oracle.import_TF_data(TF_info_matrix=base_GRN)


In [None]:
# KNN imputation, step 1: run PCA and choose how many principal components to keep.
oracle.perform_PCA()

# Cumulative explained variance across PCs (computed once; first 100 PCs plotted).
cumulative_variance = np.cumsum(oracle.pca.explained_variance_ratio_)
plt.plot(cumulative_variance[:100])

# Elbow heuristic: first index at which the per-PC gain in explained variance
# crosses the 0.002 threshold (i.e. the boolean "gain > 0.002" flips).
threshold_crossings = np.where(np.diff(np.diff(cumulative_variance) > 0.002))[0]
if threshold_crossings.size > 0:
    n_comps = int(threshold_crossings[0])
else:
    # No crossing exists (every gain is on the same side of the threshold).
    # Fall back to the cap used below instead of failing with an IndexError.
    n_comps = 50
    print("No elbow found in the explained variance; falling back to 50 PCs")
plt.axvline(n_comps, c="k")
print(n_comps)

# Never use more than 50 PCs for the KNN imputation.
n_comps = min(n_comps, 50)

In [None]:
# Number of cells held by the Oracle object (rows of oracle.adata).
n_cell = oracle.adata.shape[0]
print(f"cell number is :{n_cell}")

In [None]:
# Choose k for the KNN imputation as 2.5% of the cell count, truncated to an integer.
k = int(0.025*n_cell)
print(f"Auto-selected k is :{k}")

In [None]:
# KNN imputation, step 2: run the imputation with the selected number of PCs.
# b_sight and b_maxl are scaled from k (8x and 4x respectively).
oracle.knn_imputation(n_pca_dims=n_comps, k=k, balanced=True, b_sight=k*8,
                      b_maxl=k*4, n_jobs=4)

In [None]:
# Save the processed Oracle object to disk so the steps above need not be re-run.
oracle.to_hdf5("/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle-STR/aSTR.celloracle.oracle")

In [None]:
# Reload the Oracle object from the file written in the previous cell.
oracle = co.load_hdf5("/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle-STR/aSTR.celloracle.oracle")

# GRN calculation

In [None]:
%%time
# Calculate a GRN for each population in the "active_ident" clustering unit.
# This step may take a long time (~30 minutes).
links = oracle.get_links(cluster_name_for_GRN_unit="active_ident", alpha=10,
                         verbose_level=10, test_mode=False)

In [None]:
# Show the contents of the palette
links.palette

In [None]:
# Network preprocessing: filter the inferred links before computing network scores.
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)

In [None]:
# Plot the degree distributions on a wider canvas.
# rc_context scopes the figure-size override to this plot and restores the
# previous rcParams afterwards (the [6, 4.5] default set in the visualization
# settings cell), even if plotting raises.
with plt.rc_context({"figure.figsize": [9, 4.5]}):
    links.plot_degree_distributions(plot_model=True,
                                    # save=f"{save_folder}/degree_distribution/",
                                    )

In [None]:
# Calculate network scores for the filtered links. It takes several minutes.
links.get_score()

In [None]:
# Preview the first rows of the merged network-score table.
links.merged_score.head()

In [None]:
# Save the merged network-score table as CSV in the output folder.
links.merged_score.to_csv(f"{save_folder}/Merged_Links_Scores_for_STR.csv")

In [None]:
# Save the Links object (filtered links and scores) to disk.
links.to_hdf5(file_path="/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle-STR/STR_links.celloracle.links")

In [None]:
# Reload the Links object from the file written in the previous cell.
links = co.load_hdf5(file_path="/media/Scratch_SSD_Voyager/Blue/KPMP_10X/KPMP_Ref_analysis_12-2020/Celloracle-STR/STR_links.celloracle.links")

# Network analysis; Network score for each gene

In [None]:
# Visualize genes with high network centrality.
# First, check the cluster names stored in the Links object.
links.cluster

In [None]:
# Change the order of the palette rows to FIB -> aFIB -> MYOF
order = ['FIB', 'aFIB', 'MYOF']
links.palette = links.palette.loc[order]
links.palette

In [None]:
# Assign a custom hex color to each cluster's palette row, then show the result.
cluster_colors = {
    'FIB': '#562680',
    'aFIB': '#6F3980',
    'MYOF': '#AD8AE6',
}
for cluster_label, hex_color in cluster_colors.items():
    links.palette.loc[cluster_label] = hex_color
links.palette

In [None]:
# NOTE(review): sets links.thread_number to a one-element list; its effect is not
# visible from this notebook — confirm this assignment is intended and still needed.
links.thread_number = [1]

In [None]:
# Visualize the top 30 genes by network score for each cluster.
# One loop replaces three copy-pasted cells; the calls and their order are unchanged.
for cluster_label in ["FIB", "aFIB", "MYOF"]:
    links.plot_scores_as_rank(cluster=cluster_label, n_gene=30,
                              save=f"{save_folder}/ranked_score")

In [None]:
# Compare network scores between every pair of clusters.
# Loops replace nine copy-pasted cells; the order is the same as before
# (for each cluster pair: degree, eigenvector, then betweenness centrality).
cluster_pairs = [("FIB", "aFIB"), ("FIB", "MYOF"), ("aFIB", "MYOF")]
centrality_scores = [
    "degree_centrality_all",
    "eigenvector_centrality",
    "betweenness_centrality",
]

for first_cluster, second_cluster in cluster_pairs:
    for score_name in centrality_scores:
        # Start from a fresh figure, as each separate cell did, so the tick
        # format below never lands on a figure left over from a previous iteration.
        plt.figure()
        # Request scientific notation on the y axis before the comparison is drawn.
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        links.plot_score_comparison_2D(value=score_name,
                                       cluster1=first_cluster, cluster2=second_cluster,
                                       percentile=98, save=f"{save_folder}/score_comparison")

In [None]:
# Visualize network score dynamics across clusters for each gene of interest.
# One loop replaces eleven copy-pasted cells; the genes are plotted in the same order.
genes_of_interest = [
    "IGF1", "SMAD1", "FLI1", "KLF6", "STAT2", "PLAGL1",
    "THRB", "ETS1", "ZEB1", "NR2F2", "MEF2A",
]

for gene in genes_of_interest:
    links.plot_score_per_cluster(goi=gene, save=f"{save_folder}/network_score_per_gene/")

In [None]:
# Export the filtered links of each cluster as CSV.
# One loop replaces three copy-pasted cells. The former `.head()` calls were
# removed: they sat mid-cell, so their result was discarded and never displayed.
for cluster_name in ["FIB", "aFIB", "MYOF"]:
    filtered_links_df = links.filtered_links[cluster_name]
    filtered_links_df.to_csv(f"{save_folder}/Filtered_Links_for_{cluster_name}_Module.csv")