In [None]:
import decoupler as dc
from pathlib import Path
import os
import nichecompass as nc
import pandas as pd
import numpy as np

# Explore different databases
From NicheCompass we can query:
- OmniPath ligand–receptor (LR) interactions
- CollecTRI transcription factor (TF) networks
- MEBOCOST enzyme–sensor (ES) interactions
- NicheNet ligand–receptor–target (LRT) interactions

From decoupler we can query OmniPath for the following resources:

In [None]:
# List the resources that decoupler can query.
dc.show_resources()

## Let's start with gene programs from NicheCompass

### OmniPath API for ligand–receptor (LR) interactions in mouse

In [None]:
# Human-to-mouse ortholog table, expected one directory above the working
# directory under data/raw/.
orthologs_csv_path = Path.cwd().parents[0].joinpath(
    "data", "raw", "human_mouse_gene_orthologs.csv"
)

# Build the OmniPath ligand-receptor gene-program dictionary for mouse,
# without plotting the gene-count distributions.
lr_interactions = nc.utils.extract_gp_dict_from_omnipath_lr_interactions(
    species="mouse",
    gene_orthologs_mapping_file_path=orthologs_csv_path,
    plot_gp_gene_count_distributions=False,
)
print(type(lr_interactions))

In [None]:
# Turn the gene-program dictionary into a table: each dictionary key becomes a row.
lr_df = pd.DataFrame.from_dict(lr_interactions, orient="index")

# Frequency of each value in the `targets_categories` column.
lr_df["targets_categories"].value_counts()

In [None]:
# Display the ligand-receptor gene-program table built from `lr_interactions`.
lr_df

## Extract liana consensus LR database

In [None]:
# NOTE(review): liana is imported at first use here rather than in the import
# cell at the top of the notebook.
import liana as li
# List the resources available through liana.
li.resource.show_resources()

In [None]:
# Load liana's "mouseconsensus" resource.
lr_consensus = li.resource.select_resource("mouseconsensus")

In [None]:
# Display the liana "mouseconsensus" resource loaded above.
lr_consensus

## Extract nichenet lrt interactions

In [None]:
# Human-to-mouse ortholog table, expected one directory above the working
# directory under data/raw/.
orthologs_csv_path = Path.cwd().parents[0].joinpath(
    "data", "raw", "human_mouse_gene_orthologs.csv"
)

# Build the NicheNet ligand-receptor-target gene-program dictionary for mouse,
# without plotting the gene-count distributions.
lrt_interactions = nc.utils.extract_gp_dict_from_nichenet_lrt_interactions(
    species="mouse",
    gene_orthologs_mapping_file_path=orthologs_csv_path,
    plot_gp_gene_count_distributions=False,
)

In [None]:
# Turn the NicheNet gene-program dictionary into a table: each dictionary key becomes a row.
lrt_df = pd.DataFrame.from_dict(lrt_interactions, orient='index')


### Reading NicheNet weighted networks for mouse from resources extracted from the main NicheNet repo

In [None]:
# Both weighted-network CSV files are expected one directory above the
# working directory, under data/raw/.
raw_data_dir = Path(os.getcwd()).parents[0] / "data" / "raw"
gr_df = pd.read_csv(raw_data_dir / "gr.csv")
lr_sig_df = pd.read_csv(raw_data_dir / "lr_sig.csv")

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
gr_df.info()
lr_sig_df.info()

## Extract mebocost es interactions

In [None]:
# Directory handed to NicheCompass as `dir_path`: data/raw/ one level above
# the working directory, passed as a plain string.
raw_data_dir = str(Path(os.getcwd()).parents[0] / "data" / "raw")

# Build the MEBOCOST gene-program dictionary for mouse, without plotting the
# gene-count distributions.
es_interactions = nc.utils.extract_gp_dict_from_mebocost_es_interactions(
    species="mouse",
    plot_gp_gene_count_distributions=False,
    dir_path=raw_data_dir,
)

In [None]:
# Build the table in one chain: dictionary keys become rows, the key column is
# pulled out of the index, all five columns are relabelled positionally, and
# the key column (now `es_name`) is put back as the index.
es_df = (
    pd.DataFrame.from_dict(es_interactions, orient="index")
    .reset_index()
    .set_axis(
        ["es_name", "sources", "sources_categories", "targets", "targets_categories"],
        axis=1,
    )
    .set_index("es_name")
)
es_df.info()

## Extract NicheCompass gene programs based on the CollecTRI TF network

In [None]:
# Build the CollecTRI TF-network gene-program dictionary for mouse through
# NicheCompass, without plotting the gene-count distributions.
tf_network = nc.utils.extract_gp_dict_from_collectri_tf_network(
    species="mouse",
    plot_gp_gene_count_distributions=False,
)

In [None]:
# Build the table in one chain: dictionary keys become rows, the key column is
# pulled out of the index, all five columns are relabelled positionally, and
# the key column (now `tf_name`) is put back as the index.
tf_df = (
    pd.DataFrame.from_dict(tf_network, orient="index")
    .reset_index()
    .set_axis(
        ["tf_name", "sources", "sources_categories", "targets", "targets_categories"],
        axis=1,
    )
    .set_index("tf_name")
)
tf_df.info()

# Extract decoupler collectri

In [None]:
# Fetch the CollecTRI network for mouse through decoupler, with
# `split_complexes` disabled.
net = dc.get_collectri(
    organism = "mouse",
    split_complexes=False,
)

In [None]:
# Summarise the decoupler CollecTRI result (`.info()` is called on it, so it
# is presumably a pandas DataFrame; verify).
net.info()

# Example of a dataset whose features we can intersect with these meta-resources

In [None]:
# NOTE(review): anndata is imported at first use here rather than in the
# import cell at the top of the notebook.
import anndata as ad
# Read the processed .h5ad file from data/processed/ one directory above the
# working directory.
x_hat_s = ad.read_h5ad(Path(os.getcwd()).parents[0] / "data" / "processed" / "mouse1_slice153_x_hat_s.h5ad")

In [None]:
# NOTE(review): `lr_df` is rebound here to a copy of the liana consensus
# resource; earlier in the notebook the same name held the NicheCompass
# OmniPath gene-program table.
lr_df = lr_consensus.copy()
lr_df.info()

# Expand complexes in the ligand/receptor columns via liana's helper
# (presumably one row per subunit combination; confirm against the liana docs).
lr_df_exploded = li.resource.explode_complexes(lr_df, SOURCE="ligand", TARGET="receptor")
lr_df_exploded.info()

In [None]:
# Lower-case both gene columns before comparing them with the dataset's
# variable names.
for column in ("ligand", "receptor"):
    lr_df[column] = lr_df[column].str.lower()

# For each column print: shared genes / dataset genes | distinct resource genes.
measured_genes = set(x_hat_s.var_names)
for column in ("ligand", "receptor"):
    resource_genes = set(lr_df[column])
    print(
        len(measured_genes & resource_genes), "/", len(x_hat_s.var_names), " | ", len(resource_genes)
    )


In [None]:
# Lower-case both gene columns of the complex-expanded resource before
# comparing them with the dataset's variable names.
for column in ("ligand", "receptor"):
    lr_df_exploded[column] = lr_df_exploded[column].str.lower()

# For each column print: shared genes / dataset genes | distinct resource genes.
measured_genes = set(x_hat_s.var_names)
for column in ("ligand", "receptor"):
    resource_genes = set(lr_df_exploded[column])
    print(
        len(measured_genes & resource_genes), "/", len(x_hat_s.var_names), " | ", len(resource_genes)
    )


In [None]:
import squidpy as sq
# Distinct labels found in the two annotation columns.
class_labels = x_hat_s.obs["class_label"].astype("category").cat.categories
subclass_labels = x_hat_s.obs["subclass"].astype("category").cat.categories
print(subclass_labels)

# Take the (n_cells, 2) coordinate array straight from the two centroid
# columns instead of zipping them row by row in Python; this also keeps the
# shape (0, 2) when there are no cells.
x_hat_s.obsm["spatial"] = x_hat_s.obs[["centroid_x", "centroid_y"]].to_numpy()

# NOTE(review): this loop only skips subclasses with fewer than 100 cells and
# does nothing for the others yet. Its one lasting effect is leaving
# `label` / `subsample_x_hat_s` bound to the last subclass, which a later
# cell reads.
for label in subclass_labels:
    subsample_x_hat_s = x_hat_s[x_hat_s.obs["subclass"] == label]
    if subsample_x_hat_s.shape[0] < 100:
        continue

In [None]:
# NOTE(review): omnipath is imported at first use here rather than in the
# import cell at the top of the notebook.
import omnipath as op
# Fetch OmniPath's PostTranslational interactions.
resource = op.interactions.PostTranslational.get()

In [None]:
gr_df.info()

# Lower-case both endpoint columns of the network before comparing them with
# the dataset's variable names.
for endpoint in ("from", "to"):
    gr_df[endpoint] = gr_df[endpoint].str.lower()

# Restrict the dataset to the cell subclasses of interest.
SUBCLASSES_TO_EXPLORE = ["Astro", "L2/3 IT"]
in_explored_subclass = x_hat_s.obs["subclass"].isin(SUBCLASSES_TO_EXPLORE)
x_hat_s_subsampled = x_hat_s[in_explored_subclass]

# For each endpoint print: shared genes / dataset genes | distinct network genes.
subsample_genes = set(x_hat_s_subsampled.var_names)
for endpoint in ("from", "to"):
    network_genes = set(gr_df[endpoint])
    print(
        len(subsample_genes & network_genes), "/", len(x_hat_s_subsampled.var_names), " | ", len(network_genes)
    )

In [None]:
import scipy
def get_expressed_genes(adata, pct, min_expression=0.1):
    """Return the genes detected in at least a fraction ``pct`` of the cells.

    A gene counts as detected in a cell when its value in ``adata.X`` is
    strictly greater than ``min_expression``.

    Parameters
    ----------
    adata
        Object exposing ``X`` (cells x genes, dense array or SciPy sparse
        matrix/array), ``shape`` and ``var_names``.
    pct : float
        Minimum fraction of cells (0-1) in which a gene must be detected.
    min_expression : float, default 0.1
        Detection threshold applied to the values in ``adata.X``.

    Returns
    -------
    The entries of ``adata.var_names`` whose detection fraction is >= ``pct``.
    Empty when ``adata`` has no cells.
    """
    n_cells_in_matrix = adata.shape[0]
    if n_cells_in_matrix == 0:
        # No cells: nothing can be detected, and this avoids dividing by zero.
        return adata.var_names[:0]

    # For a SciPy sparse *matrix*, .sum(axis=0) returns a dense 1 x n_genes
    # np.matrix (not a sparse object), so normalise every case to a flat
    # 1-D array before it is used as a boolean mask.
    detected_per_gene = np.asarray((adata.X > min_expression).sum(axis=0)).ravel()
    proportion = detected_per_gene / n_cells_in_matrix
    return adata.var_names[proportion >= pct]


In [None]:
# Every gene in the network's "to" column is treated as a candidate receptor.
all_receptors = set(gr_df["to"])
# NOTE(review): `subsample_x_hat_s` is not defined in this cell; on a fresh
# top-to-bottom run it is whatever the earlier `for label in subclass_labels`
# loop left bound (its last subclass). Confirm that is the intended receiver.
expressed_genes_receiver = get_expressed_genes(subsample_x_hat_s, 0.1)
expressed_receptors = all_receptors.intersection(expressed_genes_receiver)
# Distinct "from" genes of the network rows whose "to" gene is an expressed receptor.
# NOTE(review): `potential_ligands` is an array here; a later cell rebinds
# the same name to a dict keyed by cell type.
potential_ligands = gr_df.loc[gr_df["to"].isin(expressed_receptors),"from"].unique()

In [None]:
# Map each explored cell type to the distinct "from" genes of the network
# rows whose "to" gene is expressed in that cell type.
potential_ligands = {}
for i, cell_type in enumerate(SUBCLASSES_TO_EXPLORE):
    print("Iteration {} for celltype {}".format(i, cell_type))

    # Cells annotated with the current subclass.
    is_cell_type = x_hat_s.obs["subclass"] == cell_type
    subsample_x_hat_s = x_hat_s[is_cell_type]
    print("\tDimension of subsample {}".format(subsample_x_hat_s.shape))

    # Genes detected in at least 10% of these cells, restricted to the
    # candidate receptors.
    expressed_genes_receiver = get_expressed_genes(subsample_x_hat_s, 0.1)
    expressed_receptors = all_receptors & set(expressed_genes_receiver)

    targets_expressed_receptor = gr_df["to"].isin(expressed_receptors)
    potential_ligands[cell_type] = gr_df.loc[targets_expressed_receptor, "from"].unique()

In [None]:
# Print: ligands shared by both cell types / L2/3 IT ligand count | distinct Astro ligands.
astro_ligands = set(potential_ligands["Astro"])
l23_it_ligands = set(potential_ligands["L2/3 IT"])
print(
    len(astro_ligands & l23_it_ligands),
    "/", len(potential_ligands["L2/3 IT"]), " | ", len(astro_ligands)
)