In [4]:
import os
import random
import warnings
from pathlib import Path
from datetime import datetime
from typing import Union

import gdown
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc
import seaborn as sns
import squidpy as sq
from matplotlib import gridspec
from sklearn.preprocessing import MinMaxScaler

from nichecompass.utils import *
from nichecompass.models import NicheCompass
#from nichecompass.utils import (add_gps_from_gp_dict_to_adata,
#                                compute_communication_gp_network,
#                                visualize_communication_gp_network,
#                                create_new_color_dict,
#                                extract_gp_dict_from_mebocost_ms_interactions,
#                                extract_gp_dict_from_nichenet_lrt_interactions,
#                                extract_gp_dict_from_omnipath_lr_interactions,
#                                filter_and_combine_gp_dict_gps_v2,
#                                generate_enriched_gp_info_plots)

  from .autonotebook import tqdm as notebook_tqdm


In [16]:

def create_folders(base_path: str, structure: dict, current_path: Union[str, Path, None] = None) -> None:
    """Create a nested directory tree described by a dictionary.

    Each key of ``structure`` whose value is a dict becomes a sub-directory of
    the current directory, and that dict is then processed recursively. Keys
    whose value is not a dict are ignored. Directories that already exist are
    left untouched.

    Parameters
    ----------
    base_path:
        Root directory of the tree. Used as the starting directory when
        ``current_path`` is not given. Missing parent directories are created.
    structure:
        Mapping of directory name to a (possibly empty) dict of sub-directories.
    current_path:
        Directory in which the entries of ``structure`` are created. Defaults
        to ``base_path``. May be given as a string or as a ``Path``.
    """
    # Normalise to Path so the "/" join below also works for string input.
    current_dir = Path(base_path) if current_path is None else Path(current_path)
    # parents=True: the root may sit below directories that do not exist yet.
    current_dir.mkdir(parents=True, exist_ok=True)
    for name, children in structure.items():
        if isinstance(children, dict):
            # The recursive call creates the sub-directory itself.
            create_folders(base_path, children, current_dir / str(name))


In [5]:
# Notebook identity and a run timestamp used to label saved artifacts.
notebook_name = "0-b2c_load_raw_data.ipynb"
notebook_path = Path.cwd() / notebook_name
current_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Project data lives in a ".data" folder inside the parent of the working directory.
repo_path = Path.cwd().parent
data_folder_path = repo_path / ".data"
nichecompass_path = data_folder_path / "nichecompass"
nichecompass_data_path = nichecompass_path / "data"
# parents=True also creates ".data" and "nichecompass" when they are missing,
# so this cell works on a fresh checkout.
nichecompass_data_path.mkdir(parents=True, exist_ok=True)


In [45]:
# Join path components onto the working directory. Path.joinpath does this
# directly and needs nothing beyond the pathlib import (`reduce` is not among
# this notebook's explicit imports).
Path.cwd().joinpath("test", "test2")

PosixPath('/mnt/LaCIE/ceger/Projects/SocialNichesHD/0-Preprocessing/test/test2')

In [22]:
# Root folder for the NicheCompass files, kept as a string; the ".." segment
# is left unresolved.
base_path = str(Path.cwd().joinpath("..", ".data", "nichecompass"))

# Folder layout passed to create_folders: each key is a folder name and each
# value is the dict of its sub-folders ({} for a leaf).
structure = {
    "artifacts": {
        notebook_name: {
            current_timestamp: {leaf: {} for leaf in ("model", "figures")},
        },
    },
    "data": {
        leaf: {}
        for leaf in ("gene_annotations", "gene_programs", "spatial_omics")
    },
}

create_folders(base_path, structure)

In [21]:
# Define paths
# NOTE: the file names below interpolate `species` and `current_timestamp`,
# so both must already be defined when this cell runs.
ga_data_folder_path = nichecompass_path / "data" / "gene_annotations"
gp_data_folder_path = nichecompass_path / "data" / "gene_programs"
so_data_folder_path = nichecompass_path / "data" / "spatial_omics"

# Gene program resources (kept as plain strings).
omnipath_lr_network_file_path = f"{gp_data_folder_path}/omnipath_lr_network.csv"
collectri_tf_network_file_path = f"{gp_data_folder_path}/collectri_tf_network_{species}.csv"
nichenet_lr_network_file_path = f"{gp_data_folder_path}/nichenet_lr_network_v2_{species}.csv"
nichenet_ligand_target_matrix_file_path = f"{gp_data_folder_path}/nichenet_ligand_target_matrix_v2_{species}.csv"
mebocost_enzyme_sensor_interactions_folder_path = f"{gp_data_folder_path}/metabolite_enzyme_sensor_gps"
gene_orthologs_mapping_file_path = f"{ga_data_folder_path}/human_mouse_gene_orthologs.csv"

# NOTE(review): unlike the data folders above, this path is relative to the
# kernel's working directory rather than anchored at nichecompass_path -
# confirm that this location is intended.
artifacts_folder_path = "../../../artifacts"
model_folder_path = f"{artifacts_folder_path}/single_sample/{current_timestamp}/model"
figure_folder_path = f"{artifacts_folder_path}/single_sample/{current_timestamp}/figures"

# Create every folder the paths above point into, including the gene
# annotations folder that holds the ortholog mapping file.
os.makedirs(model_folder_path, exist_ok=True)
os.makedirs(ga_data_folder_path, exist_ok=True)
os.makedirs(gp_data_folder_path, exist_ok=True)
os.makedirs(figure_folder_path, exist_ok=True)
os.makedirs(so_data_folder_path, exist_ok=True)

In [18]:
# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
# NOTE(review): the AnnData file read later in this notebook is named
# "...VisiumHD_mouse_lung_bin2cell.h5ad" - confirm this label is still intended.
dataset = "starmap_plus_mouse_cns"
species = "mouse"
spatial_key = "spatial"
n_neighbors = 4

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Names of the AnnData entries used by the model
counts_key = "counts"
adj_key = "spatial_connectivities"
gp_names_key = "nichecompass_gp_names"
active_gp_names_key = "nichecompass_active_gp_names"
gp_targets_mask_key = "nichecompass_gp_targets"
gp_targets_categories_mask_key = "nichecompass_gp_targets_categories"
gp_sources_mask_key = "nichecompass_gp_sources"
gp_sources_categories_mask_key = "nichecompass_gp_sources_categories"
latent_key = "nichecompass_latent"

# Architecture
# Switch to "gatv2conv" when enough compute and memory are available.
conv_layer_encoder = "gcnconv"
active_gp_thresh_ratio = 1e-2

# Trainer
n_epochs = 400
n_epochs_all_gps = 25
lr = 1e-3
lambda_edge_recon = 500_000.0
lambda_gene_expr_recon = 300.0
lambda_l1_masked = 0.0  # regularization of prior GPs
lambda_l1_addon = 30.0  # regularization of de novo GPs
# Raise when more memory is available; lower to save memory.
edge_batch_size = 1024
n_sampled_neighbors = 4
use_cuda_if_available = True

# ---------------------------------------------------------------------------
# Analysis
# ---------------------------------------------------------------------------
cell_type_key = "Main_molecular_cell_type"
latent_leiden_resolution = 0.4
latent_cluster_key = f"latent_leiden_{latent_leiden_resolution}"
sample_key = "batch"
spot_size = 0.2
differential_gp_test_results_key = "nichecompass_differential_gp_test_results"

In [16]:
# Get time of notebook execution for timestamping saved artifacts.
# The year-first "%Y-%m-%d_%H-%M-%S" layout matches the timestamp format used
# elsewhere in this notebook and sorts chronologically as plain text.
now = datetime.now()
current_timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")

In [15]:
# Read the AnnData file stored under "processed/preprocessing" in the data
# folder, then display its summary as the cell output.
adata = sc.read_h5ad(
    data_folder_path.joinpath(
        "processed",
        "preprocessing",
        "2024-11-22_16-58-56_VisiumHD_mouse_lung_bin2cell.h5ad",
    )
)
adata

AnnData object with n_obs × n_vars = 313497 × 17743
    obs: 'bin_count', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells'
    uns: 'spatial'
    obsm: 'spatial', 'spatial_cropped'

In [None]:
# Scratch expression: a tuple pairing base_path with (1, 2, 3); nothing is assigned.
base_path, (1, 2, 3)

In [22]:
# Build the OmniPath gene programs (ligand genes are the sources, receptor
# genes the targets). The call is made with load_from_disk=False and
# save_to_disk=True, and the gene-count distribution plot is saved as an SVG
# in the figure folder.
omnipath_gp_dict = extract_gp_dict_from_omnipath_lr_interactions(
    species=species,
    load_from_disk=False,
    save_to_disk=True,
    lr_network_file_path=omnipath_lr_network_file_path,
    gene_orthologs_mapping_file_path=gene_orthologs_mapping_file_path,
    plot_gp_gene_count_distributions=True,
    gp_gene_count_distributions_save_path=(
        f"{figure_folder_path}/omnipath_gp_gene_count_distributions.svg"
    ),
)

FileNotFoundError: [Errno 2] No such file or directory: '../../../data/gene_annotations/human_mouse_gene_orthologs.csv'