# Mouse Brain Atlas

- **Creator**: Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Date of Creation:** 22.01.2023
- **Date of Last Modification:** 10.01.2025 (Sebastian Birk; <sebastian.birk@helmholtz-munich.de>)

- In order to run this notebook, a trained model needs to be stored under ```../../../artifacts/{dataset}/models/{model_label}/{load_timestamp}```
    - dataset: ```mouse_brain_atlas```
    - model_label: ```reference```
    - load_timestamp: ```220824_000000_1```
- Run this notebook in the nichecompass-reproducibility environment, installable from ```../../../envs/environment.yaml```.

## 1. Setup

### 1.1 Import Libraries

In [1]:
# Automatically reload imported modules before code is executed, so edits to
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the shared utils directory (relative to the working directory) to the
# module search path so that modules stored there can be imported.
sys.path.append("../../../utils")

In [3]:
import math
import os
import warnings

import matplotlib.patches
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import scanpy as sc

from nichecompass.models import NicheCompass
from nichecompass.utils import create_new_color_dict

from analysis_utils import plot_category_in_latent_and_physical_space

  File "/home/aih/sebastian.birk/miniconda3/envs/nichecompass-reproducibility/lib/python3.9/site-packages/urllib3/connectionpool.py", line 404, in _make_request
    self._validate_conn(conn)
  File "/home/aih/sebastian.birk/miniconda3/envs/nichecompass-reproducibility/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1061, in _validate_conn
    conn.connect()
  File "/home/aih/sebastian.birk/miniconda3/envs/nichecompass-reproducibility/lib/python3.9/site-packages/urllib3/connection.py", line 419, in connect
    self.sock = ssl_wrap_socket(
  File "/home/aih/sebastian.birk/miniconda3/envs/nichecompass-reproducibility/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 458, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(
  File "/home/aih/sebastian.birk/miniconda3/envs/nichecompass-reproducibility/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 502, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
  File "/

### 1.2 Define Parameters

In [4]:
# Dataset identifier; used below to build the artifact paths and file names.
dataset = "mouse_brain_atlas"

#### 1.2.1 Generic Parameters

In [5]:
## Model
# Keys in `adata.uns` holding the names of all and of the active gene programs
active_gp_names_key = "nichecompass_active_gp_names"
gp_names_key = "nichecompass_gp_names"

#### 1.2.2 Dataset-specific Parameters

In [6]:
# Trained model run to load
model_label = "reference"
load_timestamp = "220824_000000_1"

# Annotation keys (`sample_key` is used to index `adata.obs` further below)
sample_key = "batch"
cell_type_key = "cell_type"

# Sample labels "batch1" ... "batch239"
samples = ["batch" + str(batch_idx) for batch_idx in range(1, 240)]

# Plotting
spot_size = 50

# Latent clustering; the cluster key embeds the Leiden resolution
latent_leiden_resolution = 0.2
latent_cluster_key = f"latent_leiden_{latent_leiden_resolution}"

### 1.3 Run Notebook Setup

In [7]:
# Use a 6 x 6 default figure size for scanpy plots.
sc.set_figure_params(figsize=(6, 6))

In [8]:
# Ignore future warnings and user warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=UserWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)

In [9]:
# Global matplotlib font settings for all figures of this notebook
plt.rcParams.update({
    'font.family': 'Helvetica',
    'font.size': 5,
})

In [10]:
# Hex colors of the niches labeled "0" ... "16", listed in label order
_niche_colors = [
    "#66C5CC", "#F6CF71", "#F89C74", "#DCB0F2",
    "#87C55F", "#9EB9F3", "#FE88B1", "#C9DB74",
    "#8BE0A4", "#B497E7", "#D3B484", "#B3B3B3",
    "#276A8C", "#DAB6C4", "#9B4DCA", "#9D88A2",
    "#FF4D4D",
]

# Mapping from niche label (as string) to its color
niche_color_map = {
    str(niche_idx): hex_color
    for niche_idx, hex_color in enumerate(_niche_colors)
}

### 1.4 Configure Paths and Create Directories

In [11]:
# Artifact locations of the selected dataset / model label / timestamp
model_folder_path = f"../../../artifacts/{dataset}/models/{model_label}/{load_timestamp}"
figure_folder_path = f"../../../artifacts/{dataset}/figures/{model_label}/{load_timestamp}"
result_folder_path = f"../../../artifacts/{dataset}/results/{model_label}/{load_timestamp}"

# Make sure the output directories for figures and results exist
for output_dir in (figure_folder_path, result_folder_path):
    os.makedirs(output_dir, exist_ok=True)

## 2. Model

In [None]:
# Load the trained model from the model folder. No AnnData object is passed in;
# presumably it is read from the named file inside that folder — TODO confirm.
adata_file_name = f"{dataset}_{model_label}.h5ad"
model = NicheCompass.load(
    dir_path=model_folder_path,
    adata=None,
    adata_file_name=adata_file_name,
    gp_names_key=gp_names_key,
)

In [None]:
# For both the full and the active gene program name lists: drop every name
# containing "Add-on " and replace blanks in the remaining names by underscores
for uns_key in (gp_names_key, active_gp_names_key):
    model.adata.uns[uns_key] = np.array([
        gp_name.replace(" ", "_")
        for gp_name in model.adata.uns[uns_key]
        if "Add-on " not in gp_name
    ])

In [None]:
# Check number of active gene programs
active_gps = model.get_active_gps()
print(f"Number of total gene programs: {len(model.adata.uns[gp_names_key])}.")
print(f"Number of active gene programs: {len(active_gps)}.")

# Gene program summary table as returned by the model
gp_summary_df = model.get_gp_summary()
# NOTE(review): this expression is not the last statement of the cell, so its
# value (rows flagged as active, from position 20 onwards) is not rendered.
gp_summary_df[gp_summary_df["gp_active"] == True][20:]

# Overwrite the hard-coded sample list with the samples present in the data
samples = model.adata.obs[sample_key].unique().tolist()
# Presumably stores the active gene program scores in `model.adata.obs`
# (going by the method name) — TODO confirm
model.add_active_gp_scores_to_obs()

## 3. Analysis

### 3.1 Create Figures

In [12]:
# Display the model directory from which the AnnData object is read below
model_folder_path

'../../../artifacts/mouse_brain_atlas/models/reference/220824_000000_1'

In [13]:
# Read the AnnData object stored in the model directory.
# NOTE(review): the recorded run of this cell failed with an OSError reporting
# a truncated file; the stored file may need to be regenerated — TODO confirm.
adata = sc.read_h5ad(f"{model_folder_path}/anndata_umap_with_clusters.h5ad")

OSError: Unable to synchronously open file (truncated file: eof = 19976421376, sblock->base_addr = 0, stored_eof = 21314909440)

In [None]:
# Niche selection: count the cells per niche and keep the niches with more
# than 100,000 cells
niche_cell_counts = adata.obs["nichecompass_latent_cluster"].value_counts().to_dict()
retained_niches = [
    niche for niche, n_cells in niche_cell_counts.items() if n_cells > 100_000
]

# Filtering: restrict the data to the cells of the retained niches
is_in_retained_niche = adata.obs["nichecompass_latent_cluster"].isin(retained_niches)
adata_filtered = adata[is_in_retained_niche]
print(f"retaining {len(adata_filtered)} of {len(adata)} cells following filtering")

In [None]:
# Number of niches kept by the size filter
len(retained_niches)

In [None]:
# Visualize a 1% subsample of the embeddings, colored by dataset
adata_filtered_subsample = sc.pp.subsample(
    adata_filtered, fraction=0.01, copy=True)

fig = sc.pl.umap(adata_filtered_subsample,
                 color="dataset",
                 title="NicheCompass GP embedding",
                 size=1, frameon=False,
                 return_fig=True)
# Save into the figure folder of this model run (same location as the niche
# detail figures) instead of the current working directory
fig.savefig(f"{figure_folder_path}/r3c12_gp_embedding.svg")

In [None]:
# UMAP of the subsample colored by niche, using the fixed niche color map
sc.pl.umap(adata_filtered_subsample,
           color="nichecompass_latent_cluster",
           size=1,
           palette=niche_color_map)

In [None]:
# Plot niche composition: number of cells per niche, stacked by dataset
freq_table = pd.crosstab(
    adata_filtered.obs["dataset"],
    adata_filtered.obs["nichecompass_latent_cluster"]
)

fig, ax = plt.subplots()
ax = freq_table.transpose().plot(kind="bar", stacked=True, ylabel="Number of cells", xlabel="NicheCompass niche", ax=ax)

# Dashed horizontal grid lines only; keep just the left and bottom spines
ax.grid(which='major', axis='y', linestyle='--')
ax.grid(False, axis='x')
ax.spines[['right', 'top']].set_visible(False)
ax.spines[['left', 'bottom']].set_linewidth(1)
ax.spines[['left', 'bottom']].set_color("black")

plt.xticks(rotation=0)

# Format the cell counts on the y axis with thousands separators
ax.get_yaxis().set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

# Save into the figure folder of this model run (same location as the niche
# detail figures) instead of the current working directory
fig.savefig(f"{figure_folder_path}/r3c12_number_of_cells.svg")

In [None]:
# Plot spatial distribution
merfish_section_label = "C57BL6J-1.083"
starmap_section_label = "well11"

fig, axs = plt.subplots(1, 2)

# Take an explicit copy of the selected section: the object gets a new obs
# column assigned in the niche visualization further below, and assigning to a
# view would otherwise trigger an implicit copy.
merfish_selected_section_adata = adata_filtered[
    adata_filtered.obs["section"] == merfish_section_label].copy()
sc.pl.spatial(merfish_selected_section_adata,
              spot_size=20,
              title="MERFISH",
              color="nichecompass_latent_cluster",
              palette=niche_color_map,
              ax=axs[0],
              return_fig=False,
              show=False,
              frameon=False)
# Hide the legend of the left panel
axs[0].legend().set_visible(False)

def rotate_origin_only(xy, radians):
    """
    Rotate a 2D point about the origin (0, 0).

    Parameters
    ----------
    xy:
        Pair ``(x, y)`` with the coordinates of the point.
    radians:
        Rotation angle in radians. For example, the point (1, 0) rotated by
        pi/2 ends up at approximately (0, -1).

    Returns
    ----------
    List ``[x_rotated, y_rotated]`` with the rotated coordinates.
    """
    x, y = xy
    cos_angle = math.cos(radians)
    sin_angle = math.sin(radians)

    return [x * cos_angle + y * sin_angle, -x * sin_angle + y * cos_angle]

# Take an explicit copy of the selected section, because its spatial
# coordinates are overwritten below; writing to a view would otherwise
# trigger an implicit copy.
starmap_selected_section_adata = adata_filtered[
    adata_filtered.obs["section"] == starmap_section_label].copy()

# Rotate all spatial coordinates of the section by pi/2 about the origin
spatial_coordinates = starmap_selected_section_adata.obsm["spatial"].tolist()
rotated_spatial_coordinates = [rotate_origin_only(xy, math.pi/2) for xy in spatial_coordinates]
starmap_selected_section_adata.obsm["spatial"] = np.array(rotated_spatial_coordinates)
sc.pl.spatial(starmap_selected_section_adata,
              spot_size=0.12,
              title="STARmap PLUS",
              color="nichecompass_latent_cluster",
              palette=niche_color_map,
              ax=axs[1],
              return_fig=False,
              show=False,
              frameon=False)

# Build one legend entry per niche of the color map and place the legend to
# the right of the second panel
legend_elements = [matplotlib.patches.Patch(facecolor=y, edgecolor=y, label=x) for x, y in niche_color_map.items()]

leg = axs[1].legend(handles=legend_elements,
                    loc="right",
                    bbox_to_anchor=(1.5, 0.5),
                    frameon=False)

# Save into the figure folder of this model run (same location as the niche
# detail figures) instead of the current working directory
fig.savefig(f"{figure_folder_path}/r3c12_slide_overview.svg")

In [None]:
# Visualize niches: one two-panel figure per retained niche, highlighting the
# cells of that niche in the selected MERFISH and STARmap PLUS sections
color_map = {"True": "blue", "False": "lightgrey"}

# (section AnnData, panel title, spot size) for the left and right panel
section_panels = [
    (merfish_selected_section_adata, "MERFISH", 20),
    (starmap_selected_section_adata, "STARmap PLUS", 0.12),
]

for selected_nichecompass_latent_cluster in retained_niches:

    fig, axs = plt.subplots(1, 2)

    for panel_ax, (section_adata, panel_title, panel_spot_size) in zip(axs, section_panels):
        # Flag the cells of the current niche; the flag is stored as string
        # so that it matches the keys of the color map
        belongs_to_niche = (
            section_adata.obs["nichecompass_latent_cluster"]
            == selected_nichecompass_latent_cluster
        )
        section_adata.obs["is_cluster"] = belongs_to_niche.astype("str")
        sc.pl.spatial(section_adata,
                      spot_size=panel_spot_size,
                      return_fig=False,
                      title=panel_title,
                      color="is_cluster",
                      show=False,
                      ax=panel_ax,
                      palette=color_map,
                      frameon=False)

    # Hide the legends of both panels
    for panel_ax in axs:
        panel_ax.legend().set_visible(False)

    fig.suptitle(f"niche {selected_nichecompass_latent_cluster}")
    plt.savefig(f"{figure_folder_path}/r3c12_slide_detail_{selected_nichecompass_latent_cluster}.svg")

### 3.2 Save Results

In [None]:
# Log normalize counts for cellxgene server.
# The raw counts are kept in a separate layer. They must be stored as a copy:
# otherwise the layer references the same matrix object as `X`, and the
# normalization / log transformation below may modify that matrix in place,
# leaving transformed values in the "counts" layer.
model.adata.layers['counts'] = model.adata.X.copy()
sc.pp.normalize_total(model.adata, target_sum=1e4)
sc.pp.log1p(model.adata)

# Store gp summary in adata, with all columns cast to strings
gp_summary = model.get_gp_summary()
for col in gp_summary.columns:
    gp_summary[col] = gp_summary[col].astype(str)
model.adata.uns["nichecompass_gp_summary"] = gp_summary

# Write the processed AnnData object to the result folder of this model run
model.adata.write(f"{result_folder_path}/{dataset}_analysis.h5ad")