In [None]:
import matplotlib.pyplot as plt
import spatialdata_plot
import seaborn as sns
from spatialdata import bounding_box_query
import squidpy as sq
import scanpy as sc
import spatialdata as sd
import anndata
import pandas as pd
import numpy as np
import scipy.sparse as sp
from tqdm import tqdm 

import pandas as pd

from sklearn.cluster import DBSCAN

In [None]:
# Load the annotated dataset from an .h5ad file; the path is relative to the working directory.
adata = sc.read('xenium-decidua-annotated.h5ad')

In [None]:
def set_elements_to_zero_lil(sparse_array, mask):
    """Set the entries of a sparse matrix selected by a sparse mask to zero.

    Parameters
    sparse_array: scipy sparse matrix
        matrix whose selected entries are cleared
    mask: scipy sparse matrix
        every position holding a nonzero value marks an entry of sparse_array to clear
        (expected to have the same shape as sparse_array)
    ----------
    Returns
    scipy sparse matrix in CSR format
        sparse_array with the masked positions set to 0
    """
    # Element-wise assignment is done in LIL format and converted back to CSR afterwards.
    editable = sparse_array.tolil()
    masked_rows, masked_cols = mask.tolil().nonzero()
    editable[masked_rows, masked_cols] = 0
    return editable.tocsr()

def bound_radius(adata,
                distances_key = 'spatial_distances',
                connectivities_key = 'spatial_connectivities',
                radius = 200):
    """Remove graph connections between cells that are `radius` or more apart.

    Parameters
    adata: anndata.AnnData
        object holding the distance and connectivity matrices in adata.obsp
    distances_key: str (default = 'spatial_distances')
        key in adata.obsp of the pairwise distance matrix
    connectivities_key: str (default = 'spatial_connectivities')
        key in adata.obsp of the connectivity matrix to prune
    radius: int (default = 200)
        connections whose stored distance is >= radius are set to 0
    ----------
    Returns
    adata: anndata.AnnData
        the same object, with adata.obsp[connectivities_key] replaced by the pruned matrix
    """
    # Sparse boolean matrix marking the stored distances at or beyond the cut-off.
    too_far = adata.obsp[distances_key] >= radius
    adata.obsp[connectivities_key] = set_elements_to_zero_lil(adata.obsp[connectivities_key], too_far)
    return adata


def compute_spatial_neighbors(adata,
                            radius = 200,
                            n_neighbors = None,
                            spatial_key = 'spatial',
                            delaunay = True,
                            fov_key = 'fov',
                            coord_type = 'generic'):
    """Build the spatial neighbor graph with squidpy, separately per field of view.

    The graph type is chosen in this order: k-nearest neighbors when n_neighbors is
    given, otherwise Delaunay triangulation when delaunay is True, otherwise a
    fixed-radius graph. For the first two, connections at distance >= radius are
    removed afterwards with bound_radius; for the last, radius is passed to squidpy.

    Parameters
    adata: anndata.AnnData
        annotated data object; adata.obs[fov_key] is converted to a categorical in place
    radius: int (default = 200)
        distance cut-off for connections
    n_neighbors: int (default = None)
        number of neighbors for the kNN graph; None disables the kNN branch
    spatial_key: str (default = 'spatial')
        key in adata.obsm holding the coordinates
    delaunay: bool (default = True)
        whether to use Delaunay triangulation when n_neighbors is None
    fov_key: str (default = 'fov')
        column in adata.obs passed to squidpy as library_key
    coord_type: str (default = 'generic')
        coordinate system passed to squidpy
    ----------
    Returns
    adata: anndata.AnnData
        the input object with the graph stored by squidpy
    """
    adata.obs[fov_key] = pd.Categorical(adata.obs[fov_key])

    # Arguments common to every graph type.
    shared_kwargs = dict(spatial_key = spatial_key, library_key = fov_key, coord_type = coord_type)

    if n_neighbors is None and not (delaunay == True):
        # Fixed-radius graph: squidpy applies the cut-off itself, nothing to prune.
        sq.gr.spatial_neighbors(adata, radius = radius, **shared_kwargs)
        return adata

    if n_neighbors is not None:
        sq.gr.spatial_neighbors(adata, n_neighs = n_neighbors, **shared_kwargs)
    else:
        sq.gr.spatial_neighbors(adata, delaunay = delaunay, **shared_kwargs)

    # kNN and Delaunay graphs are pruned to the radius afterwards.
    return bound_radius(adata, distances_key = 'spatial_distances', connectivities_key = 'spatial_connectivities', radius = radius)

def compute_niche_composition(adata,
                            connectivities_key = 'spatial_connectivities',
                            labels_key = 'mask_name',
                            min_cells = 0):
    """Count, for every cell, the labels of its neighbors in the spatial graph.

    Parameters
    adata: anndata.AnnData
        annotated data object with a connectivity matrix in adata.obsp
    connectivities_key: str (default = 'spatial_connectivities')
        key in adata.obsp of the connectivity matrix; nonzero entries of row i are the neighbors of cell i
    labels_key: str (default = 'mask_name')
        column in adata.obs holding the label counted for each neighbor
    min_cells: int (default = 0)
        cells whose total neighbor count is below this value are reported in cells_nonn
    ----------
    Returns
    adata_niche: anndata.AnnData
        object whose X is the cell x label table of neighbor counts (raw counts, not
        normalised) and whose obs is taken from adata.obs
    cells_nonn: list
        observation names of cells with fewer than min_cells neighbors
    """
    # Loop-invariant lookups, hoisted out of the per-cell loop.
    connectivities = adata.obsp[connectivities_key]
    labels = adata.obs[labels_key]
    n_cells = len(adata.obs_names)

    count_list = []
    for i in tqdm(range(n_cells), total = n_cells, desc="computing niche composition"):
        _, col = connectivities[i, :].nonzero()
        # `col` holds integer positions, so the lookup must be positional (.iloc);
        # plain `labels[col]` relies on a deprecated fallback when the index is not integer.
        count_list.append(labels.iloc[col].value_counts())

    # Labels absent from a neighborhood come out as NaN when the rows are aligned; treat them as 0.
    neighborhood_counts = pd.DataFrame(count_list, index=adata.obs_names).fillna(0)

    too_few = (neighborhood_counts.sum(axis = 1) < min_cells).to_numpy()
    cells_nonn = list(neighborhood_counts.index[np.where(too_few)[0]])

    adata_niche = anndata.AnnData(neighborhood_counts)
    adata_niche.obs = adata.obs.loc[neighborhood_counts.index, :]

    return adata_niche, cells_nonn

def compute_niches_fov(adata,
                       radius = 200,
                       fov_key = 'fov',
                       labels_key = 'cell_cluster',
                        spatial_key = 'spatial',
                        n_neighbors = None,
                        delaunay = False,
                        min_cells = 0,
                        nlargest = 3,
                        min_perc = 0.1,
                        coord_type = 'generic',
                        n_jobs = -1,
                        annotation_key = 'quiche_niche_fov',
                        **kwargs):
    """Compute the spatial graph and the per-cell neighborhood composition.

    Parameters
    adata: anndata.AnnData
        annotated data object (dimensions = cells x features)
    radius: int (default = 200)
        distance cut-off for bounding local niches, in the units of the coordinates
    fov_key: str (default = 'fov')
        column in adata.obs identifying the field of view / core; the graph is built per value
    labels_key: str (default = 'cell_cluster')
        column in adata.obs containing the cell phenotypes counted in each niche
    spatial_key: str (default = 'spatial')
        key in adata.obsm corresponding to x-y coordinates
    n_neighbors: int (default = None)
        number of nearest neighbors for the spatial graph; None selects the Delaunay or radius graph
    delaunay: bool (default = False)
        whether Delaunay triangulation is used when n_neighbors is None
    min_cells: int (default = 0)
        forwarded to compute_niche_composition; it only affects the list of sparse cells, which is not returned
    nlargest: int (default = 3)
        not used by this function
    min_perc: float (default = 0.1)
        not used by this function
    coord_type: str (default = 'generic')
        coordinate system passed to squidpy
    n_jobs: int (default = -1)
        not used by this function
    annotation_key: str (default = 'quiche_niche_fov')
        not used by this function
    **kwargs:
        accepted and ignored

    ----------
    Returns
    adata: anndata.AnnData
        the input object with the spatial graph added
    adata_niche: anndata.AnnData
        annotated data object where adata_niche.X is the cell x cell type table of neighbor counts
    """
    graph_adata = compute_spatial_neighbors(adata, radius = radius, n_neighbors = n_neighbors, spatial_key = spatial_key, delaunay = delaunay, fov_key = fov_key, coord_type = coord_type)
    # The second return value (cells with too few neighbors) is discarded.
    composition, _sparse_cells = compute_niche_composition(graph_adata, labels_key = labels_key, min_cells = min_cells)

    return graph_adata, composition

In [None]:
# Per-core spatial graph and neighborhood composition over level-1 cell types.
# n_neighbors and delaunay keep their defaults (None / False), so the graph is radius-based.
adata, adata_niche = compute_niches_fov(
    adata,
    radius=100,
    fov_key='core',
    labels_key='celltype_lvl1',
    spatial_key='spatial',
    coord_type='generic',
    min_cells=0,
)

## Figure S1F

In [None]:
# Three QC histograms side by side: transcripts per cell, cell area, nucleus/cell area ratio.
fig, axs = plt.subplots(1, 3, figsize=(15, 4))

qc_panels = [
    ("Total transcripts per cell", adata.obs["total_counts"]),
    ("Area of segmented cells", adata.obs["cell_area"]),
    ("Nucleus ratio", adata.obs["nucleus_area"] / adata.obs["cell_area"]),
]

for qc_ax, (qc_title, qc_values) in zip(axs, qc_panels):
    qc_ax.hist(qc_values, bins=50, edgecolor="black")
    qc_ax.set_title(qc_title)

fig.tight_layout()

fig.savefig('Xenium_QC.pdf')

plt.show()

## decPAM2 range testing (3 and 7; Figure S4A)

In [None]:
# Boolean mask for cells with more than 7 decPAM2-labelled graph neighbors.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 7

# Apply the same mask to the niche-composition and the expression objects.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Keep only cores that contain more than 10 flagged cells.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Flagged cells of the selected cores, and all cells of the selected cores.
# Both are copied because `.obs` is written below; assigning into a view would
# make anndata materialise it implicitly (with a warning).
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# 'niche' is True for cells present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs.index.isin(adata_filtered_filtered.obs.index)

# Restrict adata_filtered_3 to tissues 'DB' and 'DP'.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Make sure 'core' is categorical and show which cores remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Same niche flag on the full object.
adata.obs['niche'] = adata.obs.index.isin(adata_filtered_filtered.obs.index)

# Observation rows of the cells flagged as niche members; copied so the new
# column added below does not write into adata.obs.
hub_cells = adata.obs.loc[adata.obs['niche'] == True].copy()

# Centroid coordinates used as clustering input.
coords = hub_cells[['x_centroid', 'y_centroid']].to_numpy()

# DBSCAN over the niche cells (eps is in the units of the centroid coordinates).
db = DBSCAN(eps=40, min_samples=10)
db.fit(coords)
hub_cells['hub_cluster'] = db.labels_

# Distinct clusters per core, leaving out the label -1.
hub_counts = (
    hub_cells.loc[hub_cells['hub_cluster'].ne(-1)]
    .groupby('core')['hub_cluster']
    .nunique()
)

print(hub_counts)

# Highest cluster label; not displayed because this is not the last expression of the cell.
hub_cells['hub_cluster'].max()



# Number of distinct DBSCAN clusters (hubs) per tissue/core combination.
# DBSCAN labels noise points -1; they are dropped first so that noise is not
# counted as a hub. observed=False keeps every category combination, so
# combinations without any clustered cell get a count of 0.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Filter only for the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cell counts per core and tissue over the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Keep only tissue/core combinations that actually contain cells in `adata`.
hub_counts = hub_counts[np.array(
    [ct.loc[core_id, tissue_id] != 0
     for core_id, tissue_id in zip(hub_counts['core'], hub_counts['tissue'])],
    dtype=bool,
)]


# Hard-coded number of cores per tissue, used as the denominator below
# (presumably the total number of cores profiled per tissue; confirm against the study design).
manual_core_totals = {'DB': 13, 'DP': 19}

# tissue -> (cores with hubs, total cores, percentage) and tissue -> mean hubs per core.
freq_annotation, mean_hubs_per_tissue = {}, {}

for tissue, total_cores in manual_core_totals.items():
    tissue_data = hub_counts.loc[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a count and as a share of the manual total.
    nonzero_count = tissue_data['n_hubs'].gt(0).sum()
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Mean over the manual total, so cores missing from hub_counts count as zero.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


plt.figure(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One n_hubs series per tissue, in plotting order.
data_boxplot = [hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'] for tissue in unique_tissues]

# Boxes are drawn at z-order 0 so the jittered dots overlay them; flier markers
# have size 0 because every core is drawn as its own dot below.
plt.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'alpha': 0.5, 'color': 'black'},
    medianprops={'color': 'black', 'linewidth': 1},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
    flierprops={'marker': 'o', 'markerfacecolor': 'black', 'markersize': 0},
    zorder=0,
)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Horizontal jitter around the tissue's x position (unseeded, so dot placement varies between runs).
    jitter = np.random.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = jitter + i

    # One dot per core.
    plt.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # The frequency label sits 0.5 above the highest dot (or above 0 when the tissue has no rows).
    if tissue_data['n_hubs'].empty:
        y_top = 0
    else:
        y_top = tissue_data['n_hubs'].max()
    freq_y_text = y_top + 0.5
    plt.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
             ha='center', va='bottom', fontsize=10)

    # Mean hubs per core, written below the zero line.
    plt.text(i, -0.4, f"Mean: {mean_val:.2f}",
             ha='center', va='top', fontsize=9, color='green')

plt.xticks(np.arange(len(unique_tissues)), unique_tissues)
plt.xlabel('Tissue Type')
plt.ylabel('Number of Hubs per Core')
plt.title('Hubs per Core (DP and DB)')
# Leave room below for the mean labels and one extra unit above for the frequency labels.
plt.ylim(bottom=-0.5, top=plt.ylim()[1] + 1)
plt.savefig('./Hubspercore_r100_decPAM27.pdf')
plt.show()


In [None]:
# Boolean mask for cells with more than 3 decPAM2-labelled graph neighbors.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 3

# Apply the same mask to the niche-composition and the expression objects.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Keep only cores that contain more than 10 flagged cells.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Flagged cells of the selected cores, and all cells of the selected cores.
# Both are copied because `.obs` is written below; assigning into a view would
# make anndata materialise it implicitly (with a warning).
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# 'niche' is True for cells present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs.index.isin(adata_filtered_filtered.obs.index)

# Restrict adata_filtered_3 to tissues 'DB' and 'DP'.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Make sure 'core' is categorical and show which cores remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Same niche flag on the full object.
adata.obs['niche'] = adata.obs.index.isin(adata_filtered_filtered.obs.index)

# Observation rows of the cells flagged as niche members; copied so the new
# column added below does not write into adata.obs.
hub_cells = adata.obs.loc[adata.obs['niche'] == True].copy()

# Centroid coordinates used as clustering input.
coords = hub_cells[['x_centroid', 'y_centroid']].to_numpy()

# DBSCAN over the niche cells (eps is in the units of the centroid coordinates).
db = DBSCAN(eps=40, min_samples=10)
db.fit(coords)
hub_cells['hub_cluster'] = db.labels_

# Distinct clusters per core, leaving out the label -1.
hub_counts = (
    hub_cells.loc[hub_cells['hub_cluster'].ne(-1)]
    .groupby('core')['hub_cluster']
    .nunique()
)

print(hub_counts)

# Highest cluster label; not displayed because this is not the last expression of the cell.
hub_cells['hub_cluster'].max()



# Number of distinct DBSCAN clusters (hubs) per tissue/core combination.
# DBSCAN labels noise points -1; they are dropped first so that noise is not
# counted as a hub. observed=False keeps every category combination, so
# combinations without any clustered cell get a count of 0.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Filter only for the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cell counts per core and tissue over the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Keep only tissue/core combinations that actually contain cells in `adata`.
hub_counts = hub_counts[np.array(
    [ct.loc[core_id, tissue_id] != 0
     for core_id, tissue_id in zip(hub_counts['core'], hub_counts['tissue'])],
    dtype=bool,
)]


# Hard-coded number of cores per tissue, used as the denominator below
# (presumably the total number of cores profiled per tissue; confirm against the study design).
manual_core_totals = {'DB': 13, 'DP': 19}

# tissue -> (cores with hubs, total cores, percentage) and tissue -> mean hubs per core.
freq_annotation, mean_hubs_per_tissue = {}, {}

for tissue, total_cores in manual_core_totals.items():
    tissue_data = hub_counts.loc[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a count and as a share of the manual total.
    nonzero_count = tissue_data['n_hubs'].gt(0).sum()
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Mean over the manual total, so cores missing from hub_counts count as zero.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


plt.figure(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One n_hubs series per tissue, in plotting order.
data_boxplot = [hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'] for tissue in unique_tissues]

# Boxes are drawn at z-order 0 so the jittered dots overlay them; flier markers
# have size 0 because every core is drawn as its own dot below.
plt.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'alpha': 0.5, 'color': 'black'},
    medianprops={'color': 'black', 'linewidth': 1},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
    flierprops={'marker': 'o', 'markerfacecolor': 'black', 'markersize': 0},
    zorder=0,
)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Horizontal jitter around the tissue's x position (unseeded, so dot placement varies between runs).
    jitter = np.random.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = jitter + i

    # One dot per core.
    plt.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # The frequency label sits 0.5 above the highest dot (or above 0 when the tissue has no rows).
    if tissue_data['n_hubs'].empty:
        y_top = 0
    else:
        y_top = tissue_data['n_hubs'].max()
    freq_y_text = y_top + 0.5
    plt.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
             ha='center', va='bottom', fontsize=10)

    # Mean hubs per core, written below the zero line.
    plt.text(i, -0.4, f"Mean: {mean_val:.2f}",
             ha='center', va='top', fontsize=9, color='green')

plt.xticks(np.arange(len(unique_tissues)), unique_tissues)
plt.xlabel('Tissue Type')
plt.ylabel('Number of Hubs per Core')
plt.title('Hubs per Core (DP and DB)')
# Leave room below for the mean labels and one extra unit above for the frequency labels.
plt.ylim(bottom=-0.5, top=plt.ylim()[1] + 1)
plt.savefig('./Hubspercore_r100_decPAM23.pdf')
plt.show()


## decPAM2 >5 (Figure 5B)

In [None]:
# Boolean mask for cells with more than 5 decPAM2-labelled graph neighbors.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 5

# Apply the same mask to the niche-composition and the expression objects.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Keep only cores that contain more than 10 flagged cells.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Flagged cells of the selected cores, and all cells of the selected cores.
# Both are copied because `.obs` is written below; assigning into a view would
# make anndata materialise it implicitly (with a warning).
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# 'niche' is True for cells present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs.index.isin(adata_filtered_filtered.obs.index)

# Restrict adata_filtered_3 to tissues 'DB' and 'DP'.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Make sure 'core' is categorical and show which cores remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Same niche flag on the full object.
adata.obs['niche'] = adata.obs.index.isin(adata_filtered_filtered.obs.index)

In [None]:
# Observation rows of the cells flagged as niche members; copied so later
# column additions do not write into adata.obs.
hub_cells = adata.obs.loc[adata.obs['niche'] == True].copy()

# Centroid coordinates used as clustering input.
coords = hub_cells[['x_centroid', 'y_centroid']].to_numpy()

In [None]:
# DBSCAN over the niche cells (eps is in the units of the centroid coordinates).
db = DBSCAN(eps=40, min_samples=10)
db.fit(coords)
hub_cells['hub_cluster'] = db.labels_

# Distinct clusters per core, leaving out the label -1.
hub_counts = (
    hub_cells.loc[hub_cells['hub_cluster'].ne(-1)]
    .groupby('core')['hub_cluster']
    .nunique()
)

print(hub_counts)

In [None]:
# NOTE(review): this cell repeats the clustering of the preceding cell with the same parameters.
db = DBSCAN(eps=40, min_samples=10)
db.fit(coords)
hub_cells['hub_cluster'] = db.labels_

# Distinct clusters per core, leaving out the label -1.
hub_counts = (
    hub_cells.loc[hub_cells['hub_cluster'].ne(-1)]
    .groupby('core')['hub_cluster']
    .nunique()
)

print(hub_counts)

In [None]:
# Highest cluster label assigned by DBSCAN (displayed as the cell output).
hub_cells['hub_cluster'].max()

In [None]:
# Number of distinct DBSCAN clusters (hubs) per tissue/core combination.
# DBSCAN labels noise points -1; they are dropped first so that noise is not
# counted as a hub. observed=False keeps every category combination, so
# combinations without any clustered cell get a count of 0.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Filter only for the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cell counts per core and tissue over the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Keep only tissue/core combinations that actually contain cells in `adata`.
hub_counts = hub_counts[np.array(
    [ct.loc[core_id, tissue_id] != 0
     for core_id, tissue_id in zip(hub_counts['core'], hub_counts['tissue'])],
    dtype=bool,
)]


# Hard-coded number of cores per tissue, used as the denominator below
# (presumably the total number of cores profiled per tissue; confirm against the study design).
manual_core_totals = {'DB': 13, 'DP': 19}

# tissue -> (cores with hubs, total cores, percentage) and tissue -> mean hubs per core.
freq_annotation, mean_hubs_per_tissue = {}, {}

for tissue, total_cores in manual_core_totals.items():
    tissue_data = hub_counts.loc[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a count and as a share of the manual total.
    nonzero_count = tissue_data['n_hubs'].gt(0).sum()
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Mean over the manual total, so cores missing from hub_counts count as zero.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


plt.figure(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One n_hubs series per tissue, in plotting order.
data_boxplot = [hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'] for tissue in unique_tissues]

# Boxes are drawn at z-order 0 so the jittered dots overlay them; flier markers
# have size 0 because every core is drawn as its own dot below.
plt.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops={'facecolor': 'lightgray', 'alpha': 0.5, 'color': 'black'},
    medianprops={'color': 'black', 'linewidth': 1},
    whiskerprops={'color': 'black'},
    capprops={'color': 'black'},
    flierprops={'marker': 'o', 'markerfacecolor': 'black', 'markersize': 0},
    zorder=0,
)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Horizontal jitter around the tissue's x position (unseeded, so dot placement varies between runs).
    jitter = np.random.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = jitter + i

    # One dot per core.
    plt.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # The frequency label sits 0.5 above the highest dot (or above 0 when the tissue has no rows).
    if tissue_data['n_hubs'].empty:
        y_top = 0
    else:
        y_top = tissue_data['n_hubs'].max()
    freq_y_text = y_top + 0.5
    plt.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
             ha='center', va='bottom', fontsize=10)

    # Mean hubs per core, written below the zero line.
    plt.text(i, -0.4, f"Mean: {mean_val:.2f}",
             ha='center', va='top', fontsize=9, color='green')

plt.xticks(np.arange(len(unique_tissues)), unique_tissues)
plt.xlabel('Tissue Type')
plt.ylabel('Number of Hubs per Core')
plt.title('Hubs per Core (DP and DB)')
# Leave room below for the mean labels and one extra unit above for the frequency labels.
plt.ylim(bottom=-0.5, top=plt.ylim()[1] + 1)
plt.savefig('./Hubspercore.pdf')
plt.show()

In [None]:
# Copy the DBSCAN labels onto the full object; cells that are not in hub_cells get -1.
adata.obs['hub_cluster'] = hub_cells['hub_cluster'].reindex(adata.obs.index, fill_value=-1)

print(adata.obs['hub_cluster'].value_counts())

In [None]:
# Store the cluster labels as a categorical column (the plots below colour by this column).
adata.obs['hub_cluster'] = adata.obs['hub_cluster'].astype('category')

In [None]:
# Store the niche flag as a categorical column (the plots below colour by this column).
adata.obs['niche'] = adata.obs['niche'].astype('category')

## Figure 4A - hub demarcation

In [None]:
# Spatial map of core '50.0', coloured by DBSCAN hub label and by niche membership.
adata_core = adata[adata.obs['core'] == '50.0']

sq.pl.spatial_scatter(
    adata_core,
    color=['hub_cluster', 'niche'],
    library_id="spatial",
    shape=None,
    size=8,
    figsize=(10, 5),
    ncols=1,
    wspace=0.1,
    save='./hubsincore50.pdf',
)

In [None]:
# Spatial map of core '21.0', coloured by DBSCAN hub label and by niche membership.
adata_core = adata[adata.obs['core'] == '21.0']

sq.pl.spatial_scatter(
    adata_core,
    color=['hub_cluster', 'niche'],
    library_id="spatial",
    shape=None,
    size=8,
    figsize=(10, 5),
    ncols=1,
    wspace=0.1,
    save='./hubsincore21.pdf',
)

In [None]:
# Spatial map of core '28.0', coloured by DBSCAN hub label and by niche membership.
adata_core = adata[adata.obs['core'] == '28.0']

sq.pl.spatial_scatter(
    adata_core,
    color=['hub_cluster', 'niche'],
    library_id="spatial",
    shape=None,
    size=8,
    figsize=(10, 5),
    ncols=1,
    wspace=0.1,
    save='./hubsincore28.pdf',
)

In [None]:
# Spatial map of core '22.0', coloured by DBSCAN hub label and by niche membership.
adata_core = adata[adata.obs['core'] == '22.0']

sq.pl.spatial_scatter(
    adata_core,
    color=['hub_cluster', 'niche'],
    library_id="spatial",
    shape=None,
    size=8,
    figsize=(10, 5),
    ncols=1,
    wspace=0.1,
    save='./hubsincore22.pdf',
)

In [None]:
# Cores with at least one hub in the current hub_counts table.
hub_counts.loc[hub_counts['n_hubs'] > 0, 'core']

In [None]:
# All cells belonging to cores that have at least one hub in the current hub_counts table.
adata_hub = adata[adata.obs['core'].isin(hub_counts.loc[hub_counts['n_hubs'] > 0, 'core'])]

In [None]:
# Per-cell table for the enrichment test: cell type, niche membership ('area') and core.
cell_types, areas, cores = (adata_hub.obs[column] for column in ('celltype_lvl1', 'niche', 'core'))

df = pd.DataFrame({'cell_type': cell_types, 'area': areas, 'core': cores})

# 'area' must be boolean for the masks built later (the niche flag may be stored as a categorical).
if df['area'].dtype != bool:
    df = df.astype({'area': bool})


results = []
skipped_tests = []

# Get the list of unique cores.
unique_cores = df['core'].unique()

# One 2x2 table per (core, cell type): niche vs. non-niche against this type vs. all other types.
for core in unique_cores:
    # Subset the data for the current core.
    df_core = df[df['core'] == core]
    in_niche = df_core['area'] == True

    # Get unique cell types within this core.
    unique_cell_types = df_core['cell_type'].unique()

    for cell_type in unique_cell_types:
        is_type = df_core['cell_type'] == cell_type

        # Counts are converted to plain ints so that a zero denominator can be
        # tested explicitly; dividing NumPy integers by zero yields inf/nan
        # with a warning and never raises ZeroDivisionError.
        # a: in niche, cells of the given cell type.
        a = int((is_type & in_niche).sum())
        # b: in niche, cells that are not of that cell type.
        b = int((~is_type & in_niche).sum())
        # c: outside the niche, cells of the given cell type.
        c = int((is_type & ~in_niche).sum())
        # d: outside the niche, cells that are not of that cell type.
        d = int((~is_type & ~in_niche).sum())

        # Skip cell types with fewer than 10 cells in this core (a + c).
        # The recorded reason keeps its original label 'c_below_10'.
        if c + a < 10:
            skipped_tests.append({
                'core': core,
                'cell_type': cell_type,
                'a': a,
                'b': b,
                'c': c,
                'd': d,
                'skip_reason': 'c_below_10'
            })
            continue

        # Odds ratio (a*d)/(b*c); undefined (NaN) when the denominator is zero.
        denominator = b * c
        odds_ratio = (a * d) / denominator if denominator != 0 else np.nan

        # Enrichment score is log2(odds_ratio); NaN when the ratio is zero or undefined.
        enrichment_score = np.log2(odds_ratio) if odds_ratio > 0 else np.nan

        # Save the result.
        results.append({
            'core': core,
            'cell_type': cell_type,
            'a (Area A, cell type)': a,
            'b (Area A, other types)': b,
            'c (Area B, cell type)': c,
            'd (Area B, other types)': d,
            'odds_ratio': odds_ratio,
            'enrichment_score': enrichment_score
        })

# Convert the enrichment analysis results into a DataFrame (one row per tested core / cell type pair).
OR_results_df = pd.DataFrame(results)

# NOTE(review): `adjust_pvalues` is not defined in the part of the notebook visible here, and the
# rows collected in `results` carry no p-value column — confirm where it is defined and what it
# is expected to adjust; on a fresh kernel this line would raise NameError if it is missing.
OR_results_df = OR_results_df.groupby('core').apply(adjust_pvalues)

# Drop the index produced by the groupby/apply and use a plain integer index instead.
OR_results_df = OR_results_df.reset_index(drop=True)

# Sort the results by core and, within each core, by ascending enrichment score.
OR_results_df = OR_results_df.sort_values(by=['core', 'enrichment_score'], ascending=[True, True])

print("Enrichment Analysis Results:")
print(OR_results_df)


# Summarise the skipped (core, cell type) pairs, if any.
if not skipped_tests:
    print("\nNo tests were skipped due to zero counts or c < 10.")
else:
    skipped_df = pd.DataFrame(skipped_tests)

    # Per cell type and skip reason: number of skipped tests and the cores they came from.
    grouped_skips = skipped_df.groupby(['cell_type', 'skip_reason'])
    summary_skipped = grouped_skips.agg(
        tests_skipped=('core', 'count'),
        cores_skipped=('core', lambda x: list(np.unique(x)))
    ).reset_index()

    print("\nSummary of Skipped Tests:")
    print(summary_skipped)


## Figure 4C

In [None]:
# Number of non-NaN enrichment scores per cell type (one score per core).
counts = OR_results_df.groupby("cell_type")["enrichment_score"].count()

# Keep only cell types with at least 8 non-NaN scores.
keep_cell_types = counts.index[(counts >= 8).to_numpy()]
df_filtered = OR_results_df.loc[OR_results_df["cell_type"].isin(keep_cell_types)]

# Order the boxes by decreasing median enrichment.
order = (
    df_filtered.groupby("cell_type")["enrichment_score"]
    .median()
    .sort_values(ascending=False)
    .index
)

plt.figure(figsize=(6, 6))
sns.boxplot(data=df_filtered, x='cell_type', y='enrichment_score', order=order, color="lightgray")

# Vertical tick labels, and a dashed reference line at zero.
plt.xticks(rotation=90)
plt.axhline(0, color='black', linestyle='--', linewidth=1)

plt.xlabel("Cell Type")
plt.ylabel("Enrichment Score (log₂ odds ratio)")
plt.title("Enrichment Score Hub vs. Rest per Cell Type")
plt.tight_layout()
plt.savefig('./enrichmentscore_hubs.pdf')
plt.show()

## Figure S4A (testing varying radii)

In [None]:
# Rebuild the per-core spatial graph and the neighborhood composition over level-1 cell types.
# n_neighbors and delaunay keep their defaults (None / False), so the graph is radius-based.
adata, adata_niche = compute_niches_fov(
    adata,
    radius=100,
    fov_key='core',
    labels_key='celltype_lvl1',
    spatial_key='spatial',
    coord_type='generic',
    min_cells=0,
)



# Boolean mask for cells with more than 7 decPAM2-labelled graph neighbors.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 7

# Apply the same mask to the niche-composition and the expression objects.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Keep only cores that contain more than 10 flagged cells.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Flagged cells of the selected cores, and all cells of the selected cores.
# Both are copied because `.obs` is written below; assigning into a view would
# make anndata materialise it implicitly (with a warning).
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# 'niche' is True for cells present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs.index.isin(adata_filtered_filtered.obs.index)

# Restrict adata_filtered_3 to tissues 'DB' and 'DP'.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Make sure 'core' is categorical and show which cores remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Same niche flag on the full object.
adata.obs['niche'] = adata.obs.index.isin(adata_filtered_filtered.obs.index)

# Observation rows of the cells flagged as niche members; copied so the new
# column added below does not write into adata.obs.
hub_cells = adata.obs.loc[adata.obs['niche'] == True].copy()

# Centroid coordinates used as clustering input.
coords = hub_cells[['x_centroid', 'y_centroid']].to_numpy()

# DBSCAN over the niche cells (eps is in the units of the centroid coordinates).
db = DBSCAN(eps=40, min_samples=10)
db.fit(coords)
hub_cells['hub_cluster'] = db.labels_

# Distinct clusters per core, leaving out the label -1.
hub_counts = (
    hub_cells.loc[hub_cells['hub_cluster'].ne(-1)]
    .groupby('core')['hub_cluster']
    .nunique()
)

print(hub_counts)

# Highest cluster label; not displayed because this is not the last expression of the cell.
hub_cells['hub_cluster'].max()



# Number of distinct DBSCAN clusters (hubs) per tissue/core combination.
# DBSCAN labels noise points -1; they are dropped first so that noise is not
# counted as a hub. observed=False keeps every category combination, so
# combinations without any clustered cell get a count of 0.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Filter only for the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cell counts per core and tissue over the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Keep only tissue/core combinations that actually contain cells in `adata`.
hub_counts = hub_counts[np.array(
    [ct.loc[core_id, tissue_id] != 0
     for core_id, tissue_id in zip(hub_counts['core'], hub_counts['tissue'])],
    dtype=bool,
)]


# Hard-coded number of cores per tissue, used as the denominator below
# (presumably the total number of cores profiled per tissue; confirm against the study design).
manual_core_totals = {'DB': 13, 'DP': 19}

# tissue -> (cores with hubs, total cores, percentage) and tissue -> mean hubs per core.
freq_annotation, mean_hubs_per_tissue = {}, {}

for tissue, total_cores in manual_core_totals.items():
    tissue_data = hub_counts.loc[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a count and as a share of the manual total.
    nonzero_count = tissue_data['n_hubs'].gt(0).sum()
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Mean over the manual total, so cores missing from hub_counts count as zero.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r150_decPAM27.pdf')
plt.show()



# Boolean mask of cells whose decPAM2 value in the niche matrix exceeds 5.
# NOTE(review): presumably the number of decPAM2 cells in each cell's
# neighbourhood — confirm against compute_niches_fov.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 5

# Apply the same mask to the niche matrix and to the full object.
# NOTE(review): assumes adata_niche and adata hold the same cells in the same order — confirm.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Cores that contribute more than 10 cells passing the threshold.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Restrict the threshold-filtered cells and the full dataset to the selected cores.
# .copy() materialises the AnnData views up front: assigning to .obs of a view
# makes anndata copy it implicitly and emit an ImplicitModificationWarning.
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# niche is True for cells that are also present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs_names.isin(adata_filtered_filtered.obs_names)

# Keep only the DB and DP tissues in adata_filtered_3.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Store 'core' as a categorical and list the cores that remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Flag the same niche cells on the full object (this overwrites adata.obs['niche']).
adata.obs['niche'] = adata.obs_names.isin(adata_filtered_filtered.obs_names)

# Per-cell metadata of the niche ("hub") cells only.
hub_cells = adata.obs[adata.obs['niche']].copy()

# Centroid coordinates used for spatial clustering.
coords = hub_cells[['x_centroid', 'y_centroid']].values

# Group niche cells into spatial hubs; DBSCAN labels unclustered (noise) cells -1.
db = DBSCAN(eps=40, min_samples=10)
hub_cells['hub_cluster'] = db.fit_predict(coords)

# Hubs per core, ignoring noise. observed=False keeps cores without hubs in the
# table when 'core' is categorical.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby('core', observed=False)['hub_cluster']
    .nunique()
)
print(hub_counts)

# A bare expression in the middle of a cell is not displayed, so print it.
print(f"Highest DBSCAN hub label: {hub_cells['hub_cluster'].max()}")



# Number of distinct hubs per (tissue, core).
# DBSCAN noise (label -1) is masked to NaN so nunique() skips it; otherwise the
# unclustered cells of a core would be counted as one extra hub.
# observed=False keeps zero-hub combinations when the keys are categorical.
hub_counts = (
    hub_cells
    .assign(hub_cluster=hub_cells['hub_cluster'].where(hub_cells['hub_cluster'] != -1))
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Keep only the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cells per (core, tissue) in the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Drop (tissue, core) rows for which the dataset holds no cells at all.
pair_exists = [
    ct.loc[core, tissue] != 0
    for core, tissue in zip(hub_counts['core'], hub_counts['tissue'])
]
hub_counts = hub_counts[np.asarray(pair_exists, dtype=bool)]

# Total number of cores per tissue, entered by hand.
# NOTE(review): presumably counts every core of the tissue, including cores
# without niche cells — verify against the core layout.
manual_core_totals = {'DB': 13, 'DP': 19}

freq_annotation = {}       # tissue -> (cores with >= 1 hub, total cores, percentage)
mean_hubs_per_tissue = {}  # tissue -> hubs per core, averaged over the manual total

for tissue in ['DB', 'DP']:
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a share of the manual total.
    nonzero_count = (tissue_data['n_hubs'] > 0).sum()
    total_cores = manual_core_totals[tissue]
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Dividing by the manual total treats cores missing from the table as zero hubs.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r150_decPAM25.pdf')
plt.show()


# Boolean mask of cells whose decPAM2 value in the niche matrix exceeds 3.
# NOTE(review): presumably the number of decPAM2 cells in each cell's
# neighbourhood — confirm against compute_niches_fov.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 3

# Apply the same mask to the niche matrix and to the full object.
# NOTE(review): assumes adata_niche and adata hold the same cells in the same order — confirm.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Cores that contribute more than 10 cells passing the threshold.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Restrict the threshold-filtered cells and the full dataset to the selected cores.
# .copy() materialises the AnnData views up front: assigning to .obs of a view
# makes anndata copy it implicitly and emit an ImplicitModificationWarning.
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# niche is True for cells that are also present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs_names.isin(adata_filtered_filtered.obs_names)

# Keep only the DB and DP tissues in adata_filtered_3.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Store 'core' as a categorical and list the cores that remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Flag the same niche cells on the full object (this overwrites adata.obs['niche']).
adata.obs['niche'] = adata.obs_names.isin(adata_filtered_filtered.obs_names)

# Per-cell metadata of the niche ("hub") cells only.
hub_cells = adata.obs[adata.obs['niche']].copy()

# Centroid coordinates used for spatial clustering.
coords = hub_cells[['x_centroid', 'y_centroid']].values

# Group niche cells into spatial hubs; DBSCAN labels unclustered (noise) cells -1.
db = DBSCAN(eps=40, min_samples=10)
hub_cells['hub_cluster'] = db.fit_predict(coords)

# Hubs per core, ignoring noise. observed=False keeps cores without hubs in the
# table when 'core' is categorical.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby('core', observed=False)['hub_cluster']
    .nunique()
)
print(hub_counts)

# A bare expression in the middle of a cell is not displayed, so print it.
print(f"Highest DBSCAN hub label: {hub_cells['hub_cluster'].max()}")



# Number of distinct hubs per (tissue, core).
# DBSCAN noise (label -1) is masked to NaN so nunique() skips it; otherwise the
# unclustered cells of a core would be counted as one extra hub.
# observed=False keeps zero-hub combinations when the keys are categorical.
hub_counts = (
    hub_cells
    .assign(hub_cluster=hub_cells['hub_cluster'].where(hub_cells['hub_cluster'] != -1))
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Keep only the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cells per (core, tissue) in the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Drop (tissue, core) rows for which the dataset holds no cells at all.
pair_exists = [
    ct.loc[core, tissue] != 0
    for core, tissue in zip(hub_counts['core'], hub_counts['tissue'])
]
hub_counts = hub_counts[np.asarray(pair_exists, dtype=bool)]

# Total number of cores per tissue, entered by hand.
# NOTE(review): presumably counts every core of the tissue, including cores
# without niche cells — verify against the core layout.
manual_core_totals = {'DB': 13, 'DP': 19}

freq_annotation = {}       # tissue -> (cores with >= 1 hub, total cores, percentage)
mean_hubs_per_tissue = {}  # tissue -> hubs per core, averaged over the manual total

for tissue in ['DB', 'DP']:
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a share of the manual total.
    nonzero_count = (tissue_data['n_hubs'] > 0).sum()
    total_cores = manual_core_totals[tissue]
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Dividing by the manual total treats cores missing from the table as zero hubs.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r150_decPAM23.pdf')
plt.show()

In [None]:
# Recompute the niches with a 75-unit radius; this replaces adata and adata_niche.
# NOTE(review): the exact meaning of radius/min_cells is defined by
# compute_niches_fov — confirm there.
adata, adata_niche = compute_niches_fov(
    adata,
    radius=75,
    fov_key='core',
    labels_key='celltype_lvl1',
    spatial_key='spatial',
    coord_type='generic',
    min_cells=0,
)

# Boolean mask of cells whose decPAM2 value in the niche matrix exceeds 7.
# NOTE(review): presumably the number of decPAM2 cells in each cell's
# neighbourhood — confirm against compute_niches_fov.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 7

# Apply the same mask to the niche matrix and to the full object.
# NOTE(review): assumes adata_niche and adata hold the same cells in the same order — confirm.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Cores that contribute more than 10 cells passing the threshold.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Restrict the threshold-filtered cells and the full dataset to the selected cores.
# .copy() materialises the AnnData views up front: assigning to .obs of a view
# makes anndata copy it implicitly and emit an ImplicitModificationWarning.
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# niche is True for cells that are also present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs_names.isin(adata_filtered_filtered.obs_names)

# Keep only the DB and DP tissues in adata_filtered_3.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Store 'core' as a categorical and list the cores that remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Flag the same niche cells on the full object (this overwrites adata.obs['niche']).
adata.obs['niche'] = adata.obs_names.isin(adata_filtered_filtered.obs_names)

# Per-cell metadata of the niche ("hub") cells only.
hub_cells = adata.obs[adata.obs['niche']].copy()

# Centroid coordinates used for spatial clustering.
coords = hub_cells[['x_centroid', 'y_centroid']].values

# Group niche cells into spatial hubs; DBSCAN labels unclustered (noise) cells -1.
db = DBSCAN(eps=40, min_samples=10)
hub_cells['hub_cluster'] = db.fit_predict(coords)

# Hubs per core, ignoring noise. observed=False keeps cores without hubs in the
# table when 'core' is categorical.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby('core', observed=False)['hub_cluster']
    .nunique()
)
print(hub_counts)

# A bare expression in the middle of a cell is not displayed, so print it.
print(f"Highest DBSCAN hub label: {hub_cells['hub_cluster'].max()}")



# Number of distinct hubs per (tissue, core).
# DBSCAN noise (label -1) is masked to NaN so nunique() skips it; otherwise the
# unclustered cells of a core would be counted as one extra hub.
# observed=False keeps zero-hub combinations when the keys are categorical.
hub_counts = (
    hub_cells
    .assign(hub_cluster=hub_cells['hub_cluster'].where(hub_cells['hub_cluster'] != -1))
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Keep only the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cells per (core, tissue) in the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Drop (tissue, core) rows for which the dataset holds no cells at all.
pair_exists = [
    ct.loc[core, tissue] != 0
    for core, tissue in zip(hub_counts['core'], hub_counts['tissue'])
]
hub_counts = hub_counts[np.asarray(pair_exists, dtype=bool)]

# Total number of cores per tissue, entered by hand.
# NOTE(review): presumably counts every core of the tissue, including cores
# without niche cells — verify against the core layout.
manual_core_totals = {'DB': 13, 'DP': 19}

freq_annotation = {}       # tissue -> (cores with >= 1 hub, total cores, percentage)
mean_hubs_per_tissue = {}  # tissue -> hubs per core, averaged over the manual total

for tissue in ['DB', 'DP']:
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a share of the manual total.
    nonzero_count = (tissue_data['n_hubs'] > 0).sum()
    total_cores = manual_core_totals[tissue]
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Dividing by the manual total treats cores missing from the table as zero hubs.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r75_decPAM27.pdf')
plt.show()


# Boolean mask of cells whose decPAM2 value in the niche matrix exceeds 5.
# NOTE(review): presumably the number of decPAM2 cells in each cell's
# neighbourhood — confirm against compute_niches_fov.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 5

# Apply the same mask to the niche matrix and to the full object.
# NOTE(review): assumes adata_niche and adata hold the same cells in the same order — confirm.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Cores that contribute more than 10 cells passing the threshold.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Restrict the threshold-filtered cells and the full dataset to the selected cores.
# .copy() materialises the AnnData views up front: assigning to .obs of a view
# makes anndata copy it implicitly and emit an ImplicitModificationWarning.
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# niche is True for cells that are also present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs_names.isin(adata_filtered_filtered.obs_names)

# Keep only the DB and DP tissues in adata_filtered_3.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Store 'core' as a categorical and list the cores that remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Flag the same niche cells on the full object (this overwrites adata.obs['niche']).
adata.obs['niche'] = adata.obs_names.isin(adata_filtered_filtered.obs_names)

# Per-cell metadata of the niche ("hub") cells only.
hub_cells = adata.obs[adata.obs['niche']].copy()

# Centroid coordinates used for spatial clustering.
coords = hub_cells[['x_centroid', 'y_centroid']].values

# Group niche cells into spatial hubs; DBSCAN labels unclustered (noise) cells -1.
db = DBSCAN(eps=40, min_samples=10)
hub_cells['hub_cluster'] = db.fit_predict(coords)

# Hubs per core, ignoring noise. observed=False keeps cores without hubs in the
# table when 'core' is categorical.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby('core', observed=False)['hub_cluster']
    .nunique()
)
print(hub_counts)

# A bare expression in the middle of a cell is not displayed, so print it.
print(f"Highest DBSCAN hub label: {hub_cells['hub_cluster'].max()}")


# Number of distinct hubs per (tissue, core).
# DBSCAN noise (label -1) is masked to NaN so nunique() skips it; otherwise the
# unclustered cells of a core would be counted as one extra hub.
# observed=False keeps zero-hub combinations when the keys are categorical.
hub_counts = (
    hub_cells
    .assign(hub_cluster=hub_cells['hub_cluster'].where(hub_cells['hub_cluster'] != -1))
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Keep only the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cells per (core, tissue) in the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Drop (tissue, core) rows for which the dataset holds no cells at all.
pair_exists = [
    ct.loc[core, tissue] != 0
    for core, tissue in zip(hub_counts['core'], hub_counts['tissue'])
]
hub_counts = hub_counts[np.asarray(pair_exists, dtype=bool)]

# Total number of cores per tissue, entered by hand.
# NOTE(review): presumably counts every core of the tissue, including cores
# without niche cells — verify against the core layout.
manual_core_totals = {'DB': 13, 'DP': 19}

freq_annotation = {}       # tissue -> (cores with >= 1 hub, total cores, percentage)
mean_hubs_per_tissue = {}  # tissue -> hubs per core, averaged over the manual total

for tissue in ['DB', 'DP']:
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a share of the manual total.
    nonzero_count = (tissue_data['n_hubs'] > 0).sum()
    total_cores = manual_core_totals[tissue]
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Dividing by the manual total treats cores missing from the table as zero hubs.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r75_decPAM25.pdf')
plt.show()


# Boolean mask of cells whose decPAM2 value in the niche matrix exceeds 3.
# NOTE(review): presumably the number of decPAM2 cells in each cell's
# neighbourhood — confirm against compute_niches_fov.
mask_decPAM2 = adata_niche.to_df()['decPAM2'] > 3

# Apply the same mask to the niche matrix and to the full object.
# NOTE(review): assumes adata_niche and adata hold the same cells in the same order — confirm.
adata_niche_filtered = adata_niche[mask_decPAM2]
adata_filtered = adata[mask_decPAM2]

# Cores that contribute more than 10 cells passing the threshold.
core_counts = adata_niche_filtered.obs['core'].value_counts()
selected_cores = core_counts[core_counts > 10].index

# Restrict the threshold-filtered cells and the full dataset to the selected cores.
# .copy() materialises the AnnData views up front: assigning to .obs of a view
# makes anndata copy it implicitly and emit an ImplicitModificationWarning.
adata_filtered_filtered = adata_filtered[adata_filtered.obs['core'].isin(selected_cores)].copy()
adata_filtered_3 = adata[adata.obs['core'].isin(selected_cores)].copy()

# niche is True for cells that are also present in adata_filtered_filtered.
adata_filtered_3.obs['niche'] = adata_filtered_3.obs_names.isin(adata_filtered_filtered.obs_names)

# Keep only the DB and DP tissues in adata_filtered_3.
adata_filtered_3 = adata_filtered_3[adata_filtered_3.obs['tissue'].isin(['DB', 'DP'])]

# Store 'core' as a categorical and list the cores that remain.
adata_filtered_filtered.obs['core'] = adata_filtered_filtered.obs['core'].astype('category')
print(adata_filtered_filtered.obs['core'].cat.categories)

# Flag the same niche cells on the full object (this overwrites adata.obs['niche']).
adata.obs['niche'] = adata.obs_names.isin(adata_filtered_filtered.obs_names)

# Per-cell metadata of the niche ("hub") cells only.
hub_cells = adata.obs[adata.obs['niche']].copy()

# Centroid coordinates used for spatial clustering.
coords = hub_cells[['x_centroid', 'y_centroid']].values

# Group niche cells into spatial hubs; DBSCAN labels unclustered (noise) cells -1.
db = DBSCAN(eps=40, min_samples=10)
hub_cells['hub_cluster'] = db.fit_predict(coords)

# Hubs per core, ignoring noise. observed=False keeps cores without hubs in the
# table when 'core' is categorical.
hub_counts = (
    hub_cells[hub_cells['hub_cluster'] != -1]
    .groupby('core', observed=False)['hub_cluster']
    .nunique()
)
print(hub_counts)

# A bare expression in the middle of a cell is not displayed, so print it.
print(f"Highest DBSCAN hub label: {hub_cells['hub_cluster'].max()}")


# Number of distinct hubs per (tissue, core).
# DBSCAN noise (label -1) is masked to NaN so nunique() skips it; otherwise the
# unclustered cells of a core would be counted as one extra hub.
# observed=False keeps zero-hub combinations when the keys are categorical.
hub_counts = (
    hub_cells
    .assign(hub_cluster=hub_cells['hub_cluster'].where(hub_cells['hub_cluster'] != -1))
    .groupby(['tissue', 'core'], observed=False)['hub_cluster']
    .nunique()
    .reset_index()
    .rename(columns={'hub_cluster': 'n_hubs'})
)

# Keep only the DB and DP tissue types.
hub_counts = hub_counts[hub_counts['tissue'].isin(['DB', 'DP'])]

# Cells per (core, tissue) in the full dataset.
ct = pd.crosstab(adata.obs['core'], adata.obs['tissue'])

# Drop (tissue, core) rows for which the dataset holds no cells at all.
pair_exists = [
    ct.loc[core, tissue] != 0
    for core, tissue in zip(hub_counts['core'], hub_counts['tissue'])
]
hub_counts = hub_counts[np.asarray(pair_exists, dtype=bool)]

# Total number of cores per tissue, entered by hand.
# NOTE(review): presumably counts every core of the tissue, including cores
# without niche cells — verify against the core layout.
manual_core_totals = {'DB': 13, 'DP': 19}

freq_annotation = {}       # tissue -> (cores with >= 1 hub, total cores, percentage)
mean_hubs_per_tissue = {}  # tissue -> hubs per core, averaged over the manual total

for tissue in ['DB', 'DP']:
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Cores with at least one hub, as a share of the manual total.
    nonzero_count = (tissue_data['n_hubs'] > 0).sum()
    total_cores = manual_core_totals[tissue]
    freq_percentage = (nonzero_count / total_cores) * 100
    freq_annotation[tissue] = (nonzero_count, total_cores, freq_percentage)

    # Dividing by the manual total treats cores missing from the table as zero hubs.
    total_hubs = tissue_data['n_hubs'].sum()
    mean_hubs = total_hubs / total_cores
    mean_hubs_per_tissue[tissue] = mean_hubs

    print(f"Tissue {tissue}: {nonzero_count}/{total_cores} cores with hubs "
          f"({freq_percentage:.1f}%), Mean hubs/core = {mean_hubs:.2f}")


# Box plot of hubs per core for DP and DB with one jittered dot per core.
fig, ax = plt.subplots(figsize=(3, 5))

unique_tissues = ['DP', 'DB']

# One array of hub counts per tissue, in plotting order.
data_boxplot = [
    hub_counts.loc[hub_counts['tissue'] == tissue, 'n_hubs'].to_numpy()
    for tissue in unique_tissues
]

# Boxes go at the lowest z-order so the dots are drawn on top of them.
ax.boxplot(
    data_boxplot,
    positions=np.arange(len(unique_tissues)),
    widths=0.2,
    patch_artist=True,
    boxprops=dict(facecolor='lightgray', alpha=0.5, color='black'),
    medianprops=dict(color='black', linewidth=1),
    whiskerprops=dict(color='black'),
    capprops=dict(color='black'),
    flierprops=dict(marker='o', markerfacecolor='black', markersize=0),  # size 0 hides fliers
    zorder=0,
)

# Seeded local generator: the jitter, and therefore the saved PDF, is the same on every run.
jitter_rng = np.random.default_rng(0)

for i, tissue in enumerate(unique_tissues):
    tissue_data = hub_counts[hub_counts['tissue'] == tissue]

    # Spread the dots horizontally around the box position.
    jitter = jitter_rng.uniform(-0.2, 0.2, size=len(tissue_data))
    x_positions = np.full(len(tissue_data), i) + jitter
    ax.scatter(x_positions, tissue_data['n_hubs'], alpha=0.9, s=80)

    nonzero_count, total_cores, perc = freq_annotation[tissue]
    mean_val = mean_hubs_per_tissue[tissue]

    # Frequency label sits just above the highest dot (or above 0 when there is no data).
    y_top = tissue_data['n_hubs'].max() if not tissue_data['n_hubs'].empty else 0
    freq_y_text = y_top + 0.5
    ax.text(i, freq_y_text, f"{nonzero_count}/{total_cores}\n({perc:.1f}%)",
            ha='center', va='bottom', fontsize=10)

    # Mean hubs per core is written below the zero line.
    ax.text(i, -0.4, f"Mean: {mean_val:.2f}",
            ha='center', va='top', fontsize=9, color='green')

ax.set_xticks(np.arange(len(unique_tissues)))
ax.set_xticklabels(unique_tissues)
ax.set_xlabel('Tissue Type')
ax.set_ylabel('Number of Hubs per Core')
ax.set_title('Hubs per Core (DP and DB)')
# Extra head-room so the frequency labels are not cut off.
ax.set_ylim(-0.5, ax.get_ylim()[1] + 1)
fig.savefig('./Hubspercore_r75_decPAM23.pdf')
plt.show()

## Composition plots (Figure S2F, Figure S4B, Figure S4F)

In [None]:
# Cell-type composition per 'Selection' for the DB and DP tissues, all cell types.
adata_subset = adata[adata.obs['tissue'].isin(['DB', 'DP'])]

# Per-cell metadata needed for the composition table.
df = adata_subset.obs[['tissue', 'Selection', 'celltype_lvl1']]

# Total cells per selection.
# observed=False is spelled out so that categorical keys keep their zero-count
# groups regardless of the pandas default.
total_cells_per_selection = (
    df.groupby('Selection', observed=False).size().reset_index(name='total_cells')
)

# Cells per (selection, cell type); absent cell types stay in the table with count 0
# when the keys are categorical.
cells_per_celltype_selection = (
    df.groupby(['Selection', 'celltype_lvl1'], observed=False).size().reset_index(name='cell_count')
)

# Fraction of each cell type within its selection.
merged_df = pd.merge(cells_per_celltype_selection, total_cells_per_selection, on='Selection')
merged_df['fraction'] = merged_df['cell_count'] / merged_df['total_cells']

# Attach the tissue of every selection.
merged_df = pd.merge(merged_df, df[['Selection', 'tissue']].drop_duplicates(), on='Selection')

tissues = merged_df['tissue'].unique()
# All unordered tissue pairs (not used by the plots in this cell).
pairs = [(tissues[i], tissues[j]) for i in range(len(tissues)) for j in range(i+1, len(tissues))]

for celltype in merged_df['celltype_lvl1'].unique():
    df_subset = merged_df[merged_df['celltype_lvl1'] == celltype]

    fig, ax = plt.subplots(figsize=(5, 5))
    sns.boxplot(x='tissue', y='fraction', data=df_subset, showfliers=False, dodge=False,
                width=0.5, palette=['#87001a', '#005f87'], ax=ax)
    sns.stripplot(x='tissue', y='fraction', hue='tissue', data=df_subset, dodge=False,
                  jitter=True, size=10, palette='dark:.3', alpha=0.5, legend=None, ax=ax)

    ax.tick_params(axis='x', labelrotation=90)
    ax.set_ylim(0, 1)
    ax.set_title(f'Fraction of {celltype} per core by tissue')
    ax.set_xlabel('Tissue')
    ax.set_ylabel(f'Fraction of {celltype}')
    # Separate file name from the macrophage-only plots, which previously reused
    # this exact pattern and overwrote these figures for the shared cell types.
    fig.savefig(f'./celltype_lvl1_composition_stat_all_boxplot_{celltype}.pdf')
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Macrophage-lineage composition per Selection in DB and DP tissue: fractions
# are relative to the listed macrophage/monocyte cell types only.
adata_subset = adata[adata.obs['tissue'].isin(['DB', 'DP'])]
adata_subset = adata_subset[
    adata_subset.obs['celltype_lvl1'].isin(['Mono', 'decPAM2', 'decBAM1', 'decBAM2', 'pMac', 'decPAM1'])
]

df = adata_subset.obs[['tissue', 'Selection', 'celltype_lvl1']]

# Denominator (cells per Selection) and numerator (cells per type per Selection)
total_cells_per_selection = df.groupby('Selection').size().reset_index(name='total_cells')
cells_per_celltype_selection = (
    df.groupby(['Selection', 'celltype_lvl1']).size().reset_index(name='cell_count')
)

# Combine the counts, derive the fraction, then attach each Selection's tissue
merged_df = cells_per_celltype_selection.merge(total_cells_per_selection, on='Selection')
merged_df['fraction'] = merged_df['cell_count'].div(merged_df['total_cells'])
merged_df = merged_df.merge(df[['Selection', 'tissue']].drop_duplicates(), on='Selection')

tissues = merged_df['tissue'].unique()
pairs = [(tissues[i], tissues[j]) for i in range(len(tissues)) for j in range(i + 1, len(tissues))]

for celltype in merged_df['celltype_lvl1'].unique():
    df_subset = merged_df[merged_df['celltype_lvl1'].isin([celltype])]

    fig, ax = plt.subplots(figsize=(5, 5))
    sns.boxplot(
        data=df_subset, x='tissue', y='fraction',
        palette=['#87001a', '#005f87'], width=0.5, dodge=False, showfliers=False, ax=ax,
    )
    # Individual Selections drawn on top of the boxes
    sns.stripplot(
        data=df_subset, x='tissue', y='fraction', hue='tissue',
        palette='dark:.3', size=10, alpha=0.5, jitter=True, dodge=False, legend=None, ax=ax,
    )

    plt.xticks(rotation=90)
    ax.set_ylim(0, 1)
    ax.set_title(f'Fraction of {celltype} per core by tissue')
    ax.set_xlabel('Tissue')
    ax.set_ylabel(f'Fraction of {celltype}')
    plt.savefig(f'./celltype_lvl1_composition_stat_mac_boxplot_{celltype}.pdf')
    plt.show()

In [None]:
# NK-cell composition per Selection in DB and DP tissue: fractions are relative
# to the three listed NK cell types only.
adata_subset = adata[adata.obs['tissue'].isin(['DB', 'DP'])]
adata_subset = adata_subset[adata_subset.obs['celltype_lvl1'].isin(['pNK', 'NK_CD39-', 'NK_CD39+'])]

df = adata_subset.obs[['tissue', 'Selection', 'celltype_lvl1']]

# Denominator (cells per Selection) and numerator (cells per type per Selection)
total_cells_per_selection = df.groupby('Selection').size().reset_index(name='total_cells')
cells_per_celltype_selection = (
    df.groupby(['Selection', 'celltype_lvl1']).size().reset_index(name='cell_count')
)

# Combine the counts, derive the fraction, then attach each Selection's tissue
merged_df = cells_per_celltype_selection.merge(total_cells_per_selection, on='Selection')
merged_df['fraction'] = merged_df['cell_count'].div(merged_df['total_cells'])
merged_df = merged_df.merge(df[['Selection', 'tissue']].drop_duplicates(), on='Selection')

tissues = merged_df['tissue'].unique()
pairs = [(tissues[i], tissues[j]) for i in range(len(tissues)) for j in range(i + 1, len(tissues))]

for celltype in merged_df['celltype_lvl1'].unique():
    df_subset = merged_df[merged_df['celltype_lvl1'].isin([celltype])]

    fig, ax = plt.subplots(figsize=(5, 5))
    sns.boxplot(
        data=df_subset, x='tissue', y='fraction',
        palette=['#87001a', '#005f87'], width=0.5, dodge=False, showfliers=False, ax=ax,
    )
    # Individual Selections drawn on top of the boxes
    sns.stripplot(
        data=df_subset, x='tissue', y='fraction', hue='tissue',
        palette='dark:.3', size=10, alpha=0.5, jitter=True, dodge=False, legend=None, ax=ax,
    )

    plt.xticks(rotation=90)
    ax.set_ylim(0, 1)
    ax.set_title(f'Fraction of {celltype} per core by tissue')
    ax.set_xlabel('Tissue')
    ax.set_ylabel(f'Fraction of {celltype}')
    plt.savefig(f'./celltype_lvl1_composition_stat_nk_boxplot_{celltype}.pdf')
    plt.show()

In [None]:
# Immune-cell composition per Selection in DB and DP tissue: fractions are
# relative to the listed immune cell types only. Unlike the sibling composition
# cells, the y-axis is left auto-scaled here.
immune_celltypes = [
    'B cell', 'DC1', 'DC2', 'Mast', 'Mono', 'NK_CD39+', 'NK_CD39-', 'Neutro',
    'Tcell_CD4+', 'Tcell_CD8+', 'Treg', 'decPAM2', 'decBAM1', 'decBAM2', 'pMac', 'pNK', 'decPAM1',
]
adata_subset = adata[adata.obs['tissue'].isin(['DB', 'DP'])]
adata_subset = adata_subset[adata_subset.obs['celltype_lvl1'].isin(immune_celltypes)]

df = adata_subset.obs[['tissue', 'Selection', 'celltype_lvl1']]

# Denominator (cells per Selection) and numerator (cells per type per Selection)
total_cells_per_selection = df.groupby('Selection').size().reset_index(name='total_cells')
cells_per_celltype_selection = (
    df.groupby(['Selection', 'celltype_lvl1']).size().reset_index(name='cell_count')
)

# Combine the counts, derive the fraction, then attach each Selection's tissue
merged_df = cells_per_celltype_selection.merge(total_cells_per_selection, on='Selection')
merged_df['fraction'] = merged_df['cell_count'].div(merged_df['total_cells'])
merged_df = merged_df.merge(df[['Selection', 'tissue']].drop_duplicates(), on='Selection')

tissues = merged_df['tissue'].unique()
pairs = [(tissues[i], tissues[j]) for i in range(len(tissues)) for j in range(i + 1, len(tissues))]

for celltype in merged_df['celltype_lvl1'].unique():
    df_subset = merged_df[merged_df['celltype_lvl1'].isin([celltype])]

    fig, ax = plt.subplots(figsize=(5, 5))
    sns.boxplot(
        data=df_subset, x='tissue', y='fraction',
        palette=['#87001a', '#005f87'], width=0.5, dodge=False, showfliers=False, ax=ax,
    )
    # Individual Selections drawn on top of the boxes
    sns.stripplot(
        data=df_subset, x='tissue', y='fraction', hue='tissue',
        palette='dark:.3', size=10, alpha=0.5, jitter=True, dodge=False, legend=None, ax=ax,
    )

    plt.xticks(rotation=90)
    ax.set_title(f'Fraction of {celltype} per core by tissue')
    ax.set_xlabel('Tissue')
    ax.set_ylabel(f'Fraction of {celltype}')
    plt.savefig(f'./celltype_lvl1_immu_composition_stat_boxplot_{celltype}.pdf')
    plt.show()

In [None]:
# Work on an independent copy so the annotations added below do not touch `adata`.
adata_subset = adata.copy()


In [None]:
# Start the intermediate annotation level from the existing 'celltype_lvl05' labels.
adata_subset.obs['celltype_lvl0_int'] = adata_subset.obs['celltype_lvl05']

In [None]:
# Collapse DC subtypes into "DC", monocytes into "Mac", and mast cells and
# neutrophils into "Granulo" for the intermediate annotation level.
lvl0_int_merge = {
    'DC1': "DC",
    'DC2': "DC",
    'Mono': "Mac",
    'Mast': 'Granulo',
    'Neutro': 'Granulo',
}
adata_subset.obs['celltype_lvl0_int'] = adata_subset.obs['celltype_lvl0_int'].replace(lvl0_int_merge)

In [None]:
# Wilcoxon marker ranking per 'celltype_lvl0_int' group, then a table of the
# top 20 ranked gene names per group (one column per group, suffixed "_n").
sc.tl.rank_genes_groups(adata_subset, groupby='celltype_lvl0_int', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names

pd.set_option('display.max_columns', 100)
top_markers = pd.DataFrame({f'{group}_n': result['names'][group] for group in groups})
top_markers.head(20)

## Figure S1B

In [None]:
# Dendrogram over the intermediate cell types, then a matrix plot of the top 3
# ranked genes per group (genes on rows, each gene scaled across groups).
sc.tl.dendrogram(adata_subset, groupby='celltype_lvl0_int')
sc.pl.rank_genes_groups_matrixplot(
    adata_subset,
    n_genes=3,
    standard_scale="var",
    dendrogram=True,
    swap_axes=True,
    cmap='Reds',
    save='Xenium_Fig1_matrixplot.pdf',
)

## Figure S2D

In [None]:
# Restrict to the macrophage/monocyte level-1 cell types.
mac_celltypes = ['decBAM1', 'decBAM2', 'decPAM1', 'decPAM2', 'pMac', 'Mono']
adata_mac = adata_subset[adata_subset.obs['celltype_lvl1'].isin(mac_celltypes)]

In [None]:
# Selected marker genes across the macrophage/monocyte populations, each gene
# scaled across groups.
# NOTE(review): 'AQP9' appears twice in this list (once after 'SEMA3C', once
# after 'VCAN'); confirm whether the repeated column is intended.
mac_marker_genes = [
    'C1QA', 'MS4A6A', 'MS4A4A',
    'FOLR2', 'MRC1', 'TREM2', 'SPP1', 'CTSD', 'CD28',
    'CXCR4', 'LGALS3', 'MARCO', 'CXCL2',
    'SEMA3C', 'AQP9',
    'CXCL10', 'CXCL9', 'ISG15', 'SLAMF7', 'IDO1', 'IL4I1',
    'MKI67', 'CDK1', 'TOP2A', 'CENPF', 'UBE2C', 'S100A12', 'FCN1', 'CD300E', 'MNDA', 'CCR2',
    'VCAN', 'AQP9', 'C1orf162',
]
mac_group_order = ['decPAM1', 'decBAM1', 'decBAM2', 'decPAM2', 'pMac', 'Mono']

sc.pl.matrixplot(
    adata_mac,
    mac_marker_genes,
    groupby='celltype_lvl1',
    categories_order=mac_group_order,
    standard_scale='var',
    cmap='Reds',
    save='Xenium_mac_selectedmarkers_matrixplot.pdf',
)

In [None]:
# Export the per-gene metadata table (adata.var) to CSV.
adata.var.to_csv('Xenium_panel.csv')

In [None]:
# Restrict to the macrophage/monocyte level-1 cell types.
mac_celltypes = ['decBAM1', 'decBAM2', 'decPAM1', 'decPAM2', 'pMac', 'Mono']
adata_mac = adata_subset[adata_subset.obs['celltype_lvl1'].isin(mac_celltypes)]

In [None]:
# Show the available 'celltype_lvl05' labels.
adata_subset.obs['celltype_lvl05'].cat.categories

## Figure S4G

In [None]:
# Myeloid subset (DCs, macrophages, monocytes) at the 'celltype_lvl05' level.
# Materialised with .copy() because the ranking step that follows writes its
# results into .uns; doing that on a view makes anndata copy implicitly (with an
# ImplicitModificationWarning).
adata_mac = adata_subset[adata_subset.obs['celltype_lvl05'].isin(['DC1','DC2','Mac','Mono'])].copy()

In [None]:
# Wilcoxon marker ranking per 'celltype_lvl05' group within adata_mac, then a
# table of the top 20 ranked gene names per group (columns suffixed "_n").
sc.tl.rank_genes_groups(adata_mac, groupby='celltype_lvl05', method='wilcoxon')
result = adata_mac.uns['rank_genes_groups']
groups = result['names'].dtype.names

pd.set_option('display.max_columns', 100)
top_markers = pd.DataFrame({f'{group}_n': result['names'][group] for group in groups})
top_markers.head(20)

In [None]:
# Matrix plot of the top 5 ranked genes per group in adata_mac (groups on rows,
# each gene scaled across groups).
sc.pl.rank_genes_groups_matrixplot(
    adata_mac,
    n_genes=5,
    standard_scale="var",
    dendrogram=True,
    swap_axes=False,
    cmap='Reds',
    save='Xenium_DC_matrixplot.pdf',
)

In [None]:
# Show the available 'celltype_lvl1' labels.
adata_subset.obs['celltype_lvl1'].cat.categories

## Figure S4D

In [None]:
# NK- and T-cell subset at the 'celltype_lvl1' level.
# Materialised with .copy() because the ranking step that follows writes its
# results into .uns; doing that on a view makes anndata copy implicitly (with an
# ImplicitModificationWarning).
adata_lymph = adata_subset[adata_subset.obs['celltype_lvl1'].isin(['NK_CD39+', 'NK_CD39-','Tcell_CD4+',
       'Tcell_CD8+', 'Treg','pNK'])].copy()

In [None]:
# Wilcoxon marker ranking per 'celltype_lvl1' group within adata_lymph, then a
# table of the top 20 ranked gene names per group (columns suffixed "_n").
sc.tl.rank_genes_groups(adata_lymph, groupby='celltype_lvl1', method='wilcoxon')
result = adata_lymph.uns['rank_genes_groups']
groups = result['names'].dtype.names

pd.set_option('display.max_columns', 100)
top_markers = pd.DataFrame({f'{group}_n': result['names'][group] for group in groups})
top_markers.head(20)

In [None]:
# Matrix plot of the top 5 ranked genes per group in adata_lymph (groups on
# rows, each gene scaled across groups).
sc.pl.rank_genes_groups_matrixplot(
    adata_lymph,
    n_genes=5,
    standard_scale="var",
    dendrogram=True,
    swap_axes=False,
    cmap='Reds',
    save='Xenium_NK_matrixplot.pdf',
)

In [None]:
# Inspect the per-cell metadata table.
adata.obs

In [None]:
# Coarse ('celltype_lvl025') label -> the level-1 labels it absorbs.
coarse_to_fine = {
    'B cell': ['B cell'],
    'DC': ['DC1', 'DC2'],
    'Endo': ['Endo'],
    'Epi': ['EPI'],
    'Fib': ['Fib'],
    'Lymph': ['Lymph'],
    'Granulo': ['Mast', 'Neutro'],
    'Mac': ['Mono', 'decPAM2', 'decBAM1', 'decBAM2', 'pMac', 'decPAM1'],
    'Mural': ['Mural'],
    'NK': ['NK_CD39+', 'NK_CD39-', 'pNK'],
    'Other': ['Other'],
    'T cell': ['Tcell_CD4+', 'Tcell_CD8+', 'Treg'],
    'Tropho': ['eEVT', 'iEVT'],
}
# Flatten into the level-1 -> coarse lookup used for the replacement
new_cluster_names = {
    fine: coarse for coarse, members in coarse_to_fine.items() for fine in members
}

# Derive the coarse annotation from the level-1 labels and store it as a
# categorical column without unused categories.
adata.obs['celltype_lvl025'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
    .cat.remove_unused_categories()
)

In [None]:
# Overview scatter of all cells coloured by their 'Selection' label.
sq.pl.spatial_scatter(
    adata,
    color=['Selection'],
    library_id="spatial",
    shape=None,
    size=8,
    figsize=(20, 10),
    ncols=1,
    wspace=0.1,
)

In [None]:
# DB-tissue cells only. Materialised with .copy() because the neighbourhood
# enrichment run on this object stores its result in .uns; doing that on a view
# makes anndata copy implicitly (with an ImplicitModificationWarning).
adata_subset2 = adata[adata.obs['tissue'].isin(['DB'])].copy()

In [None]:
# Neighbourhood enrichment between level-1 cell types, with 'core' passed as the
# library key. The result is read back from
# adata_subset2.uns["celltype_lvl1_nhood_enrichment"].
# NOTE(review): presumably relies on a spatial neighbour graph computed earlier
# in the notebook; confirm it was built for this subset.
sq.gr.nhood_enrichment(adata_subset2, cluster_key="celltype_lvl1", library_key='core')

In [None]:
# Neighbourhood-enrichment z-scores computed on adata_subset2
z_scores = adata_subset2.uns["celltype_lvl1_nhood_enrichment"]['zscore']

# Take the labels from the SAME object the enrichment was computed on. The
# previous code used adata_subset (all tissues), whose category list need not
# match the DB-only subset, so the frame could fail to build or be mislabelled.
cluster_labels = adata_subset2.obs['celltype_lvl1'].cat.categories

# Labelled (cell type x cell type) z-score table
z_scores_df2 = pd.DataFrame(z_scores, index=cluster_labels, columns=cluster_labels)

## Figure 3G

In [None]:
# iEVT row of the z-score table, restricted to the four macrophage populations
# in a fixed display order. (Despite its name this variable holds iEVT scores;
# the explicit label order decides the final ordering.)
decPAM2_sorted = z_scores_df2.loc['iEVT'].sort_values(ascending=False)
decPAM2_sorted = decPAM2_sorted.loc[['decBAM1', 'decPAM2', 'decBAM2', 'decPAM1']]

# Red bars for positive z-scores, blue otherwise
colors = np.where(decPAM2_sorted > 0, 'red', 'blue').tolist()

fig, ax = plt.subplots(figsize=(6, 6))
# width=1 makes neighbouring bars touch
decPAM2_sorted.plot(kind='bar', ax=ax, color=colors, edgecolor='black', width=1)

ax.set_title('Sorted Z-Scores for iEVT', fontsize=14)
ax.set_xlabel('Cell Types', fontsize=12)
ax.set_ylabel('Z-Score', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

plt.savefig('./SFig3G-neighborhood-iEVT-to-Mac.pdf')
plt.show()

# sdata plotting

In [None]:
# Load the SpatialData object from the local zarr store.
sdata = sd.read_zarr('./xenium_sd.zarr')

In [None]:
# Coarse ('celltype_lvl025') label -> the level-1 labels it absorbs.
coarse_to_fine = {
    'B cell': ['B cell'],
    'DC': ['DC1', 'DC2'],
    'Endo': ['Endo'],
    'Epi': ['EPI'],
    'Fib': ['Fib'],
    'Lymph': ['Lymph'],
    'Granulo': ['Mast', 'Neutro'],
    'Mac': ['Mono', 'decPAM2', 'decBAM1', 'decBAM2', 'pMac', 'decPAM1'],
    'Mural': ['Mural'],
    'NK': ['NK_CD39+', 'NK_CD39-', 'pNK'],
    'Other': ['Other'],
    'T cell': ['Tcell_CD4+', 'Tcell_CD8+', 'Treg'],
    'Tropho': ['eEVT', 'iEVT'],
}
# Flatten into the level-1 -> coarse lookup used for the replacement
new_cluster_names = {
    fine: coarse for coarse, members in coarse_to_fine.items() for fine in members
}

# Derive the coarse annotation from the level-1 labels and store it as a
# categorical column without unused categories.
adata.obs['celltype_lvl025'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
    .cat.remove_unused_categories()
)

In [None]:
# Copy the coarse cell-type labels from adata onto the SpatialData table,
# matching rows on cell_id.
# set_index() returns a new frame; the previous inplace=True call mutated a
# slice of adata.obs, which pandas flags with SettingWithCopyWarning.
adata_annotations = adata.obs[['cell_id', 'celltype_lvl025']].set_index('cell_id')

# Drop any existing copy of the column first so the cell can be re-run: without
# this a second run adds a '*_from_adata' column and leaves the stale one in use.
table_obs = sdata.tables['table'].obs.drop(columns=['celltype_lvl025'], errors='ignore')

# sdata.tables['table'] is used instead of the deprecated sdata.table accessor,
# matching the rest of the notebook.
sdata.tables['table'].obs = table_obs.join(adata_annotations, on='cell_id')

In [None]:
# Display the AnnData summary.
adata

In [None]:
# Copy the 'niche' annotation from adata onto the SpatialData table, matching
# rows on cell_id.
# set_index() returns a new frame; the previous inplace=True call mutated a
# slice of adata.obs, which pandas flags with SettingWithCopyWarning.
adata_annotations = adata.obs[['cell_id', 'niche']].set_index('cell_id')

# Drop any existing copy of the column first so the cell can be re-run: without
# this a second run adds a '*_from_adata' column and leaves the stale one in use.
table_obs = sdata.tables['table'].obs.drop(columns=['niche'], errors='ignore')

# sdata.tables['table'] is used instead of the deprecated sdata.table accessor,
# matching the rest of the notebook.
sdata.tables['table'].obs = table_obs.join(adata_annotations, on='cell_id')

In [None]:
# Point every table row at the "cell_boundaries" element and register that
# element as the one the table annotates.
annotated_region = "cell_boundaries"
sdata.tables["table"].obs["region"] = annotated_region
sdata.set_table_annotates_spatialelement("table", region=annotated_region)

In [None]:
#remove genes from sdata IGFBP1, DCN, PRL
# Remove IGFBP1, PRL and DCN from the SpatialData table.
# (The stray `sdata.table.var_names` expression was dropped: it was not the last
# statement of the cell, so it displayed nothing, and it used the deprecated
# `sdata.table` accessor.)
excluded_genes = ['IGFBP1', 'PRL', 'DCN']
keep_gene = ~sdata.tables['table'].var_names.isin(excluded_genes)

# .copy() stores a standalone AnnData rather than a view
sdata.tables['table'] = sdata.tables['table'][:, keep_gene].copy()

In [None]:
# Ensure that adata.X is in a dense format
# Replace the expression matrix of the SpatialData table with adata.X, aligned
# on cell_id (rows) and gene name (columns).
table = sdata.tables['table']  # non-deprecated accessor, as used elsewhere in the notebook

# Make sure cell_id is available as a column to align on
if 'cell_id' not in table.obs.columns:
    table.obs['cell_id'] = table.obs.index

# Fail loudly if adata lacks a gene the table expects, rather than zero-filling it
missing_genes = table.var_names.difference(adata.var_names)
if len(missing_genes) > 0:
    raise ValueError(f"Genes present in sdata table but missing from adata: {list(missing_genes)}")

# Dense copy of adata.X, labelled by cell_id and gene name
adata_X_dense = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X
adata_X_df = pd.DataFrame(adata_X_dense, index=adata.obs['cell_id'], columns=adata.var_names)

# Align BOTH axes to the table. Previously only rows were reindexed, so a
# different gene order in adata would have been written into the wrong columns.
# Cells absent from adata become NaN rows and are zero-filled below.
adata_X_df = adata_X_df.reindex(index=table.obs['cell_id'], columns=table.var_names)

# Back to sparse CSR, float32
adata_X_sparse = sp.csr_matrix(adata_X_df.fillna(0).values, dtype=np.float32)

# table.obs.index is never modified above, so no index restore is needed
table.X = adata_X_sparse

In [None]:
# Copy the level-1 cell-type labels from adata onto the SpatialData table,
# matching rows on cell_id.
# (The leading bare `adata.obs['celltype_lvl1']` expression was dropped: it was
# not the last statement of the cell and therefore displayed nothing.)
# set_index() returns a new frame; the previous inplace=True call mutated a
# slice of adata.obs, which pandas flags with SettingWithCopyWarning.
adata_annotations = adata.obs[['cell_id', 'celltype_lvl1']].set_index('cell_id')

# Drop any existing copy of the column first so the cell can be re-run: without
# this a second run adds a '*_from_adata' column and leaves the stale one in use.
table_obs = sdata.tables['table'].obs.drop(columns=['celltype_lvl1'], errors='ignore')

# sdata.tables['table'] is used instead of the deprecated sdata.table accessor,
# matching the rest of the notebook.
sdata.tables['table'].obs = table_obs.join(adata_annotations, on='cell_id')

In [None]:
# All level-1 cell-type labels, used as the `groups` argument when rendering.
# Read through sdata.tables['table'] (the deprecated sdata.table accessor is
# avoided, matching the rest of the notebook).
groups_to_show = sdata.tables['table'].obs['celltype_lvl1'].cat.categories.tolist()

In [None]:
# Display the list of labels that will be rendered.
groups_to_show

In [None]:
def decB_Fig1(x):
    """Crop ``x`` to the x/y box (40300, 77300)-(50200, 87500) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[40300, 77300],
        max_coordinate=[50200, 87500],
        target_coordinate_system="global",
    )

In [None]:
# Preview of the decB_Fig1 region coloured by level-1 cell type (default
# colours, nothing written to disk).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_facecolor("black")

roi = decB_Fig1(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1',
    groups=groups_to_show,  # labels to draw
).pl.show(ax=ax, title="Morphology image")

## Figure 2E - decB

In [None]:
# All coarse ('celltype_lvl025') labels, used as the `groups` argument when
# rendering. Read through sdata.tables['table'] (the deprecated sdata.table
# accessor is avoided, matching the rest of the notebook).
groups_to_show = sdata.tables['table'].obs['celltype_lvl025'].cat.categories.tolist()

In [None]:
# Preferred ordering of the coarse cell types; table rows are sorted by it below.
cluster_order = ["Tropho", "Endo", "Lymph", "Epi", "Fib", "Mural", "Mac", "NK", "T cell", "Granulo", "DC", "Other"]

table = sdata.tables['table']

# Any label present in the data but missing from cluster_order (e.g. "B cell")
# is appended at the end. pd.Categorical turns values outside `categories` into
# NaN, so such cells previously lost their annotation silently.
observed_labels = set(table.obs['celltype_lvl025'].dropna().astype(str))
full_order = cluster_order + sorted(observed_labels - set(cluster_order))

# Ordered categorical so that sort_values follows full_order
table.obs['celltype_lvl025'] = pd.Categorical(
    table.obs['celltype_lvl025'],
    categories=full_order,
    ordered=True
)

# Row order following the categorical order (unlabelled cells sort last)
sorted_index = table.obs.sort_values('celltype_lvl025').index

# Store a standalone, reordered table (.copy() avoids keeping a view in sdata)
sdata.tables['table'] = table[sorted_index].copy()

In [None]:
# Thirteen colours passed as `palette` when rendering 'celltype_lvl025'.
palette = [
    "#8A8DBE",
    "#FFCCFA",
    "#DB231A",  # red
    "#A32392",
    "#335AF2",
    "#85BDC1",
    "#9FC13E",
    "#EF9203",  # MAC
    "#19E6FF",
    "#7A7469",
    "#A36A12",
    "#8E033D",
    "#F5BB00",  # yellow
]

In [None]:
def decB_Fig1(x):
    """Crop ``x`` to the x/y box (40300, 77300)-(50200, 87500) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[40300, 77300],
        max_coordinate=[50200, 87500],
        target_coordinate_system="global",
    )

In [None]:
# decB_Fig1 region coloured by coarse cell type on a black background.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_Fig1(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl025',
    groups=groups_to_show,  # labels to draw
    palette=palette,        # custom colours
).pl.show(ax=ax, title="Morphology image")

plt.savefig('xenium_morphology_alpha0_core48_annotations.jpg', dpi=900, format='jpg')

## Figure S2C

In [None]:
def decB_Fig1_sub(x):
    """Crop ``x`` to the trophoblast-plug box (41000, 79500)-(43000, 81500) in "global" coordinates."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[41000, 79500],
        max_coordinate=[43000, 81500],
        target_coordinate_system="global",
    )

In [None]:
# Currently defined decB_Fig1_sub region coloured by coarse cell type.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_Fig1_sub(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl025',
    groups=groups_to_show,  # labels to draw
    palette=palette,        # custom colours
).pl.show(ax=ax, title="Morphology image")

plt.savefig('xenium_morphology_alpha0_core48_annotations_sub_plugs.jpg', dpi=900, format='jpg')

## Figure 2E - decB zoom-in

In [None]:
def decB_Fig1_sub(x):
    """Crop ``x`` to the x/y box (45000, 80000)-(47000, 82000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[45000, 80000],
        max_coordinate=[47000, 82000],
        target_coordinate_system="global",
    )

In [None]:
# Currently defined decB_Fig1_sub region coloured by coarse cell type.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_Fig1_sub(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl025',
    groups=groups_to_show,  # labels to draw
    palette=palette,        # custom colours
).pl.show(ax=ax, title="Morphology image")

plt.savefig('xenium_morphology_alpha0_core48_annotations_sub.jpg', dpi=900, format='jpg')

## Figure 3F - decB - overview

In [None]:
def decB_Fig2(x):
    """Crop ``x`` to the x/y box (13000, 4000)-(23000, 14000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[13000, 4000],
        max_coordinate=[23000, 14000],
        target_coordinate_system="global",
    )

In [None]:
# decB_Fig2 region: iEVT, three macrophage populations and fibroblasts
# highlighted; everything else is drawn in light grey.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_Fig2(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1',
    groups=['iEVT', 'decBAM1', 'decBAM2', 'decPAM1', 'Fib'],
    palette=['#F5BB00', '#519e3e', '#ef8636', '#335AF2', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Annotation")

plt.savefig('./xenium_morphology_alpha0_core11_annotations.jpg', dpi=900, format='jpg')

## Figure 3F - decB

In [None]:
def decB_Fig2_sub1(x):
    """Crop ``x`` to the x/y box (15500, 9000)-(17500, 11000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[15500, 9000],
        max_coordinate=[17500, 11000],
        target_coordinate_system="global",
    )

In [None]:
# decB_Fig2_sub1 zoom-in: iEVT, three macrophage populations and fibroblasts
# highlighted; everything else is drawn in light grey.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_Fig2_sub1(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1',
    groups=['iEVT', 'decBAM1', 'decBAM2', 'decPAM1', 'Fib'],
    palette=['#F5BB00', '#519e3e', '#ef8636', '#335AF2', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Annotation")

plt.savefig('./xenium_morphology_alpha0_core11_annotations_sub1.jpg', dpi=900, format='jpg')

## Figure 3F - decP - overview

In [None]:
def decP_Fig2(x):
    """Crop ``x`` to the x/y box (41500, 99000)-(51500, 109000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[41500, 99000],
        max_coordinate=[51500, 109000],
        target_coordinate_system="global",
    )

In [None]:
# decP_Fig2 region: iEVT, three macrophage populations and fibroblasts
# highlighted; everything else is drawn in light grey.
# A 10x10 figure is created explicitly, as in the sibling render cells. The
# figure-creation line had been commented out, so plt.axes() fell back to a
# default-sized (or leftover) figure.
plt.figure(figsize=(10, 10))

ax = plt.axes()
ax.set_facecolor("black")

decP_Fig2(sdata).pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1',
    palette=['#F5BB00','#519e3e','#ef8636','#335AF2','#5B5B5B'],  # Custom color palette
    groups=['iEVT','decBAM1','decBAM2','decPAM1','Fib']  # List of specific celltypes to show
    ,na_color='lightgrey'
).pl.show(ax=ax, title="Morphology image", )

plt.savefig('./xenium_morphology_alpha0_core50_annotations.jpg', dpi=900, format='jpg')

## Figure 3F - decP

In [None]:
def decP_Fig2_sub(x):
    """Crop ``x`` to the x/y box (43500, 104000)-(45500, 106000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[43500, 104000],
        max_coordinate=[45500, 106000],
        target_coordinate_system="global",
    )

In [None]:
# decP_Fig2_sub zoom-in: iEVT, three macrophage populations and fibroblasts
# highlighted; everything else is drawn in light grey.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_Fig2_sub(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1',
    groups=['iEVT', 'decBAM1', 'decBAM2', 'decPAM1', 'Fib'],
    palette=['#F5BB00', '#519e3e', '#ef8636', '#335AF2', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Morphology image")

plt.savefig('./xenium_morphology_alpha0_core50_annotations_sub.jpg', dpi=900, format='jpg')

## Figure 2E - decP

In [None]:
def decP_Fig1(x):
    """Crop ``x`` to the x/y box (11000, 90200)-(21000, 100000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[11000, 90200],
        max_coordinate=[21000, 100000],
        target_coordinate_system="global",
    )

In [None]:
# decP_Fig1 region coloured by coarse cell type on a black background.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_Fig1(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl025',
    groups=groups_to_show,  # labels to draw
    palette=palette,        # custom colours
).pl.show(ax=ax, title="Morphology image")

plt.savefig('xenium_morphology_alpha0_core19_annotations.jpg', dpi=900, format='jpg')

## Figure 2E - decP zoom-in

In [None]:
def decP_Fig1_sub(x):
    """Crop ``x`` to the x/y box (15000, 93000)-(17000, 95000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[15000, 93000],
        max_coordinate=[17000, 95000],
        target_coordinate_system="global",
    )

In [None]:
# decP_Fig1_sub zoom-in coloured by coarse cell type on a black background.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_Fig1_sub(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl025',
    groups=groups_to_show,  # labels to draw
    palette=palette,        # custom colours
).pl.show(ax=ax, title="Morphology image")

plt.savefig('xenium_morphology_alpha0_core19_annotations_sub.jpg', dpi=900, format='jpg')

In [None]:
# Redefinition of the 13-colour palette: third entry is now '#FFDF22' and the
# last entry '#DB231A'.
palette = [
    "#8A8DBE",
    "#FFCCFA",
    "#FFDF22",
    "#A32392",
    "#335AF2",
    "#85BDC1",
    "#9FC13E",
    "#EF9203",  # MAC
    "#19E6FF",
    "#7A7469",
    "#A36A12",
    "#8E033D",
    "#DB231A",
]

In [None]:
# Start the aggregate-figure annotation from the level-1 cell-type labels.
sdata.tables['table'].obs['celltype_lvl1_aggr'] = sdata.tables['table'].obs['celltype_lvl1']

In [None]:
# Collapse every cell type that is not highlighted in the aggregate figures
# into the literal string label "NA" (a real category, not a missing value).
background_celltypes = [
    'B cell', 'EPI', 'Endo', 'Fib',
    'Lymph', 'Mast', 'Mono', 'Mural',
    'NK_CD39+', 'Neutro', 'Other', 'Treg',
    'decBAM1', 'decBAM2', 'pMac', 'decPAM1', 'pNK',
]
sdata.tables['table'].obs['celltype_lvl1_aggr'] = sdata.tables['table'].obs['celltype_lvl1_aggr'].replace(
    dict.fromkeys(background_celltypes, "NA")
)

In [None]:
# Show the labels remaining after collapsing the background cell types.
sdata.tables['table'].obs['celltype_lvl1_aggr'].cat.categories

## Figure 4A - core 28

In [None]:
def decP_aggr(x):
    """Crop ``x`` to the x/y box (23500, 83250)-(25250, 85000) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[23500, 83250],
        max_coordinate=[25250, 85000],
        target_coordinate_system="global",
    )

In [None]:
# Region returned by the currently defined decP_aggr, coloured by the
# aggregate annotation; the collapsed "NA" group gets the dark grey entry.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_aggr(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1_aggr',
    groups=['DC1', 'NK_CD39-', 'Tcell_CD4+', 'Tcell_CD8+', 'decPAM2', 'NA'],
    palette=['#DB231A', '#519e3e', '#ef8636', '#335AF2', '#C200FB', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Annotation")

plt.savefig('./xenium_morphology_alpha0_core28_annotations_aggregate1.jpg', dpi=900, format='jpg')

In [None]:
# Expression of one gene in the region returned by the currently defined decP_aggr.
gene_name = 'CXCL10'

plt.figure(figsize=(10, 10))

ax = plt.axes()
ax.set_facecolor("black")

decP_aggr(sdata).pl.render_shapes(
    "cell_boundaries",
    color=gene_name,
    cmap='Reds'
).pl.show(ax=ax, title=f"{gene_name} expression")

# The file name is derived from gene_name so that switching the gene above
# cannot save a figure under the wrong (hard-coded CXCL10) name.
plt.savefig(f'./xenium_morphology_alpha0_core28_{gene_name}_aggregate.jpg', dpi=900, format='jpg')


## Figure 4A - core 22

In [None]:
def decP_aggr(x):
    """Crop ``x`` to the x/y box (27500, 15000)-(29250, 16750) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[27500, 15000],
        max_coordinate=[29250, 16750],
        target_coordinate_system="global",
    )

In [None]:
# Region returned by the currently defined decP_aggr, coloured by the
# aggregate annotation; the collapsed "NA" group gets the dark grey entry.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_aggr(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1_aggr',
    groups=['DC1', 'NK_CD39-', 'Tcell_CD4+', 'Tcell_CD8+', 'decPAM2', 'NA'],
    palette=['#DB231A', '#519e3e', '#ef8636', '#335AF2', '#C200FB', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Annotation")

plt.savefig('./xenium_morphology_alpha0_core22_annotations_aggregate1.jpg', dpi=900, format='jpg')

In [None]:
# Expression of one gene in the region returned by the currently defined decP_aggr.
gene_name = 'CXCL10'

plt.figure(figsize=(10, 10))

ax = plt.axes()
ax.set_facecolor("black")

decP_aggr(sdata).pl.render_shapes(
    "cell_boundaries",
    color=gene_name,
    cmap='Reds'
).pl.show(ax=ax, title=f"{gene_name} expression")

# The file name is derived from gene_name so that switching the gene above
# cannot save a figure under the wrong (hard-coded CXCL10) name.
plt.savefig(f'./xenium_morphology_alpha0_core22_{gene_name}_aggregate.jpg', dpi=900, format='jpg')


## Figure 4A - core 50

In [None]:
def decP_aggr(x):
    """Crop ``x`` to the x/y box (45000, 102500)-(46750, 104250) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[45000, 102500],
        max_coordinate=[46750, 104250],
        target_coordinate_system="global",
    )

In [None]:
# Region returned by the currently defined decP_aggr, coloured by the
# aggregate annotation; the collapsed "NA" group gets the dark grey entry.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decP_aggr(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1_aggr',
    groups=['DC1', 'NK_CD39-', 'Tcell_CD4+', 'Tcell_CD8+', 'decPAM2', 'NA'],
    palette=['#DB231A', '#519e3e', '#ef8636', '#335AF2', '#C200FB', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Annotation")

plt.savefig('./xenium_morphology_alpha0_core50_annotations_aggregate2.jpg', dpi=900, format='jpg')

In [None]:
# Expression of one gene in the region returned by the currently defined decP_aggr.
gene_name = 'CXCL10'

plt.figure(figsize=(10, 10))

ax = plt.axes()
ax.set_facecolor("black")

decP_aggr(sdata).pl.render_shapes(
    "cell_boundaries",
    color=gene_name,
    cmap='Reds'
).pl.show(ax=ax, title=f"{gene_name} expression")

# The file name is derived from gene_name so that switching the gene above
# cannot save a figure under the wrong (hard-coded CXCL10) name.
plt.savefig(f'./xenium_morphology_alpha0_core50_{gene_name}_aggregate.jpg', dpi=900, format='jpg')

## Figure 5A

In [None]:
def decB_agg(x):
    """Crop ``x`` to the x/y box (43250, 90000)-(45000, 91750) in the "global" coordinate system."""
    return bounding_box_query(
        x,
        axes=("x", "y"),
        min_coordinate=[43250, 90000],
        max_coordinate=[45000, 91750],
        target_coordinate_system="global",
    )

In [None]:
# decB_agg region coloured by the aggregate annotation, with iEVT added to the
# highlighted groups; the collapsed "NA" group gets the dark grey entry.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_facecolor("black")

roi = decB_agg(sdata)
roi.pl.render_shapes(
    "cell_boundaries",
    color='celltype_lvl1_aggr',
    groups=['DC1', 'NK_CD39-', 'Tcell_CD4+', 'Tcell_CD8+', 'decPAM2', 'iEVT', 'NA'],
    palette=['#DB231A', '#519e3e', '#ef8636', '#335AF2', '#C200FB', '#F5BB00', '#5B5B5B'],
    na_color='lightgrey',
).pl.show(ax=ax, title="Morphology image")

plt.savefig('./xenium_morphology_alpha0_core49_annotations_aggregate2.jpg', dpi=900, format='jpg')