In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import pickle
from pathlib import Path

import matplotlib as mpl
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from matplotlib.ticker import ScalarFormatter
from scipy.stats import mannwhitneyu
from statannotations.Annotator import Annotator
from statsmodels.stats.multitest import multipletests

In [None]:
# Hex colour for each broad cell-type label.
broad_cell_types_color_mapping = {
    "Lymphatic Endothelial Cells": "#ffb695",
    "Macrophages": "#ff40ff",
    "Vascular Endothelial Cells": "#a4e000",
    "Pericytes": "#9f7704",
    "Fibroblasts": "#c7d0c0",
    "T-cells": "#941100",
    "Keratinocytes": "#181c82",
    "Dendritic cells": "#ff9300",
    "Spinous to Granular Cells": "#034cff",
    "Pilosebaceous Cells": "#bbbde2",
    "B-cells": "#f12d00",
    "Melanocytes": "#00bbbf",
}


# Niche labels in the fixed left-to-right order used by the per-niche boxplots
# (passed to seaborn as `order=`).
niches_order = [
    "Tumor core",
    "Tumor",
    "Tumor boundary",
    "TA VEC stroma",
    "Macrophage stroma",
    "T cell stroma",
    "Stroma",
    "Immune",
    "VEC stroma",
    "Basal epidermis",
    "Differentiated epidermis",
]


# Hex colour for each niche label (passed to seaborn as `palette=`); the keys
# are the same labels listed in `niches_order`, grouped here by tissue compartment.
niche_colors = {
    # SKIN
    "Basal epidermis": "#b299e3",
    "Differentiated epidermis": "#ffd000",
    # STROMA
    "Stroma": "#646500",
    "VEC stroma": "#0f640d",
    "TA VEC stroma": "#00e50c",
    # "nTA VEC Stroma": "#",
    # IMMUNE
    "Macrophage stroma": "#00dbf4",
    "T cell stroma": "#0051f9",
    "Immune": "#c100f9",
    # TUMOR
    "Tumor core": "#450000",
    "Tumor": "#eb0000",
    "Tumor boundary": "#faa0aa",
}

# Hex colour for each `Stage` label.
stage_colors = {
    "nodular": "#FF8783",
    "plaque": "#FFFFE6",
    "patch": "#A6EC9A",
    "control": "#808080",
}


def median_without_outliers(series):
    """Return the median of ``series`` after removing IQR outliers.

    Missing and non-finite values are discarded first. Values outside
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (bounds inclusive) are then dropped
    and the median of what remains is returned.

    Parameters
    ----------
    series : pandas.Series
        Numeric values; may contain NaN and +/-inf.

    Returns
    -------
    float
        Median of the retained values, or NaN when no finite value is left
        (for example an empty group produced by ``groupby(..., observed=False)``).
    """
    values = series.dropna()
    # An all-missing group may carry object dtype, which np.isfinite rejects,
    # so bail out before the finiteness filter.
    if values.empty:
        return np.nan
    values = values[np.isfinite(values)]
    if values.empty:
        return np.nan

    # Errors from quantile() are deliberately not swallowed: the previous
    # print-and-continue handler left Q1/Q3 undefined and failed a line later
    # with an unrelated UnboundLocalError.
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # between() is inclusive on both ends, matching `>= lower` and `<= upper`.
    return values[values.between(lower_bound, upper_bound)].median()


def get_cores_by_stage(adata, stage="nodular"):
    """Return the ``path_block_core`` values whose stage equals ``stage``.

    A core's stage is the ``Stage`` value of the first row of ``adata.obs``
    in which that core appears. Cores are returned in order of first
    appearance.

    Parameters
    ----------
    adata : object exposing an ``obs`` DataFrame
        ``obs`` must contain the columns ``path_block_core`` and ``Stage``.
    stage : str, default "nodular"
        Stage label to select.

    Returns
    -------
    list
        Matching core identifiers; empty when no core has that stage.
    """
    # One pass over obs: keep the first row per core instead of building a
    # separate AnnData view for every core.
    first_rows = adata.obs.drop_duplicates(subset="path_block_core")
    # Rows with a missing core id cannot be selected by equality, so skip them
    # rather than failing on an empty subset.
    first_rows = first_rows[first_rows["path_block_core"].notna().to_numpy()]
    is_match = (first_rows["Stage"] == stage).to_numpy()
    return first_rows.loc[is_match, "path_block_core"].tolist()
# adata_obs

# Table of Contents
1. [Distance of niche from tumor](#distance-of-niche-from-tumor)
2. [Vessel size for each niche](#vessel-size-for-each-niche)
3. [Vessel density for each niche](#vessel-density-for-each-niche)
4. [Vessel density for each stage](#vessel-density-for-each-stage)


## Read Data

In [None]:
# Load the annotated data object; the cells below read per-observation
# columns from `adata.obs` (e.g. 'niches', 'distance_to_core', 'vessel_area').
adata_path = '../data/spatial_single_cell_KS_adata.h5ad'
adata = sc.read_h5ad(adata_path)

## Distance of niche from tumor

In [None]:
# Rank niches by the outlier-trimmed median of 'distance_to_core', smallest first.
median_filtered_per_group = (
    adata.obs
    .groupby("niches", observed=False)["distance_to_core"]
    .apply(median_without_outliers)
    .reset_index(name="median_without_outliers")
    .sort_values(by="median_without_outliers")
)
order = median_filtered_per_group["niches"].to_list()

In [None]:
def plot_order(pdata, stage='all', distance_column='distance_to_core', palette=None):
    """Boxplot of a distance column per niche, ordered by trimmed median.

    Niches are laid out along the x axis in ascending order of
    ``median_without_outliers`` applied to ``distance_column``. Outlier
    points are not drawn.

    Parameters
    ----------
    pdata : object exposing an ``obs`` DataFrame
        ``obs`` must contain a ``'niches'`` column and ``distance_column``.
    stage : str, default 'all'
        Currently unused; kept so existing calls keep working.
    distance_column : str, default 'distance_to_core'
        Column of ``pdata.obs`` plotted on the y axis.
    palette : optional
        Passed to seaborn as the per-niche palette. When ``None`` every box
        is drawn in white.

    Returns
    -------
    None
        A new figure is created and left as the current pyplot figure.
    """
    medians = (
        pdata.obs.groupby('niches', observed=False)[distance_column]
        .apply(median_without_outliers)
        .reset_index(name='median_without_outliers')
        .sort_values(by='median_without_outliers')
    )
    order = medians['niches'].to_list()

    fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
    x_col = 'niches'
    # Arguments shared by the coloured and the plain variant.
    box_kwargs = dict(
        x=x_col, y=distance_column, data=pdata.obs, order=order,
        legend=False, showfliers=False, ax=ax,
        linecolor='#333333', linewidth=.5,
    )
    if palette is not None:
        # hue repeats x only so that the palette is applied per niche.
        sns.boxplot(hue=x_col, palette=palette, **box_kwargs)
    else:
        sns.boxplot(color='white', **box_kwargs)

    # Label through the Axes object rather than pyplot's implicit current axes.
    ax.set_xlabel('Niche', fontsize=16)
    # Raw string: '\m' is not a valid escape sequence in a normal string literal.
    ax.set_ylabel(r'Distance to Tumor Core ($\mu m$)', fontsize=16)

    ax.tick_params(axis='x', labelrotation=90)
    fig.tight_layout()

In [None]:
# Distance boxplot for every observation in `adata`, coloured with the niche palette.
plot_order(adata, palette=niche_colors)


## Vessel size for each niche

In [None]:
# `path_block_core` values whose stage is 'nodular'; used below to subset `adata`.
nodular_cores = get_cores_by_stage(adata, 'nodular')

In [None]:
# Vessel area per niche, restricted to the cores listed in `nodular_cores`.
sns.set_style("ticks")

fig, ax = plt.subplots(1, 1, figsize=(6, 5), dpi=100)
adata_subset = adata[adata.obs['path_block_core'].isin(nodular_cores)]

# hue repeats x only so the per-niche palette is applied; a legend would be
# redundant. Outlier points are hidden, so no flier styling is passed.
sns.boxplot(data=adata_subset.obs, x='niches', y='vessel_area', ax=ax,
            hue='niches', palette=niche_colors, legend=False,
            order=niches_order, showfliers=False,
            linewidth=0.5,  # width of the box outlines
            boxprops=dict(edgecolor='black'),
            whiskerprops=dict(color='black', linewidth=0.5),
            medianprops=dict(color='black', linewidth=0.5),
            capprops=dict(color='black', linewidth=0.5))

ax.set_xlabel('Niches', fontsize=12)
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r'Vessel Area ($\mu m^2$)', fontsize=12)
# Rotate the existing tick labels in place instead of re-setting their text.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

# Display the plots
plt.tight_layout()

## Vessel density for each niche

In [None]:
# Load the vessel-count table used by the density plots below
# (presumably a DataFrame with 'niches', 'Stage' and 'normalized_count' columns — TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code; only load this file from a trusted source.
with open('../data/KS_vessel_counts.pkl', 'rb') as f:
    vessel_counts = pickle.load(f)

In [None]:
# Vessel density ('normalized_count') per niche from `vessel_counts`.
sns.set_style("ticks")

fig, ax = plt.subplots(1, 1, figsize=(6, 3), dpi=300)
# hue repeats x only so the per-niche palette is applied; a legend would be
# redundant. Outlier points are hidden, so no flier styling is passed.
sns.boxplot(x='niches', y='normalized_count',
            hue='niches', palette=niche_colors, legend=False,
            data=vessel_counts,
            ax=ax, order=niches_order, linewidth=.5, showfliers=False,
            boxprops=dict(edgecolor='black'),
            whiskerprops=dict(color='black', linewidth=0.5),
            medianprops=dict(color='black', linewidth=0.5),
            capprops=dict(color='black', linewidth=0.5))
# Math-text tick formatter with scientific notation forced on the y axis.
ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))

# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r'Vessel Density  (per $\mu$m$^2$)', fontsize=12)
ax.set_xlabel('Niche', fontsize=12)
# Rotate the existing tick labels in place instead of re-setting their text.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
plt.show()

## Vessel density for each stage

In [None]:
# Vessel density per stage: horizontal boxplot with an individual fill and
# outline colour (including alpha) per box, annotated with BH-corrected
# pairwise Mann-Whitney U results.

data = vessel_counts.copy()
order = ['control', 'patch', 'plaque', 'nodular']

face_palette = {
    'control': '#6c8fad',
    'patch':   '#96bc6b',
    'plaque':  '#fed680',
    'nodular': '#b4746b',
}

edge_palette = {
    'control': '#325374',
    'patch':   '#4f7f1b',
    'plaque':  '#eab645',
    'nodular': '#7f211e',
}

# === choose your transparencies (0=fully transparent, 1=opaque) ===
FACE_ALPHA = 1   # for the box fills only
EDGE_ALPHA = 1.00   # for box edges, whiskers, caps, medians
LINE_WIDTH_BOX = 1.2
LINE_WIDTH_LINES = 0.9

sns.set_style("ticks")
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=100)

# No palette is passed: every box is recoloured explicitly below, and passing
# `palette` without `hue` is deprecated in seaborn 0.13.
sns.boxplot(
    y='Stage', x='normalized_count', data=data,
    order=order, ax=ax,
    linewidth=LINE_WIDTH_BOX, showfliers=False,
)

# ---- Grab boxes robustly across seaborn/matplotlib versions ----
# Depending on the version, the box patches are exposed through ax.artists
# or ax.patches.
boxes = list(ax.artists)
if not boxes:
    boxes = [p for p in ax.patches if isinstance(p, mpl.patches.PathPatch)]
if len(boxes) != len(order):
    # The recolouring below pairs boxes with stages by position; a missing box
    # (e.g. a stage without data) would silently shift every colour.
    raise ValueError(
        f"Expected {len(order)} boxes (one per stage) but found {len(boxes)}."
    )

# Whiskers, caps and medians are the Line2D objects in ax.lines; they are
# assumed to be stored contiguously per box, in box order.
lines = list(ax.lines)
lines_per_box = max(1, int(round(len(lines) / len(boxes))))

for i, (stage, box) in enumerate(zip(order, boxes)):
    edge_rgba = mcolors.to_rgba(edge_palette[stage], EDGE_ALPHA)

    # Box fill + outline with desired alpha
    box.set_facecolor(mcolors.to_rgba(face_palette[stage], FACE_ALPHA))
    box.set_edgecolor(edge_rgba)
    box.set_linewidth(LINE_WIDTH_BOX)

    # Whiskers, caps, median for this box
    for line in lines[i * lines_per_box:(i + 1) * lines_per_box]:
        line.set_color(edge_rgba)
        line.set_linewidth(LINE_WIDTH_LINES)
        line.set_alpha(EDGE_ALPHA)

# Axis formatting
ax.xaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
ax.set_xlabel('Vessel Density (per $\\mu$m$^2$)', fontsize=12)
ax.set_ylabel('Stage', fontsize=12)
# Fix the tick positions before relabelling, and derive the labels from
# `order` so they cannot drift from the plotted order.
ax.set_yticks(list(range(len(order))))
ax.set_yticklabels([stage.capitalize() for stage in order])

# ---- Pairwise comparisons ----
pairs = [('control', 'patch'), ('control', 'plaque'), ('control', 'nodular'),
         ('patch', 'plaque'), ('patch', 'nodular'), ('plaque', 'nodular')]

# Set significance threshold
alpha = 0.05

# (stage_a, stage_b, raw two-sided Mann-Whitney U p-value) for each pair
results = []
for stage_a, stage_b in pairs:
    group1 = data.loc[data['Stage'] == stage_a, 'normalized_count']
    group2 = data.loc[data['Stage'] == stage_b, 'normalized_count']
    stat, p_value = mannwhitneyu(group1, group2, alternative='two-sided')
    results.append((stage_a, stage_b, p_value))

# BH/FDR correction across all pairwise tests
raw_pvals = [r[2] for r in results]
rej, pvals_bh, *_ = multipletests(raw_pvals, alpha=alpha, method='fdr_bh')


def _significance_label(p, threshold):
    """Map a corrected p-value to 'ns' or a star string.

    Every p-value gets exactly one label, so the label list always has one
    entry per pair. Previously values in [0.01, threshold] and NaN produced
    no label at all, which shifted or broke the annotations.
    """
    # NOTE(review): the star cut-offs are one decade stricter than the common
    # 0.05 / 0.01 / 0.001 / 0.0001 convention; they are kept as authored.
    if not p <= threshold:  # also catches NaN
        return 'ns'
    if p < 1e-5:
        return '****'
    if p < 1e-4:
        return '***'
    if p < 1e-3:
        return '**'
    return '*'


formatted_pvalues = [_significance_label(p, alpha) for p in pvals_bh]

# NOTE: Run annotations *after* the coloring above; annotation may add extra ax.lines.
# `order` is passed so the annotator places groups in the same order as the plot.
annotator = Annotator(ax, pairs, data=data, x='normalized_count', y='Stage',
                      order=order, orient='h')
annotator.set_custom_annotations(formatted_pvalues)
annotator.annotate()
plt.show()


In [None]:
# One array of 'normalized_count' values per stage, unpacked in the order listed.
control, patch, plaque, nodular = (
    data.loc[data["Stage"] == stage_name, "normalized_count"].values
    for stage_name in ("control", "patch", "plaque", "nodular")
)


In [None]:
# Two-sided Mann-Whitney U test for every pair of stages (p-values here are
# uncorrected), reported in the order the tests are listed.
_comparisons = [
    ("Control vs Nodular", control, nodular),
    ("Control vs Patch", control, patch),
    ("Control vs Plaque", control, plaque),
    ("Patch vs Plaque", patch, plaque),
    ("Nodular vs Plaque", nodular, plaque),
    ("Nodular vs Patch", nodular, patch),
]

stat_results = [
    mannwhitneyu(first, second, alternative="two-sided")
    for _title, first, second in _comparisons
]

pvalues = [outcome.pvalue for outcome in stat_results]

for _entry, _outcome in zip(_comparisons, stat_results):
    print(f"{_entry[0]}: \n", _outcome, "\n")
