# Tutorial 7: Molecular-level differential abundance analysis for Alzheimer's disease (AD)

In [None]:
import matplotlib.pyplot as plt
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap
import tifffile
import cv2
from argparse import Namespace
from skimage import filters, measure
from scipy import ndimage
from scipy.spatial.distance import cdist
import seaborn as sns
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import argparse
import os
import gc
import json
from scipy import stats
import meld
from meld_analysis_2766g import *
import sklearn

# Notebook-wide matplotlib defaults: Type 42 (TrueType) fonts in PDF output,
# and 300 dpi for both displayed and saved figures.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'savefig.dpi': 300,
    'figure.dpi': 300,
})

## Initialize configs and fix seed

In [None]:
# Parameters are hard-coded here, following the configuration, so the notebook
# never has to parse command-line arguments again.
args = Namespace(
    # TODO: replace with the actual h5ad file paths.
    data_paths=[
        '/path/to/control_sample1.h5ad',
        '/path/to/AD_sample1.h5ad',
    ],
    # Condition label for each dataset, in the same order as data_paths.
    conditions=['control', 'AD'],
    plaque_sample='AD_2766g_m9723',
    # TODO: replace with the actual tif file path.
    plaque_image_path='/path/to/plaque_protein_image.tif',
    output_dir='./results',
    target_sample_size=200000,
    beta=67,
    knn=7,
    min_plaque_area=100,
    surrounding_kernel_size=9,
    surrounding_iterations=15,
)

# Echo the key settings so the run documents itself.
for summary_line in (
    f"Processing {len(args.data_paths)} datasets",
    f"Conditions: {args.conditions}",
    f"Plaque sample: {args.plaque_sample}",
    f"Output directory: {args.output_dir}",
):
    print(summary_line)

## Preprocess and sample dataset

In [None]:
# Announce the run and recap the inputs it will use.
print("=== MELD Analysis with Protein Plaque Distance Analysis ===")
print(
    f"Data paths: {args.data_paths}",
    f"Conditions: {args.conditions}",
    f"Plaque sample: {args.plaque_sample}",
    f"Output directory: {args.output_dir}",
    sep="\n",
)

# create_output_dirs returns two directories, used below for figures and for
# data files respectively.
output_dirs = create_output_dirs(args.output_dir)
figures_dir, data_dir = output_dirs

# Build the working AnnData from all datasets.
# NOTE(review): presumably this loads each file, tags it with its condition and
# subsamples toward target_sample_size -- confirm in meld_analysis_2766g.
sample_adata = process_and_sample_datasets(
    args.data_paths,
    args.conditions,
    args.target_sample_size,
)

## MELD analysis

In [None]:
# MELD step: run_meld_analysis returns per-sample densities and likelihoods.
# sample_likelihoods is used below as a DataFrame with one column per sample.
sample_densities, sample_likelihoods = run_meld_analysis(
    sample_adata, args.beta, args.knn
)

# AD likelihood per cell is the row-wise mean over every likelihood column whose
# name does not contain "control" (case-insensitive). If no such column exists,
# warn and average over all columns instead.
experimental_samples = [
    col for col in sample_likelihoods.columns if 'control' not in col.lower()
]
if not experimental_samples:
    print("Warning: No experimental samples found for AD likelihood calculation")
    likelihood_source = sample_likelihoods
else:
    likelihood_source = sample_likelihoods[experimental_samples]
sample_adata.obs['AD_likelihood'] = likelihood_source.mean(axis=1).values

# Persist the annotated AnnData next to the other data outputs.
meld_results_path = os.path.join(data_dir, 'sample_adata_with_meld.h5ad')
sample_adata.write_h5ad(meld_results_path)
print(f"MELD results saved to: {meld_results_path}")

## Visualize MELD analysis results

In [None]:
# Two-colour palette applied to the 'condition' categories in the UMAP.
colors = ['#479EA2', '#C16AAF']


def _umap_by_condition(**kwargs):
    """Draw the UMAP coloured by condition, forwarding extra kwargs to scanpy."""
    return sc.pl.umap(sample_adata, color=['condition'], palette=colors, **kwargs)


# plot_and_save receives the plotting callable, a figure name and the figures
# directory.
plot_and_save(_umap_by_condition, 'umap_condition', figures_dir)


## Process plaque data

In [None]:

print(f"\n=== Processing Plaque Analysis for {args.plaque_sample} ===")

# Keep only the cells of the plaque sample. .copy() gives an independent
# AnnData, so later edits to its obs do not touch sample_adata.
sample_adata_subset = sample_adata[sample_adata.obs['dataset'] == args.plaque_sample].copy()

# A bare `return` is a SyntaxError outside a function, so this cell could not
# run at all. Raise instead: every later step needs a non-empty subset, and an
# exception stops notebook execution at this point.
if sample_adata_subset.n_obs == 0:
    raise ValueError(f"No data found for sample {args.plaque_sample}")

# Process the plaque image. The helper returns the image, a grayscale version,
# a closed binary mask, a labeled mask and a per-plaque table.
# NOTE(review): exact contents are defined in meld_analysis_2766g -- confirm.
abeta_img, abeta_gray, binary_mask_closed, labeled_mask, plaque_df = process_plaque_image(
    args.plaque_image_path, args.min_plaque_area)

# Create region masks from the closed binary mask, using the configured
# kernel size and iteration count.
region_mask = create_region_masks(binary_mask_closed, args.surrounding_kernel_size,
                                  args.surrounding_iterations)

## Calculate distances to plaques

In [None]:
# For each cell in the subset, obtain the distance to its nearest plaque and
# that plaque's id.
min_distances, nearest_plaque_ids = calculate_distances_to_plaques(
    sample_adata_subset, plaque_df, abeta_img
)

# Store both results as per-cell annotations on the subset.
subset_obs = sample_adata_subset.obs
subset_obs['nearest_plaque_distance'] = min_distances
subset_obs['nearest_plaque_id'] = nearest_plaque_ids

# An infinite distance marks an invalid result; replace both the id and the
# distance with the sentinel -1 for those cells.
no_plaque_found = min_distances == float('inf')
subset_obs.loc[no_plaque_found, 'nearest_plaque_id'] = -1
subset_obs.loc[no_plaque_found, 'nearest_plaque_distance'] = -1

## Analyze AD likelihood by distance

In [None]:
# Analyze AD likelihood by distance on the plaque-sample subset; figures go to
# figures_dir.
analysis_results = analyze_ad_likelihood_by_distance(sample_adata_subset, figures_dir)

# Update the original dataset from the subset's obs columns rather than from
# the raw arrays. The subset already has invalid (infinite) distances replaced
# by -1, whereas the raw arrays would write `inf` into the saved file and leave
# the two objects inconsistent. The subset was built with this same mask, so
# its row order matches the masked rows.
mask = sample_adata.obs['dataset'] == args.plaque_sample
for column in ('nearest_plaque_distance', 'nearest_plaque_id'):
    sample_adata.obs.loc[mask, column] = sample_adata_subset.obs[column].values

# Save final results
final_results_path = os.path.join(data_dir, 'sample_adata_with_plaque_analysis.h5ad')
sample_adata.write_h5ad(final_results_path)
print(f"Final results saved to: {final_results_path}")

# Save plaque data
plaque_df.to_csv(
    os.path.join(data_dir, f'plaque_information_{args.plaque_sample}.csv'),
    index=False,
)

# Save analysis results. default=str stringifies any value json cannot encode
# natively.
if analysis_results:
    results_json_path = os.path.join(
        data_dir, f'distance_analysis_results_{args.plaque_sample}.json'
    )
    with open(results_json_path, 'w') as f:
        json.dump(analysis_results, f, indent=2, default=str)

# Final summary
print("\n=== Analysis Complete ===")
print(f"Final dataset shape: {sample_adata.shape}")
print(f"Plaques identified: {len(plaque_df)}")
if analysis_results and analysis_results['significant']:
    print(f"✓ Significant difference found (p={analysis_results['p_value']:.6f})")
    print(f"  Close to plaque mean: {analysis_results['close_mean']:.4f}")
    print(f"  Far from plaque mean: {analysis_results['far_mean']:.4f}")
else:
    print("No significant difference found between close/far groups")