In [None]:
import os 
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Genes whose transcript-to-nucleus distances are summarised by this cell.
gene_of_interest = [
    'Pmch', 'Slc17a7', 'Snap25',
    'Ctgf', 'Sst', 'Plp1',
    'Prox1', 'Vip', 'Gfap',
]

# Collects one per-gene statistics table for every processed tissue.
all_gene_stats = []

# One run per tissue. Each ID follows
# '<8-digit stamp>_PRISM3D_mousebrain_<tissue>_confocal'; the tissue code is
# the fourth '_'-separated field (the stamp is presumably the acquisition date).
run_ids = [
    f'{stamp}_PRISM3D_mousebrain_{tissue}_confocal'
    for stamp, tissue in (
        ('20230704', 'CTX'),
        ('20230705', 'HT'),
        ('20230706', 'TH'),
        ('20230710', 'HP'),
    )
]

# Root of the spatial-data tree. It is the same for every run, so it is built
# once here instead of on every loop iteration.
BASE_DIR = Path('G:/spatial_data')

# Factor used to convert the pixel distances to micrometres.
# NOTE(review): one factor scales the full 3-D distance, which assumes the
# z spacing equals the xy pixel size — confirm against the acquisition settings.
PIXEL_SIZE_UM = 0.208

# For each tissue run: load the decoded transcripts and the per-cell table,
# measure how far every transcript of interest lies from its cell's centre
# coordinates, summarise per gene, write a per-tissue CSV and keep the table
# (tagged with its tissue code) for the combined summary.
for RUN_ID in tqdm(run_ids, desc="Processing tissues"):
    # Directories holding the processed data of this run.
    process_dir = BASE_DIR / 'processed' / RUN_ID
    read_dir = process_dir / 'readout'
    # Not read inside this loop; kept because the name stays defined afterwards
    # and other notebook cells may rely on it.
    seg_dir = process_dir / 'segmented'

    # Directories holding the analysis results of this run.
    analysis_dir = BASE_DIR / 'analysis' / RUN_ID
    cell_typ_dir = analysis_dir / "celltyping"
    subcel_dir = analysis_dir / 'subcellular'

    # Load the transcript table and the per-cell table.
    rna_df = pd.read_csv(read_dir / 'mapped_genes.csv', index_col=0, low_memory=False)
    cell_info = pd.read_csv(cell_typ_dir / 'cell_info.csv', index_col=0)

    # Keep only the genes of interest, then attach each transcript's cell row
    # (inner join: transcripts whose 'Cell Index' is absent from cell_info are dropped).
    rna_filtered_df = rna_df[rna_df['Gene'].isin(gene_of_interest)].copy()
    merged_df = pd.merge(rna_filtered_df, cell_info, left_on='Cell Index', right_index=True)

    # Euclidean distance, in pixels, between each transcript and the ce_*
    # coordinates of its cell. Squared differences are summed in x, y, z order.
    merged_df['distance_to_nucleus'] = np.sqrt(sum(
        (merged_df[f'{axis}_in_pix'] - merged_df[f'ce_{axis}_in_pix']) ** 2
        for axis in ('x', 'y', 'z')
    ))

    # Normalised distance: the pixel distance divided by twice 'cell_radius'.
    # NOTE(review): presumably cell_radius is in pixels as well — confirm.
    merged_df['normalized_distance'] = merged_df['distance_to_nucleus'] / (merged_df['cell_radius'] * 2)

    # Per-gene summary statistics.
    gene_stats = merged_df.groupby('Gene').agg(
        mean_distance_to_nucleus=('distance_to_nucleus', 'mean'),
        median_distance_to_nucleus=('distance_to_nucleus', 'median'),
        mean_normalized_distance=('normalized_distance', 'mean')
    )

    # Convert the two absolute-distance columns from pixels to micrometres;
    # the normalised distance is a ratio and is left untouched.
    distance_columns = ['mean_distance_to_nucleus', 'median_distance_to_nucleus']
    gene_stats[distance_columns] = gene_stats[distance_columns] * PIXEL_SIZE_UM

    # Write the per-tissue table before tagging it, so the file holds only the
    # three statistics columns and no column has to be dropped again.
    subcel_dir.mkdir(parents=True, exist_ok=True)
    gene_stats.to_csv(subcel_dir / 'distance_to_nucleus.csv')

    # Tag the table with the tissue code (fourth '_' field: CTX, HT, TH, HP)
    # and keep it for the combined summary.
    tissue_type = RUN_ID.split('_')[3]
    gene_stats['tissue'] = tissue_type
    all_gene_stats.append(gene_stats)

# Stack the per-tissue tables and turn the 'Gene' index into an ordinary column.
combined_stats = pd.concat(all_gene_stats).reset_index()

# Make sure the destination folder for the combined table exists.
BASE_DIR = Path(r'G:\spatial_data')
final_analysis_dir = BASE_DIR.joinpath(
    'analysis',
    '20250727_PRISM_NBT_revision_subcellular_validation',
)
final_analysis_dir.mkdir(parents=True, exist_ok=True)

# Write the combined summary table without the positional index.
output_path = final_analysis_dir.joinpath('combined_distance_to_nucleus_stats.csv')
combined_stats.to_csv(output_path, index=False)

# Report where the table went, its size, the tissues and genes it covers,
# and show the first ten rows.
print(f"所有组织的统计数据已合并并保存至: {output_path}")
print(f"\n合并后的数据表包含 {len(combined_stats)} 行记录")
print(f"组织类型: {combined_stats['tissue'].unique()}")
print(f"基因: {combined_stats['Gene'].unique()}")
print("\n合并后数据表预览:")
print(combined_stats.head(10))

Processing tissues: 100%|██████████| 4/4 [00:01<00:00,  2.85it/s]

所有组织的统计数据已合并并保存至: G:\spatial_data\analysis\20250727_PRISM_NBT_revision_subcellular_validation\combined_distance_to_nucleus_stats.csv

合并后的数据表包含 36 行记录
组织类型: ['CTX' 'HT' 'TH' 'HP']
基因: ['Ctgf' 'Gfap' 'Plp1' 'Pmch' 'Prox1' 'Slc17a7' 'Snap25' 'Sst' 'Vip']

合并后数据表预览:
      Gene  mean_distance_to_nucleus  median_distance_to_nucleus  \
0     Ctgf                  5.771458                    5.516078   
1     Gfap                  6.448569                    5.941253   
2     Plp1                  5.787681                    5.389671   
3     Pmch                  6.023144                    5.795291   
4    Prox1                  5.906025                    5.434893   
5  Slc17a7                  6.088485                    5.781257   
6   Snap25                  6.552690                    6.223193   
7      Sst                  5.942139                    5.609217   
8      Vip                  5.894778                    5.644009   
9     Ctgf                  5.719621                    


