In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
fibro_immune_vs_epithelium_csv.py
分析指定目录/文件中的 CellChat LR 表，提取
  - {Fibro|Immune} ↔ {Luminal|Basal} 的通讯
  - 仅保留乳腺 (MG) 独有信号
  - 拆分三物种共有 / 任意两物种共有
结果各写入 CSV，并保留 pathway_name
"""

import pandas as pd, glob, re, os
from pathlib import Path

# --------------------------------------------------
# 1) User-defined input: choose ONE of the two options below.
csv_dir   = r"D:\111\CellChat-immune"            # directory holding the 5 *_LR_table.csv files
csv_files = None                                  # or an explicit list of the 5 file paths
# --------------------------------------------------

# Fall back to globbing csv_dir when no explicit list was given. The result is
# sorted because glob returns matches in arbitrary order; sorting makes the
# dataset load order (and therefore the output row order) reproducible.
if csv_files is None:
    csv_files = sorted(glob.glob(str(Path(csv_dir) / "*_LR_table.csv")))

# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would silently skip this validation.
if len(csv_files) != 5:
    raise ValueError("必须提供 5 份 *_LR_table.csv")

# Dataset key = file stem with "_LR_table" removed.
# NOTE(review): keys are presumably expected to match the species_map keys
# below (e.g. "M-MG"); a key missing from species_map maps to NaN later on.
datasets = {Path(f).stem.replace('_LR_table', ''): pd.read_csv(f)
            for f in csv_files}

# ---- Mappings: dataset key -> species / gland ----
species_map = {'M-MG': 'Mouse', 'R-MG': 'Rabbit', 'S-MG': 'SugarGlider',
               'R-AG': 'Rabbit', 'S-AG': 'SugarGlider'}
# Keys ending in "MG" are tagged 'MG'; every other key is tagged 'AG'.
gland_map   = {k: ('MG' if k.endswith('MG') else 'AG') for k in datasets}

# ---- Cell-type classification keywords ----
# Exact, case-sensitive labels that are treated as fibroblasts.
fibro_labels    = {'Fibroblast', 'Fibroblasts', 'Fibroblasts 1', 'Fibroblasts 2'}
# The three lists below hold regex patterns that are searched in the
# lower-cased cell label.
# 'nk' carries a negative look-behind so it only matches when it is not
# preceded by a letter; a bare 'nk' also matched labels such as "Unknown"
# and wrongly tagged them as Immune.
# NOTE(review): the remaining keywords are still plain substrings, e.g.
# 't cell' also matches "mast cell" and 'b cell' matches "club cell" --
# confirm that this is intended for the labels in your data.
immune_keywords = ['immune', 'lymph', 't cell', 'b cell', 'macrophage',
                   'myeloid', r'(?<![a-z])nk', 'monocyte']
lum_keywords    = ['lum', 'luminal']
basal_keywords  = ['basal', 'myo']

def classify(cell):
    """Map a cell-type label to one of four coarse categories.

    Args:
        cell: The cell-type label to classify.

    Returns:
        'Fibro' when the label is exactly one of ``fibro_labels``; otherwise
        'Immune', 'Luminal' or 'Basal' for the first keyword list (checked in
        that order) with a pattern found in the lower-cased label; ``None``
        when nothing matches or when ``cell`` is not a string (for example a
        NaN coming from a blank CSV cell).
    """
    # Non-string values have no .lower(); treat them as unclassifiable
    # instead of raising AttributeError.
    if not isinstance(cell, str):
        return None
    if cell in fibro_labels:
        return 'Fibro'

    low = cell.lower()
    # Order encodes priority: the first category with a matching pattern wins.
    for category, keywords in (('Immune', immune_keywords),
                               ('Luminal', lum_keywords),
                               ('Basal', basal_keywords)):
        if any(re.search(k, low) for k in keywords):
            return category
    return None

# ---- Collect the communications that match the criteria ----
# A row is kept when one end classifies as Fibro/Immune and the other end as
# Luminal/Basal, in either direction.
fibro_immune   = {'Fibro', 'Immune'}
epithelium     = {'Luminal', 'Basal'}
record_columns = ['interaction', 'pathway_name', 'src_cell', 'tgt_cell',
                  'pair_class', 'dataset']

records = []
for ds, df in datasets.items():
    # Walk the four needed columns in lockstep; this avoids building a Series
    # object for every row the way DataFrame.iterrows() does.
    rows = zip(df['interaction_name'], df['pathway_name'],
               df['source'], df['target'])
    for interaction, pathway, src, tgt in rows:
        src_cat = classify(src)
        tgt_cat = classify(tgt)
        if src_cat is None or tgt_cat is None:
            continue
        cats = {src_cat, tgt_cat}
        if cats & fibro_immune and cats & epithelium:
            records.append({
                'interaction' : interaction,
                'pathway_name': pathway,   # keep the pathway information
                'src_cell'    : src,
                'tgt_cell'    : tgt,
                'pair_class'  : f"{src_cat}->{tgt_cat}",
                'dataset'     : ds,
            })

# Explicit columns keep the schema even when nothing matched; without them an
# empty frame has no 'dataset' column and the filters below raise KeyError.
df_all = pd.DataFrame(records, columns=record_columns)

# ---- Keep mammary gland (MG) rows only, excluding every interaction that
# ---- also appears in an AG dataset ----
gland     = df_all['dataset'].map(gland_map)
mg_df     = df_all[gland == 'MG']
ag_inter  = set(df_all[gland == 'AG']['interaction'])
mg_unique = mg_df[~mg_df['interaction'].isin(ag_inter)].copy()
mg_unique['species'] = mg_unique['dataset'].map(species_map)

# ---- Count in how many species each interaction occurs ----
# species_grp has one row per interaction with the sorted list of species.
species_grp = (mg_unique.groupby('interaction')['species']
               .agg(lambda s: sorted(set(s))).reset_index())
n_species   = species_grp['species'].apply(len)
three_inter = species_grp[n_species == 3]['interaction']   # found in all three species
two_inter   = species_grp[n_species == 2]['interaction']   # found in exactly two species

three_df = mg_unique[mg_unique['interaction'].isin(three_inter)].drop_duplicates()
two_df   = mg_unique[mg_unique['interaction'].isin(two_inter)].drop_duplicates()

# ---- Write the results as CSV ----
# csv_files is already a list here (it is filled from csv_dir when it was left
# as None), so a `csv_files is None` check at this point can never be true;
# the directory of the first input file is used as the output directory.
out_dir = Path(csv_files[0]).parent
three_csv = out_dir / "ThreeSpecies.csv"
two_csv   = out_dir / "TwoSpecies.csv"

three_df.to_csv(three_csv, index=False)
two_df.to_csv(two_csv,   index=False)

print("分析完成！已生成：")
print(f"  • {three_csv}")
print(f"  • {two_csv}")


分析完成！已生成：
  • D:\111\CellChat-immune\ThreeSpecies.csv
  • D:\111\CellChat-immune\TwoSpecies.csv
