In [1]:
### This is a test code for batch CCI detection  ###
import os
import numpy as np
import pandas as pd
from scipy.ndimage import binary_dilation
import tifffile
from concurrent.futures import ProcessPoolExecutor
import cv2
import pandas as pd
import openpyxl
import anndata as ad
import glob

In [None]:
# Define the base paths and their corresponding sample IDs and ends
bases = {
    1: {'path': './S2/CellStatsDir/',
        'csv_filename': 'S2_Cell_Stats_F{fov_str}.csv',
        'end': 274}, # 274
    2: {'path': './S3/CellStatsDir/',
        'csv_filename': 'S3_Cell_Stats_F{fov_str}.csv',
        'end': 390} #390
}

def process_fov(sample_id, fov_str, base_path, csv_filename_template, output_dir):
    csv_filename = csv_filename_template.format(fov_str=fov_str)
    csv_path = os.path.join(base_path, f"FOV{fov_str}", csv_filename)
    image_path = os.path.join(base_path, "CellOverlay", f"CellOverlay_F{fov_str}.jpg")
    mask_path = os.path.join(base_path, f"FOV{fov_str}", f"CellLabels_F{fov_str}.tif")

    print(f"Processing FOV{fov_str} for Sample ID {sample_id}")

    if not os.path.exists(csv_path) or not os.path.exists(image_path) or not os.path.exists(mask_path):
        print(f"Required files not found for FOV{fov_str}.")
        return

    df = pd.read_csv(csv_path)
    image = cv2.imread(image_path)
    mask = tifffile.imread(mask_path)

    if image is None or mask is None:
        print(f"Unable to load image or mask for FOV{fov_str}.")
        return

    interacting_cells = find_interacting_cells(mask)
    print(f"Interacting cells for FOV{fov_str}: {interacting_cells}")

    # Save the interacting cells results to a CSV file
    results_path = os.path.join(output_dir, f"Multiple_InteractingCells_S{sample_id}_FOV{fov_str}.csv")
    interacting_cells_list = []
    for cell_id, ids in interacting_cells.items():
        for id_ in ids:
            interacting_cells_list.append((cell_id, id_))

    df_interacting_cells = pd.DataFrame(interacting_cells_list, columns=['CellID', 'InteractingCellID'])
    df_interacting_cells.to_csv(results_path, sep='\t', index=False)
    print(f"Results saved to {results_path}")

def find_interacting_cells(masks, structure=np.ones((3,3), dtype=np.int8)):
    dilated_masks = np.zeros_like(masks)
    cell_ids = np.unique(masks)[1:]  # Skip the background label 0
    interactions = {}

    for cell_id in cell_ids:
        dilated_cell = binary_dilation(masks == cell_id, structure=structure)
        interacting_ids = np.unique(masks[dilated_cell & (masks != cell_id)])
        interacting_ids = [id_ for id_ in interacting_ids if id_ != 0]  # Skip the background label 0
        if interacting_ids:
            interactions[cell_id] = interacting_ids

    return interactions

# Process each FOV in parallel
with ProcessPoolExecutor() as executor:
    futures = []
    for sample_id, info in bases.items():
        base_path = info['path']
        csv_filename_template = info['csv_filename']
        fov_end = info['end']
        for fov_num in range(1, fov_end + 1):
            fov_str = f"{fov_num:03d}"
            futures.append(executor.submit(process_fov, sample_id, fov_str, base_path, csv_filename_template, output_dir))

    for future in futures:
        future.result()  # Get the result of the future if needed

Processing FOV001 for Sample ID 1Processing FOV002 for Sample ID 2Processing FOV002 for Sample ID 1Processing FOV001 for Sample ID 2



Interacting cells for FOV002: {1: [2], 2: [1, 3], 3: [2], 7: [9], 9: [7], 14: [15], 15: [14], 17: [21], 21: [17, 22], 22: [21], 24: [25, 28], 25: [24], 26: [27, 28], 27: [26, 30], 28: [24, 26], 29: [151], 30: [27], 31: [32, 33], 32: [31], 33: [31], 44: [45], 45: [44], 48: [50, 152], 50: [48], 57: [59], 58: [60], 59: [57], 60: [58], 63: [64], 64: [63], 68: [69], 69: [68, 74], 70: [71, 78, 80], 71: [70, 80], 72: [76, 79], 73: [75], 74: [69, 77], 75: [73, 88], 76: [72, 85], 77: [74, 86, 87], 78: [70], 79: [72, 85], 80: [70, 71, 83], 81: [82, 89], 82: [81, 89, 91, 96], 83: [80, 93, 94, 95], 85: [76, 79, 93], 86: [77, 87, 100], 87: [77, 86], 88: [75, 97], 89: [81, 82, 96], 90: [98, 107], 91: [82, 96, 98], 92: [101, 103], 93: [83, 85, 94, 102, 108], 94: [83, 93, 95, 102], 95: [83, 94, 101, 105], 96: [82, 89, 91, 106], 97: [88], 98: [90, 91, 107], 99: [104], 

In [25]:
###  Merge FOV results   ###
import os
import pandas as pd
import glob

output_file = 'CCI_all_output.csv'

csv_files = glob.glob(os.path.join(input_dir, 'Multiple_InteractingCells_S*_FOV*.csv'))

dfs = []

for file_path in csv_files:
    df = pd.read_csv(file_path, sep = '\t')
    
    file_name = os.path.basename(file_path)
    sample_id = file_name.split('_S')[1].split('_FOV')[0]
    fov_str = file_name.split('_FOV')[1].split('.')[0]

    df['sample_id'] = sample_id
    df['FOV'] = fov_str
    
    df['CellID'] = 's' + sample_id + '_c_' + sample_id + '_FOV' + fov_str + '_' + df['CellID'].astype(str)
    df['InteractingCellID'] = 's' + sample_id + '_c_' + sample_id + '_FOV' + fov_str + '_' + df['InteractingCellID'].astype(str)
    
    dfs.append(df)

dfs_sorted = sorted(dfs, key=lambda x: (x['sample_id'].iloc[0], x['FOV'].iloc[0]))

df_combined = pd.concat(dfs_sorted, ignore_index=True)

df_combined.to_csv(os.path.join(input_dir, output_file), index=False)
print(f"Merged file saved to {output_file}")



Merged file saved to CCI_all_output.csv


In [27]:
###  Analyse Direct CCI results --  Test --  ###
CCI_df = pd.read_csv('/home/zq/TDLNprogram/data/SMIdata/CellPoseTest/CCIResultOutput/CCI_all_output.csv', sep = ',')

In [29]:

CCI_df['CellID'] = CCI_df['CellID'].str.replace('FOV', '', regex=False)
CCI_df['InteractingCellID'] = CCI_df['InteractingCellID'].str.replace('FOV', '', regex=False)


Unnamed: 0,CellID,InteractingCellID,sample_id,FOV
0,s1_c_1_001_1,s1_c_1_001_2,1,1
1,s1_c_1_001_1,s1_c_1_001_16,1,1
2,s1_c_1_001_1,s1_c_1_001_3738,1,1
3,s1_c_1_001_1,s1_c_1_001_3741,1,1
4,s1_c_1_001_1,s1_c_1_001_3750,1,1
...,...,...,...,...
20066833,s2_c_2_390_7590,s2_c_2_390_7568,2,390
20066834,s2_c_2_390_7591,s2_c_2_390_3179,2,390
20066835,s2_c_2_390_7591,s2_c_2_390_7545,2,390
20066836,s2_c_2_390_7591,s2_c_2_390_7576,2,390


In [30]:

def remove_leading_zeros(cell_id):
    return cell_id.replace('_00', '_').replace('_0', '_')

CCI_df['CellID'] = CCI_df['CellID'].apply(remove_leading_zeros)
CCI_df['InteractingCellID'] = CCI_df['InteractingCellID'].apply(remove_leading_zeros)


Unnamed: 0,CellID,InteractingCellID,sample_id,FOV
0,s1_c_1_1_1,s1_c_1_1_2,1,1
1,s1_c_1_1_1,s1_c_1_1_16,1,1
2,s1_c_1_1_1,s1_c_1_1_3738,1,1
3,s1_c_1_1_1,s1_c_1_1_3741,1,1
4,s1_c_1_1_1,s1_c_1_1_3750,1,1
...,...,...,...,...
20066833,s2_c_2_390_7590,s2_c_2_390_7568,2,390
20066834,s2_c_2_390_7591,s2_c_2_390_3179,2,390
20066835,s2_c_2_390_7591,s2_c_2_390_7545,2,390
20066836,s2_c_2_390_7591,s2_c_2_390_7576,2,390


In [31]:

import pandas as pd
import numpy as np
import anndata as ad

adata = ad.read_h5ad(file_path)

final_cell_type_map = adata.obs.set_index(adata.obs.index)['Final.cell.type'].to_dict()


def get_final_cell_type(cell_id, cell_type_map):
    return cell_type_map.get(cell_id, np.nan)

CCI_df['CellID_Final_cell_type'] = CCI_df['CellID'].apply(lambda x: get_final_cell_type(x, final_cell_type_map))
CCI_df['InteractingCellID_Final_cell_type'] = CCI_df['InteractingCellID'].apply(lambda x: get_final_cell_type(x, final_cell_type_map))


Unnamed: 0,CellID,InteractingCellID,sample_id,FOV,CellID_Final_cell_type,InteractingCellID_Final_cell_type
0,s1_c_1_1_1,s1_c_1_1_2,1,1,Class_switched_memoryB,Class_switched_memoryB
1,s1_c_1_1_1,s1_c_1_1_16,1,1,Class_switched_memoryB,Class_switched_memoryB
2,s1_c_1_1_1,s1_c_1_1_3738,1,1,Class_switched_memoryB,
3,s1_c_1_1_1,s1_c_1_1_3741,1,1,Class_switched_memoryB,Class_switched_memoryB
4,s1_c_1_1_1,s1_c_1_1_3750,1,1,Class_switched_memoryB,Class_switched_memoryB
...,...,...,...,...,...,...
20066833,s2_c_2_390_7590,s2_c_2_390_7568,2,390,CD4_Tmem,CD4_Tmem
20066834,s2_c_2_390_7591,s2_c_2_390_3179,2,390,,
20066835,s2_c_2_390_7591,s2_c_2_390_7545,2,390,,
20066836,s2_c_2_390_7591,s2_c_2_390_7576,2,390,,


In [33]:
# remove NA of CCI_df
# CCI_df_removeNA.csv will be used to analyse CCI interaction 
CCI_df = CCI_df.dropna(subset=['CellID_Final_cell_type', 'InteractingCellID_Final_cell_type', 'sample_fov'])

output_path = './SMIdata/CellPoseTest/CCIResultOutput/CCI_df_removeNA.csv'
CCI_df.to_csv(output_path, index=False)
CCI_df

Unnamed: 0,CellID,InteractingCellID,sample_id,FOV,CellID_Final_cell_type,InteractingCellID_Final_cell_type,sample_fov
0,s1_c_1_1_1,s1_c_1_1_2,1,1,Class_switched_memoryB,Class_switched_memoryB,T001848336_1
1,s1_c_1_1_1,s1_c_1_1_16,1,1,Class_switched_memoryB,Class_switched_memoryB,T001848336_1
3,s1_c_1_1_1,s1_c_1_1_3741,1,1,Class_switched_memoryB,Class_switched_memoryB,T001848336_1
4,s1_c_1_1_1,s1_c_1_1_3750,1,1,Class_switched_memoryB,Class_switched_memoryB,T001848336_1
5,s1_c_1_1_2,s1_c_1_1_1,1,1,Class_switched_memoryB,Class_switched_memoryB,T001848336_1
...,...,...,...,...,...,...,...
20066825,s2_c_2_390_7587,s2_c_2_390_7536,2,390,CD4_Tmem,CD4_Tmem,S9795736_390
20066826,s2_c_2_390_7587,s2_c_2_390_7575,2,390,CD4_Tmem,CD4_Tmem,S9795736_390
20066832,s2_c_2_390_7590,s2_c_2_390_7539,2,390,CD4_Tmem,CD4_Tmem,S9795736_390
20066833,s2_c_2_390_7590,s2_c_2_390_7568,2,390,CD4_Tmem,CD4_Tmem,S9795736_390


In [None]:
### CCI region spericif interaction selection ###
region_select_df = pd.read_excel(region_select_path)

cci_df_path = './SMIdata/CellPoseTest/CCIResultOutput/CCI_df_removeNA.csv'
cci_df = pd.read_csv(cci_df_path, sep=',')

t_region_sample_fov = region_select_df[region_select_df['Region_type'] == 'T_region']['sample_fov'].tolist()

t_region_cci_df = cci_df[cci_df['sample_fov'].isin(t_region_sample_fov)]


t_region_output_path = './SMIdata/CellPoseTest/CCIResultOutput/T_region_CCI_df.csv'

t_region_cci_df.to_csv(t_region_output_path, index=False)

print(f"T_region CCI_df saved to {t_region_output_path}")