In [7]:
import pandas as pd
import numpy as np
import sys
sys.path.append('CISM')  # Replace with the actual path
from CISM.cism.graph.create_formatted_graph import GraphBuilder
import os
import pathlib

In [41]:
# Raw inputs. The default is the original cluster location; set the CISM_DATA_DIR
# environment variable to run elsewhere without editing the notebook.
DATA_DIR = os.environ.get('CISM_DATA_DIR', '/sise/assafzar-group/assafzar/Datasets')
raw_csv_file = os.path.join(DATA_DIR, 'CRC_cells.csv')
raw_patient_class_file = os.path.join(DATA_DIR, 'CRC_patient_class.csv')

# Where the per-FOV graph files are written. Both values keep their trailing
# slash because they are combined with the `+` operator further down.
root_data_dir = './'
output_graphs_dir = 'CRC_graphs/'

# FANMOD binary (directory overridable through the FANMOD_PATH environment
# variable) and the folders used for its output and cache.
FANMOD_path = os.environ.get('FANMOD_PATH', '/sise/assafzar-group/assafzar/FANMODPlus/Binary')
FANMOD_exe = 'LocalFANMOD'
output_dir = './FANMOD_output/'
cache_dir = './FANMOD_cache/'


In [43]:
# Columns kept from the raw cell table: cell id, FOV label, centroid
# coordinates and the cell-type label.
relevant_columns = ['cellID', 'fov', 'centroid_x', 'centroid_y', 'class']

# Read the whole CSV, then narrow it to the columns listed above (in that order).
data = pd.read_csv(raw_csv_file)[relevant_columns]
data.head()

Unnamed: 0,cellID,fov,centroid_x,centroid_y,class
0,1,P01_A_FOV1,1000.731481,5.657407,Stroma
1,2,P01_A_FOV1,558.966851,7.392265,Macrophage
2,3,P01_A_FOV1,1451.308707,7.693931,CD4T
3,4,P01_A_FOV1,1475.332288,8.376176,Neuron
4,5,P01_A_FOV1,659.990354,11.832797,Macrophage


In [44]:
# Distinct cell-type labels present in the 'class' column.
cell_types = pd.unique(data['class'])


In [45]:
# Preview the table. `data` was already restricted to `relevant_columns` when it
# was loaded, so no re-selection is needed; selecting by the raw column names here
# would raise a KeyError if this cell were re-run after the columns are renamed.
data.head()

Unnamed: 0,cellID,fov,centroid_x,centroid_y,class
0,1,P01_A_FOV1,1000.731481,5.657407,Stroma
1,2,P01_A_FOV1,558.966851,7.392265,Macrophage
2,3,P01_A_FOV1,1451.308707,7.693931,CD4T
3,4,P01_A_FOV1,1475.332288,8.376176,Neuron
4,5,P01_A_FOV1,659.990354,11.832797,Macrophage


In [46]:
def extract_patient_id(fov):
    """Return the integer patient id encoded in a raw FOV label.

    The text before the first underscore is taken, its leading character is
    dropped and the remainder is parsed as an integer,
    e.g. ``'P01_A_FOV1'`` -> ``1``.
    """
    patient_token, _, _ = fov.partition('_')
    return int(patient_token[1:])

def extract_fov_number(fov):
    """Return the integer FOV number encoded in a raw FOV label.

    The third underscore-separated token is taken, its first three characters
    are dropped and the remainder is parsed as an integer,
    e.g. ``'P01_A_FOV1'`` -> ``1``.
    """
    tokens = fov.split('_')
    fov_token = tokens[2]
    return int(fov_token[3:])
# Derive numeric patient / FOV ids from the raw FOV label and switch to the
# column names selected below.
# The guard keeps the cell safe to re-run: once 'cellID' has been renamed the
# frame is already converted and must not be parsed a second time. Unlike a bare
# `except: pass`, a malformed FOV label now raises here instead of being hidden
# and leaving the frame half-converted.
if 'cellID' in data.columns:
    # Both new columns are computed from the original 'fov' strings before any
    # column is replaced, and the result is a new frame, so the conversion is
    # all-or-nothing.
    data = data.assign(
        patient_id=data['fov'].apply(extract_patient_id),
        fov=data['fov'].apply(extract_fov_number),
    ).rename(columns={'cellID': 'cell_id',
                      'centroid_x': 'centroid-0',
                      'centroid_y': 'centroid-1'})
data = data[['patient_id','fov','cell_id','centroid-0','centroid-1','class']]
data.head()

Unnamed: 0,patient_id,fov,cell_id,centroid-0,centroid-1,class
0,1,1,1,1000.731481,5.657407,Stroma
1,1,1,2,558.966851,7.392265,Macrophage
2,1,1,3,1451.308707,7.693931,CD4T
3,1,1,4,1475.332288,8.376176,Neuron
4,1,1,5,659.990354,11.832797,Macrophage


In [47]:
# Identity mapping: every cell-type label found in the data is kept unchanged.
cell_mapper = {cell_type: cell_type for cell_type in cell_types}
colnames_mapper_dict = {'cell_types' : 'class', # which column holds the cell type
                        'patient_id' : 'patient_id'}

# Cells at most 50 µm apart are considered neighbours. At a resolution of
# 0.5 µm (per coordinate unit, presumably pixels — TODO confirm) this
# corresponds to a max_distance of 100.
MAX_NEIGHBOUR_DISTANCE_UM = 50
RESOLUTION_UM = 0.5

g = GraphBuilder(data, cell_mapper, colnames_mapper_dict)

# os.path.join (instead of `+`) keeps the output path valid even when
# root_data_dir is given without a trailing slash.
g.build_graph(path_to_output_dir=os.path.join(root_data_dir, output_graphs_dir),
              max_distance=int(round(MAX_NEIGHBOUR_DISTANCE_UM / RESOLUTION_UM)),
              exclude_cell_type=None)

{'Bcell': 0, 'CD3T': 1, 'CD4T': 2, 'CD8T': 3, 'DC': 4, 'Endothelial': 5, 'Fibroblast': 6, 'Lymphatic': 7, 'Macrophage': 8, 'Neuron': 9, 'Neutrophil': 10, 'Plasma': 11, 'SMV': 12, 'Stroma': 13, 'Treg': 14, 'Tumor': 15, 'Unidentified': 16}


In [48]:

# Fail early if the FANMOD binary is not where the configuration says it is.
assert pathlib.Path(FANMOD_path, FANMOD_exe).exists(), "FANMOD executable not found"

In [49]:
from CISM.cism.cism import CISM

In [51]:
# Motif size and number of iterations handed to CISM below.
motif_size = 3
iterations = 1000

# Set up the CISM pipeline with the FANMOD binary and the dataset / output /
# cache folders defined in the configuration cell.
cism = CISM(
    fanmod_exe=FANMOD_exe,
    fanmod_path=FANMOD_path,
    network_dataset_root_path=root_data_dir,
    fanmod_output_root_path=output_dir,
    fanmod_cache_root_path=cache_dir,
    motif_size=motif_size,
    iterations=iterations,
)

# Register the graph folder as a dataset.
# NOTE(review): the meaning of the positional 'Disease' / 'CRC' arguments is not
# visible in this notebook — confirm against CISM.add_dataset.
# NOTE(review): the saved output of this cell ends with
# "ValueError: not enough values to unpack (expected 7, got 1)"; the cause is
# not visible here and still needs to be tracked down.
cism.add_dataset(
    output_graphs_dir,
    'Disease',
    'CRC',
    force_run_fanmod=False,
    force_parse=False,
    n_jobs=12,
)

 95% [|||||||||||||||||||||||||||||||||||||||||||||||||||||||||   ]

  0%|          | 0/35 [00:00<?, ?it/s]

/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_9_FOV17.txt -o ./FANMOD_output/CRC_graphs//3//9_FOV17.csv -r 1000 -s 3 --colored_vertcies
/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_19_FOV37.txt -o ./FANMOD_output/CRC_graphs//3//19_FOV37.csv -r 1000 -s 3 --colored_vertcies
/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_14_FOV27.txt -o ./FANMOD_output/CRC_graphs//3//14_FOV27.csv -r 1000 -s 3 --colored_vertcies
/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_21_FOV41.txt -o ./FANMOD_output/CRC_graphs//3//21_FOV41.csv -r 1000 -s 3 --colored_vertcies
/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_15_FOV29.txt -o ./FANMOD_output/CRC_graphs//3//15_FOV29.csv -r 1000 -s 3 --colored_vertcies
/sise/assafzar-group/assafzar/FANMODPlus/Binary/LocalFANMOD -i ./CRC_graphs//Patient_31_FOV61.txt -o ./FANMOD_output/

  0%|          | 0/35 [00:00<?, ?it/s]

parse csv: ./FANMOD_output/CRC_graphs//3/17_FOV33.csv patient_num: 17, FOV: FOV33
parse csv: ./FANMOD_output/CRC_graphs//3/3_FOV5.csv patient_num: 3, FOV: FOV5
parse csv: ./FANMOD_output/CRC_graphs//3/22_FOV43.csv patient_num: 22, FOV: FOV43
parse csv: ./FANMOD_output/CRC_graphs//3/10_FOV19.csv patient_num: 10, FOV: FOV19
parse csv: ./FANMOD_output/CRC_graphs//3/12_FOV23.csv patient_num: 12, FOV: FOV23
parse csv: ./FANMOD_output/CRC_graphs//3/13_FOV25.csv patient_num: 13, FOV: FOV25
parse csv: ./FANMOD_output/CRC_graphs//3/33_FOV65.csv patient_num: 33, FOV: FOV65
parse csv: ./FANMOD_output/CRC_graphs//3/32_FOV63.csv patient_num: 32, FOV: FOV63
parse csv: ./FANMOD_output/CRC_graphs//3/2_FOV3.csv patient_num: 2, FOV: FOV3
parse csv: ./FANMOD_output/CRC_graphs//3/30_FOV59.csv patient_num: 30, FOV: FOV59
parse csv: ./FANMOD_output/CRC_graphs//3/6_FOV11.csv patient_num: 6, FOV: FOV11
parse csv: ./FANMOD_output/CRC_graphs//3/18_FOV35.csv patient_num: 18, FOV: FOV35
parse csv: ./FANMOD_output

ValueError: not enough values to unpack (expected 7, got 1)

In [33]:
# Debug helper: list what is actually on disk under the graphs folder.
network_dataset_root_path = root_data_dir
dataset_folder = output_graphs_dir
for dirpath, dirnames, filenames in os.walk(network_dataset_root_path + dataset_folder):
    print(dirpath, dirnames, filenames)

./CRC_graphs/ [] ['.Patient_18_FOV35.txt', '.Patient_6_FOV11.txt', '.Patient_22_FOV43.txt', '.Patient_9_FOV17.txt', '.Patient_17_FOV33.txt', '.Patient_10_FOV19.txt', '.Patient_32_FOV63.txt', '.Patient_4_FOV7.txt', '.Patient_13_FOV25.txt', '.Patient_23_FOV45.txt', '.Patient_27_FOV54.txt', '.Patient_20_FOV39.txt', '.Patient_33_FOV65.txt', '.Patient_5_FOV9.txt', '.Patient_30_FOV59.txt', '.Patient_12_FOV23.txt', '.Patient_28_FOV55.txt', '.Patient_31_FOV61.txt', '.Patient_16_FOV31.txt', '.Patient_8_FOV15.txt', '.Patient_29_FOV58.txt', '.Patient_2_FOV3.txt', '.Patient_3_FOV5.txt', '.Patient_7_FOV13.txt', '.Patient_19_FOV37.txt', '.Patient_14_FOV27.txt', '.Patient_21_FOV41.txt', '.Patient_15_FOV29.txt', '.Patient_24_FOV47.txt', '.Patient_25_FOV49.txt', '.Patient_11_FOV21.txt', '.Patient_35_FOV69.txt', '.Patient_26_FOV51.txt', '.Patient_34_FOV67.txt', '.Patient_1_FOV1.txt']
