# Create cell table for Pixie from MCquant quantification tables

In [1]:
import os

# Import the necessary library to handle the CSV file
import pandas as pd

# Set the path to the directory containing the CSV files
quant_path = "/gpfs/bwfor/work/ws/hd_gr294-mi_lunaphore_mcmicro/pixie_subset/cell_quantifications"

# Create an empty list to store the DataFrames
dfs = []

# Loop through each file in the directory
for filename in os.listdir(quant_path):
    if filename.endswith(".csv"):
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join(quant_path, filename)
        cells_df = pd.read_csv(file_path)

        # Rename the 'CellID' column to 'label'
        cells_df.rename(columns={'CellID': 'label'}, inplace=True)

        # Rename the 'Area' column to 'cell_size'
        cells_df.rename(columns={'Area': 'cell_size'}, inplace=True)

        # Extract the filename from the file path and remove the extension
        filename = os.path.splitext(filename)[0]

        # Add a new column 'fov' containing the filename for each row
        cells_df['fov'] = filename

        # Select only the three specified columns: 'fov', 'label', and 'cell_size'
        reformatted_cells_df = cells_df[['fov', 'label', 'cell_size','X_centroid','Y_centroid','Eccentricity','Solidity','Extent','Orientation']]

        # Append the reformatted DataFrame to the list
        dfs.append(reformatted_cells_df)

# Concatenate all the DataFrames in the list into one big table
merged_df = pd.concat(dfs)

# Write the merged table to a CSV file
merged_df.to_csv('/gpfs/bwfor/work/ws/hd_gr294-mi_lunaphore_mcmicro/pixie_subset/segmentation/cell_table/cell_table_size_normalized.csv', index=False)

In [3]:
merged_df

Unnamed: 0,fov,label,cell_size,X_centroid,Y_centroid,Eccentricity,Solidity,Extent,Orientation
0,24h_86,1,619,23356.368336,2304.602585,0.529328,0.962675,0.764198,-0.617105
1,24h_86,2,727,28690.766162,2545.367263,0.821353,0.969333,0.677540,-1.067879
2,24h_86,3,1053,24556.710351,3095.472934,0.692851,0.901541,0.716327,1.303128
3,24h_86,4,554,24589.252708,3103.557762,0.827008,0.819527,0.586243,0.536782
4,24h_86,5,3219,14522.277415,4275.552656,0.492453,0.958036,0.752454,-1.437275
...,...,...,...,...,...,...,...,...,...
82108,Control_14,82109,2077,12287.895041,24183.298507,0.510616,0.971014,0.714482,-1.363983
82109,Control_14,82110,1574,12391.485388,24184.196315,0.311718,0.968615,0.745265,-1.159081
82110,Control_14,82111,1466,5664.908595,26437.990450,0.885100,0.865918,0.553208,-0.970361
82111,Control_14,82112,326,20273.625767,27866.987730,0.929094,0.767059,0.449655,0.681381


In [3]:
import feather
fov_pixel_data = feather.read_dataframe(
            os.path.join("/gpfs/bwfor/work/ws/hd_gr294-mi_lunaphore_mcmicro/pixie_subset/pixie/lunaphore_pixel_masked_0.05_wseg_pixel_output_dir/pixel_mat_data/", "4h_96" + '.feather')
        )

In [4]:
fov_pixel_data

Unnamed: 0,ANKRD1,aSMA,CCR2,CD31,CD45,CD68,MPO,PDGFRa,TNNT2,TREM2,fov,row_index,column_index,label,pixel_som_cluster,pixel_meta_cluster,pixel_meta_cluster_rename
0,0.325984,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.002172,0.000000,4h_96,3494,16292,0,13,20,20
1,0.347732,0.000000,0.107691,0.054809,0.121312,0.055774,0.081402,0.069343,0.690877,0.367059,4h_96,3494,16293,0,24,20,20
2,0.350133,0.000000,0.121155,0.073685,0.137808,0.062747,0.092779,0.078013,0.639372,0.412950,4h_96,3494,16294,0,24,20,20
3,0.354894,0.001612,0.135305,0.088188,0.153648,0.071848,0.104447,0.087198,0.588721,0.459158,4h_96,3494,16295,0,24,20,20
4,0.358978,0.003833,0.148458,0.103524,0.168571,0.080326,0.115231,0.096250,0.539528,0.501184,4h_96,3494,16296,0,24,20,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243860754,1.824928,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.087362,0.000000,0.000000,4h_96,28355,30240,0,91,1,1
243860755,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.611024,0.000000,0.000000,4h_96,28355,30241,0,70,8,8
243860756,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.611024,0.000000,0.000000,4h_96,28355,30242,0,70,8,8
243860757,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.611024,0.000000,0.000000,4h_96,28355,30243,0,70,8,8


In [5]:
fov_pixel_data.label.unique()

array([    0,     3,     4, ..., 55747, 55748, 55749], dtype=uint32)

In [11]:
from skimage import io

mask = io.imread('/gpfs/bwfor/work/ws/hd_gr294-mi_lunaphore_mcmicro/pixie_subset/segmentation/4h_96_whole_cell.tiff')

In [13]:
mask.max()

55770

In [6]:
group_by_cluster_col = fov_pixel_data.groupby(
            ['label', "pixel_meta_cluster_rename"]
        ).size().reset_index(name='count')

In [9]:
group_by_cluster_col

Unnamed: 0,label,pixel_meta_cluster_rename,count
0,0,1,13593229
1,0,2,427031
2,0,5,442247
3,0,8,1149668
4,0,10,688466
...,...,...,...
295727,55748,8,1246
295728,55749,8,3
295729,55749,10,6
295730,55749,17,883
