In [1]:
# Install once with: pip install G4X-helpers
# The outputs in this notebook were produced with the version displayed below (0.1.1).
import g4x_helpers as g4h
g4h.__version__

'0.1.1'

### pass a customer output folder to create a sample object

In [2]:
# Absolute path to the customer output folder of a single sample (here: D02).
# Point this at your own data before running the notebook.
run_base = '/mnt/scratch/users/floraths/s3_data/caretta_staged_reads/g4-007/127/fc3/g4-007-127-FC3-L002_v2111/customer_output/D02'
# Build the sample object from the output folder.
sample = g4h.G4Xoutput(run_base=run_base)

# Printing the object gives a text summary (location, imaged area, transcript and protein panels).
print(sample)

G4X_output @ /mnt/scratch/users/floraths/s3_data/caretta_staged_reads/g4-007/127/fc3/g4-007-127-FC3-L002_v2111/customer_output/D02
Sample: [1mD02[0m of G07-127, FC3
imaged area: (4.76 x 6.00) mm

Transcript panel with 362 genes	[ACKR1, ACKR4, ACTA2, ADGRE5, ADGRF5 ... ]
Protein panel with 20 proteins	[ATPase, CD11c, CD20, CD3, CD31 ... ]



List the content of the sample directory or any of its subdirectories (here: `single_cell_data`)

In [3]:
# Returns a dict with 'dirs' and 'files' entries for the requested subdirectory.
sample.list_content(subdir='single_cell_data')

{'dirs': [],
 'files': ['clustering_umap.csv.gz',
  'cell_by_protein.csv.gz',
  'cell_metadata.csv.gz',
  'cell_by_transcript.csv.gz',
  'dgex.csv.gz',
  'feature_matrix.h5']}

In [5]:
# Load the H&E image as an array.
# NOTE(review): thumbnail=False presumably requests the full-resolution image - confirm against the g4x_helpers docs.
img = sample.load_image('h_and_e', thumbnail=False)
# Display the array dimensions.
img.shape

(19200, 15232, 3)

# Re-Segmentation
### vectorize the seg masks to simulate input from polygons

In [6]:
import g4x_helpers.segmentation as g4x_segmentation

# Load the sample's existing segmentation mask (the expanded=True variant).
test_mask = sample.load_segmentation(expanded=True)

# Convert the mask into a table with `label` and `geometry` (polygon) columns,
# mimicking polygon input that would normally come from an external segmentation.
# NOTE(review): _vectorize_mask is underscore-prefixed, i.e. private by Python convention,
# so it may change between g4x_helpers releases; the effect of nudge=True is not shown here.
polygon_labels = g4x_segmentation._vectorize_mask(test_mask, nudge=True)
polygon_labels.head()

Unnamed: 0,label,geometry
0,1,"POLYGON ((11282.5 37, 11281.5 37, 11280.5 37, ..."
1,2,"POLYGON ((77.5 42, 76.5 42, 75.5 42, 74.5 42, ..."
2,3,"POLYGON ((13034.5 78, 13033.5 78, 13032.5 78, ..."
3,4,"POLYGON ((12417.5 85, 12416.5 85, 12416 84.5, ..."
4,5,"POLYGON ((5776.5 96, 5775.5 96, 5774.5 96, 577..."


In [7]:
# Results of the re-segmentation go into a dedicated subfolder of the sample directory.
seg_out = sample.run_base / 'custom_seg'

# Re-run the G4X segmentation steps using the polygon labels,
# extracting image signal only for the listed channels.
sample.run_g4x_segmentation(
    labels=polygon_labels,
    out_dir=seg_out,
    include_channels=['nuclear', 'eosin', 'PanCK', 'CD31'],
)

Rasterizing provided GeoDataFrame
Extracting mask properties
Assigning transcripts to mask labels
Extracting image signals
Extracting nuclear signal...
Extracting eosin signal...
Extracting PanCK signal...
Extracting CD31 signal...
Building output data structures
Saving output files to /mnt/scratch/users/floraths/s3_data/caretta_staged_reads/g4-007/127/fc3/g4-007-127-FC3-L002_v2111/customer_output/D02/custom_seg


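We can see that re-segmentation with the vectorized original mask reproduces the original table: `area`, `cell_x`/`cell_y`, and the transcript count columns match `nuclei_expanded_area`, `expanded_cell_x`/`expanded_cell_y`, and the count columns of the original output shown further below. Note, however, that in the re-segmented table all four `*_intensity_mean` columns hold the same value (equal to the original `PanCK_intensity_mean`), so the per-channel intensities are not identical and should be checked.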

In [10]:
import polars as pl
# Preview the cell metadata table written by the re-segmentation run.
pl.read_csv(seg_out / 'single_cell_data' / 'cell_metadata.csv.gz').head()

Unnamed: 0_level_0,segmentation_cell_id,area,cell_x,cell_y,nuclearstain_intensity_mean,cytoplasmicstain_intensity_mean,PanCK_intensity_mean,CD31_intensity_mean,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts
i64,str,f64,f64,f64,f64,f64,f64,f64,i64,f64,i64,f64
0,"""D02-1""",1208.0,11278.938742,15.34851,6.461093,6.461093,6.461093,6.461093,0,0.0,0,0.0
1,"""D02-2""",1735.0,69.005764,19.633429,3.84611,3.84611,3.84611,3.84611,0,0.0,0,0.0
2,"""D02-3""",2076.0,13024.856936,54.096339,6.550096,6.550096,6.550096,6.550096,2,1.098612,5,1.791759
3,"""D02-4""",1787.0,12414.146614,60.585339,4.829323,4.829323,4.829323,4.829323,2,1.098612,9,2.302585
4,"""D02-5""",2250.0,5767.357778,69.454222,5.351111,5.351111,5.351111,5.351111,4,1.609438,13,2.639057


In [11]:
# Preview the original cell metadata table for comparison with the re-segmented one.
# (`pl` is already imported in the preceding cell, so it is not re-imported here.)
pl.read_csv(sample.run_base / 'single_cell_data' / 'cell_metadata.csv.gz').head()

label,nuclei_area,cell_y,cell_x,nuclei_expanded_area,expanded_cell_y,expanded_cell_x,nuclearstain_intensity_mean,cytoplasmicstain_intensity_mean,PD1_intensity_mean,CD8_intensity_mean,PDL1_intensity_mean,CD56_intensity_mean,KI67_intensity_mean,CD45RA_intensity_mean,PanCK_intensity_mean,MsIgG2a_intensity_mean,FOXP3_intensity_mean,CD20_intensity_mean,CD4_intensity_mean,MsIgG1_intensity_mean,CD11c_intensity_mean,HLA-DR_intensity_mean,CD68_intensity_mean,CD31_intensity_mean,CTLA4_intensity_mean,ATPase_intensity_mean,CD3_intensity_mean,aSMA_intensity_mean,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,i64,f64
"""D02-1""",133.0,12.789474,11278.766917,1208.0,15.34851,11278.938742,113.948675,65.528974,1.220199,2.620033,1.120861,1.027318,1.113411,1.403974,6.461093,1.855132,1.495033,2.245033,1.528974,0.916391,0.716887,1.059603,3.703642,8.274007,1.433775,1.167219,1.862583,10.798013,0,0.0,0,0.0
"""D02-2""",299.0,18.458194,69.076923,1735.0,19.633429,69.005764,195.552476,127.934552,3.468012,8.157349,5.737752,1.536023,3.442075,4.533718,3.84611,3.14755,3.832277,6.106052,2.571758,2.043228,3.580403,2.847262,8.447839,18.906628,3.46513,3.491066,6.203458,3.285303,0,0.0,0,0.0
"""D02-3""",391.0,53.71867,13024.918159,2076.0,54.096339,13024.856936,142.603083,85.710983,1.867052,2.692678,2.007707,0.820809,2.180154,2.188825,6.550096,1.879576,1.86368,3.374759,1.521676,2.025048,1.865607,2.38632,2.424374,7.371387,1.462909,1.769268,4.195087,8.270713,2,1.098612,5,1.791759
"""D02-4""",306.0,60.882353,12414.04902,1787.0,60.585339,12414.146614,218.645215,122.061556,2.002238,2.703414,2.001679,1.148293,6.467823,5.761052,4.829323,1.841634,2.196419,10.973139,0.866256,0.593733,3.125909,2.68047,4.304421,10.589256,0.814214,0.54169,1.562955,40.233352,2,1.098612,9,2.302585
"""D02-5""",500.0,69.414,5767.33,2250.0,69.454222,5767.357778,156.375619,87.652722,2.183556,3.704444,1.211111,0.675111,2.282667,2.940889,5.351111,2.068,1.877778,2.992444,2.667111,0.948,0.787556,1.132444,4.692444,25.133333,1.274222,1.46,3.810667,8.737333,4,1.609438,13,2.639057


## Bin file generation

In [12]:
import g4x_helpers.g4x_viewer.bin_generator as bin_gen
import anndata as ad

# Folder holding the re-segmentation output.
seg_out = sample.run_base / 'custom_seg'

# Feature matrix written by the re-segmentation run, read as an AnnData object.
adata = ad.read_h5ad(seg_out / 'single_cell_data' / 'feature_matrix.h5')

# Original AnnData of the sample and its original expanded segmentation mask.
adata_ori = sample.load_adata()
cell_labels = sample.load_segmentation(expanded=True)

# Re-segmentation does not produce clusters by default, so add a constant
# per-cell label (the sample id) that is later passed as the cluster key.
adata.obs['sample'] = sample.sample_id

In [13]:
# Derive the output folder from the re-segmentation directory instead of repeating
# the absolute path, so this cell keeps working when `run_base` points elsewhere.
out = seg_out / 'bin_test'

# Assigning to `_` discards the return value so nothing is echoed as cell output.
_ = bin_gen.seg_converter(
    adata_clustered=adata,
    adata_orig=adata_ori,
    arr=cell_labels,
    cluster_key='sample',
    outpath=f'{out}/{sample.sample_id}.bin',
)


[33m(raylet)[0m [2025-05-16 00:02:53,380 E 356248 356277] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2025-05-16_00-02-41_920682_355172 is over 95% full, available space: 273.087 GB; capacity: 9691.57 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2025-05-16 00:03:03,389 E 356248 356277] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2025-05-16_00-02-41_920682_355172 is over 95% full, available space: 273.085 GB; capacity: 9691.57 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2025-05-16 00:03:13,397 E 356248 356277] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2025-05-16_00-02-41_920682_355172 is over 95% full, available space: 273.083 GB; capacity: 9691.57 GB. Object creation will fail if spilling is required.
[33m(raylet)[0m [2025-05-16 00:03:23,404 E 356248 356277] (raylet) file_system_monitor.cc:116: /tmp/ray/session_2025-05-16_00-02-41_920682_355172 is over 95% full, available space: 273.081 GB; c