## DUSP1 smiFISH Classification Workflow

This notebook processes experimental data from BigFish and CellProperties CSV files to classify DUSP1 smiFISH spots. Below is an outline of the workflow:

### Input:
- BigFish CSV files
- CellProperties CSV files

### Workflow Steps:
1. **Merge Experimental Data**  
    Combine data from the input CSV files for unified analysis.

2. **Signal-to-Noise Ratio (SNR) Analysis**  
    Perform SNR analysis to classify and filter spots based on signal quality.

3. **Measurement Analysis**  
    Conduct measurement analysis to extract relevant features from the data.

4. **Data Merging**  
    Merge all processed data into a single dataset for further analysis.

5. **Create Training Spot Crops (11px x 11px)**  
    Generate training data for machine learning models:
    - Select 1000 spots from each `h5_idx`, ensuring variation across cells and fields of view (FOVs).
    - Use the DUSP1 Display Manager to safely load spot channel images.
    - Perform max projection of images along the z-axis to create 2D (x, y) representations.
    - Extract 11px x 11px crops centered on each spot.
    - Rescale intensity for visualization purposes.
    - Display 100 sample spot crops for quality inspection before saving all crops to a directory.

In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dask.array as da
import os
import sys
import logging
import seaborn as sns
import datetime

# Date tag appended to output filenames, formatted like 'Mar21'.
today = datetime.date.today()
date_str = f"{today:%b%d}"

# Silence matplotlib's font manager entirely and raise the numba and
# matplotlib loggers to WARNING.
logging.getLogger('matplotlib.font_manager').disabled = True
numba_logger = logging.getLogger('numba')
matplotlib_logger = logging.getLogger('matplotlib')
for _noisy_logger in (numba_logger, matplotlib_logger):
    _noisy_logger.setLevel(logging.WARNING)

# Put the directory two levels above the working directory on sys.path so the
# `src` package import that follows can be resolved.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
print(src_path)
sys.path.append(src_path)

from src.Analysis_DUSP1_v2 import DUSP1AnalysisManager, SNRAnalysis, DUSP1Measurement, DUSP1DisplayManager, SpotCropSampler

In [None]:
# Locations handed to the analysis manager (`loc`, `log_location`) and to the
# spot-crop sampler (`save_dir`) in the replica cells.
loc = None
log_location = r'/Volumes/share/Users/Eric/GR_DUSP1_reruns'
save_dir = r'/Volumes/share/Users/Eric/DUSP1_SpotCrops'

# Thresholds passed to both SNRAnalysis and DUSP1Measurement.measure,
# and embedded in the output filenames.
abs_threshold, mg_threshold = 4, 3

    # DUSP1 Replica D 3hr 100nM time-sweep R1

In [None]:
# Replica D (analysis 'DUSP1_D_Final'): load, SNR-filter, measure, re-key IDs,
# save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_D_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 100000.
# For rep_prefix = 10 this equals the former `rep_prefix ** digits`, so the IDs
# written by this cell are unchanged; the formula is spelled this way so that
# it stays correct when the cell is copied for another replica.
rep_prefix = 10

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_D_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_D_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

In [None]:
# # Directory where you saved the summary files
# save_dir = "/Volumes/share/Users/Eric/DUSP1_SpotCrops"

# # 1) Load the data
# crops = np.load(os.path.join(save_dir, f"{prefix}_all_crops.npy"))  # shape: (N, 11, 11) for the 11px x 11px crops (pad=5)
# meta  = pd.read_csv(os.path.join(save_dir, f"{prefix}_all_crop_metadata.csv"))

# # 2) Choose a few random examples (up to 5)
# num_examples = min(5, len(crops))
# indices = np.random.choice(len(crops), size=num_examples, replace=False)

# # 3) Display them
# fig, axes = plt.subplots(1, num_examples, figsize=(num_examples * 3, 3))
# for ax, idx in zip(axes, indices):
#     patch = crops[idx]
#     info  = meta.iloc[idx]
    
#     ax.imshow(patch, cmap='gray')
#     ax.set_title(
#         f"Cell {info.unique_cell_id}\n"
#         f"Spot {info.unique_spot_id}\n"
#         f"MG_SNR={info.MG_SNR:.1f}, SNR={info.snr:.1f}\n"
#         f"MG_pass={info.MG_pass}"
#     )
#     ax.axis('off')

# plt.tight_layout()
# plt.show()

    # DUSP1 Replica E 3hr 100nM time-sweep R2

In [None]:
# Replica E (analysis 'DUSP1_E_Final'): load, SNR-filter, measure, re-key IDs,
# save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_E_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 200000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 20**5 = 3200000 for the
# same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 20

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_E_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_E_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    # DUSP1 Replica F 3hr 100nM time-sweep R3

In [None]:
# Replica F (analysis 'DUSP1_F_Final'): load, SNR-filter, measure, re-key IDs,
# save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_F_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 300000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 30**5 = 24300000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 30

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_F_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_F_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    # DUSP1 Replica M 3hr 100nM time-sweep Partial

In [None]:
# Replica M (analysis 'DUSP1_M_Final'): load, SNR-filter, measure, re-key IDs,
# save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_M_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 400000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 40**5 = 102400000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 40

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_M_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_M_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    # DUSP1 Replica N 3hr 100nM time-sweep Partial

In [None]:
# Replica N (analysis 'DUSP1_N_Final2'): load, SNR-filter, measure, re-key IDs,
# save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_N_Final2')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 500000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 50**5 = 312500000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 50

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
# (The output tag is 'DUSP1_N_Final' although the analysis is 'DUSP1_N_Final2'.)
rep_string = 'DUSP1_N_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_N_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 75min Concentration-sweep Replica 1

In [None]:
# Concentration-sweep replica 1 (analysis 'DUSP1_CS_R1_Final3', saved under the
# tag 'DUSP1_G_Final'): load, SNR-filter, measure, re-key IDs, save CSVs, then
# sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_CS_R1_Final3')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 600000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 60**5 = 777600000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 60

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_G_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_G_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 75min Concentration-sweep Replica 2

In [None]:
# Concentration-sweep replica 2 (analysis 'DUSP1_H_Final'): load, SNR-filter,
# measure, re-key IDs, save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_H_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 700000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 70**5 = 1680700000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 70

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_H_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_H_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 75min Concentration-sweep Replica 3

In [None]:
# Concentration-sweep replica 3 (analysis 'DUSP1_I_Final'): load, SNR-filter,
# measure, re-key IDs, save CSVs, then sample spot crops.
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_I_Final')

# Result tables of the selected analysis, each requested as a DataFrame.
spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# SNR analysis with the notebook-wide thresholds; get_results() is unpacked as
# (spots, clusters, cell properties).
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

# Cell-level measurements from the three merged tables.
dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# Replica-level unique IDs: put this replica's tag digit (repD: 1, repE: 2, ...)
# in front of the existing IDs. rep_prefix is that digit times ten, so the
# offset is rep_prefix * 10**(digits - 1); e.g. max_id = 30245 -> 800000.
# NOTE: this replaces `rep_prefix ** digits`, which gave 80**5 = 3276800000 for
# the same example, so IDs differ from CSVs written with the old formula.
rep_prefix = 80

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * 10 ** (num_digits - 1)
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

# Spot and cluster IDs get their own offsets, sized from their own maxima.
max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * 10 ** (len(str(max_spot_id)) - 1)
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * 10 ** (len(str(max_cluster_id)) - 1)
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# Save all results to CSV, tagged with replica, thresholds and date.
rep_string = 'DUSP1_I_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
csv_suffix = f"MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"
for table_name, table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    table.to_csv(os.path.join(output_dir, f"{rep_string}_{table_name}_{csv_suffix}"), index=False)

# Spot-crop sampler over the re-keyed tables.
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# Dry run (3 crops displayed, nothing written): call sampler.run with
# display=3, save_individual=False, save_summary=False and the same
# save_dir / pad / cells_per_quad / spots_per_cell / spotChannel as below.

# File prefix for the summary files; built from the thresholds so it cannot
# drift from the CSV names above ("DUSP1_I_Final_MG3_Abs4" with MG=3, Abs=4).
prefix = f"{rep_string}_MG{mg_threshold}_Abs{abs_threshold}"

# Full run: no per-spot files, only the prefixed summary files.
# pad=5 presumably yields the 11px x 11px crops named in the workflow outline.
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 1

In [None]:
# --- Load the stored analysis for this replica -------------------------------
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_TCS_R1_Final3')

spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# --- SNR analysis, then cell-level measurement --------------------------------
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# --- Replica-level unique IDs -------------------------------------------------
# The replica number is placed in front of the existing ID digits:
#     offset = rep_prefix * 10 ** (number of digits of the largest ID)
# e.g. rep_prefix = 90 and max ID 30245 -> offset 9000000 -> IDs 9000001..9030245.
# The previous `rep_prefix ** digits` form did not produce a digit prefix and
# disagreed with the formula used for the other replicas in this notebook.
# NOTE(review): 90 breaks the 11, 12, 13, 14 sequence used by the following
# replicas — confirm this is the intended replica number.
rep_prefix = 90

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * (10 ** num_digits)

# The same offset must be applied to every table that carries unique_cell_id,
# otherwise the tables can no longer be joined on it.
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * (10 ** len(str(max_spot_id)))
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * (10 ** len(str(max_cluster_id)))
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# --- Save all results to CSV --------------------------------------------------
rep_string = 'DUSP1_J_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
for _table_name, _table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    _table.to_csv(
        os.path.join(output_dir, f"{rep_string}_{_table_name}_MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"),
        index=False,
    )

# --- Sample spot crops --------------------------------------------------------
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# # — TEST RUN: only 3 displays, no files written —
# crops, meta = sampler.run(
#     save_dir=save_dir,
#     display=3,
#     save_individual=False,
#     save_summary=False,
#     pad=5,
#     cells_per_quad=1,
#     spots_per_cell=20,
#     spotChannel=0
# )

# File-name prefix for the crop summary files of this experiment.
# NOTE(review): "MG3_Abs4" is hard-coded while the CSV names above use
# mg_threshold / abs_threshold — confirm they agree.
prefix = "DUSP1_J_Final_MG3_Abs4"

# full run, no per-spot files, but summary files with prefix
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 2


In [None]:
# --- Load the stored analysis for this replica -------------------------------
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_K_Final')

spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# --- SNR analysis, then cell-level measurement --------------------------------
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# --- Replica-level unique IDs -------------------------------------------------
# The replica number is placed in front of the existing ID digits:
#     offset = rep_prefix * 10 ** (number of digits of the largest ID)
# e.g. rep_prefix = 11 and max ID 30245 -> offset 1100000 -> IDs 1100001..1130245.
# Spot and cluster IDs now use the same formula as cell IDs; the previous
# `rep_prefix ** digits` did not produce a digit prefix.
rep_prefix = 11

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * (10 ** num_digits)

# The same offset must be applied to every table that carries unique_cell_id,
# otherwise the tables can no longer be joined on it.
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * (10 ** len(str(max_spot_id)))
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * (10 ** len(str(max_cluster_id)))
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# --- Save all results to CSV --------------------------------------------------
# Upper-case "K" so the CSV names match the analysis name and the crop prefix
# (was 'DUSP1_k_Final').
rep_string = 'DUSP1_K_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
for _table_name, _table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    _table.to_csv(
        os.path.join(output_dir, f"{rep_string}_{_table_name}_MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"),
        index=False,
    )

# --- Sample spot crops --------------------------------------------------------
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# # — TEST RUN: only 3 displays, no files written —
# crops, meta = sampler.run(
#     save_dir=save_dir,
#     display=3,
#     save_individual=False,
#     save_summary=False,
#     pad=5,
#     cells_per_quad=1,
#     spots_per_cell=20,
#     spotChannel=0
# )

# File-name prefix for the crop summary files of this experiment.
# NOTE(review): "MG3_Abs4" is hard-coded while the CSV names above use
# mg_threshold / abs_threshold — confirm they agree.
prefix = "DUSP1_K_Final_MG3_Abs4"

# full run, no per-spot files, but summary files with prefix
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 3

In [None]:
# --- Load the stored analysis for this replica -------------------------------
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_L_Final')

spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# --- SNR analysis, then cell-level measurement --------------------------------
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# --- Replica-level unique IDs -------------------------------------------------
# The replica number is placed in front of the existing ID digits:
#     offset = rep_prefix * 10 ** (number of digits of the largest ID)
# e.g. rep_prefix = 12 and max ID 30245 -> offset 1200000 -> IDs 1200001..1230245.
# Spot and cluster IDs now use the same formula as cell IDs; the previous
# `rep_prefix ** digits` did not produce a digit prefix.
rep_prefix = 12

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * (10 ** num_digits)

# The same offset must be applied to every table that carries unique_cell_id,
# otherwise the tables can no longer be joined on it.
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * (10 ** len(str(max_spot_id)))
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * (10 ** len(str(max_cluster_id)))
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# --- Save all results to CSV --------------------------------------------------
rep_string = 'DUSP1_L_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
for _table_name, _table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    _table.to_csv(
        os.path.join(output_dir, f"{rep_string}_{_table_name}_MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"),
        index=False,
    )

# --- Sample spot crops --------------------------------------------------------
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# # — TEST RUN: only 3 displays, no files written —
# crops, meta = sampler.run(
#     save_dir=save_dir,
#     display=3,
#     save_individual=False,
#     save_summary=False,
#     pad=5,
#     cells_per_quad=1,
#     spots_per_cell=20,
#     spotChannel=0
# )

# File-name prefix for the crop summary files of this experiment.
# NOTE(review): "MG3_Abs4" is hard-coded while the CSV names above use
# mg_threshold / abs_threshold — confirm they agree.
prefix = "DUSP1_L_Final_MG3_Abs4"

# full run, no per-spot files, but summary files with prefix
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 100nM Dex & 5µM TPL Time-sweep Replica 1

In [None]:
# --- Load the stored analysis for this replica -------------------------------
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_O_Final')

spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# --- SNR analysis, then cell-level measurement --------------------------------
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# --- Replica-level unique IDs -------------------------------------------------
# The replica number is placed in front of the existing ID digits:
#     offset = rep_prefix * 10 ** (number of digits of the largest ID)
# e.g. rep_prefix = 13 and max ID 30245 -> offset 1300000 -> IDs 1300001..1330245.
# Spot and cluster IDs now use the same formula as cell IDs; the previous
# `rep_prefix ** digits` did not produce a digit prefix.
rep_prefix = 13

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * (10 ** num_digits)

# The same offset must be applied to every table that carries unique_cell_id,
# otherwise the tables can no longer be joined on it.
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * (10 ** len(str(max_spot_id)))
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * (10 ** len(str(max_cluster_id)))
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# --- Save all results to CSV --------------------------------------------------
rep_string = 'DUSP1_O_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
for _table_name, _table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    _table.to_csv(
        os.path.join(output_dir, f"{rep_string}_{_table_name}_MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"),
        index=False,
    )

# --- Sample spot crops --------------------------------------------------------
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# # — TEST RUN: only 3 displays, no files written —
# crops, meta = sampler.run(
#     save_dir=save_dir,
#     display=3,
#     save_individual=False,
#     save_summary=False,
#     pad=5,
#     cells_per_quad=1,
#     spots_per_cell=20,
#     spotChannel=0
# )

# File-name prefix for the crop summary files of this experiment.
# NOTE(review): "MG3_Abs4" is hard-coded while the CSV names above use
# mg_threshold / abs_threshold — confirm they agree.
prefix = "DUSP1_O_Final_MG3_Abs4"

# full run, no per-spot files, but summary files with prefix
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

    DUSP1 100nM Dex & 5µM TPL Time-sweep Replica 2

In [None]:
# --- Load the stored analysis for this replica -------------------------------
am = DUSP1AnalysisManager(location=loc, log_location=log_location, mac=True)
am.select_analysis('DUSP1_P_Final2')

spots_df = am.select_datasets("spotresults", dtype="dataframe")
clusters_df = am.select_datasets("clusterresults", dtype="dataframe")
props_df = am.select_datasets("cell_properties", dtype="dataframe")

# --- SNR analysis, then cell-level measurement --------------------------------
snr_df = SNRAnalysis(spots_df, props_df, clusters_df, abs_threshold=abs_threshold, mg_threshold=mg_threshold)
merged_spots_df, merged_clusters_df, merged_cellprops_df = snr_df.get_results()

dusp = DUSP1Measurement(merged_spots_df, merged_clusters_df, merged_cellprops_df)
cell_level_results = dusp.measure(abs_threshold=abs_threshold, mg_threshold=mg_threshold)

# --- Replica-level unique IDs -------------------------------------------------
# The replica number is placed in front of the existing ID digits:
#     offset = rep_prefix * 10 ** (number of digits of the largest ID)
# e.g. rep_prefix = 14 and max ID 30245 -> offset 1400000 -> IDs 1400001..1430245.
# Spot and cluster IDs now use the same formula as cell IDs; the previous
# `rep_prefix ** digits` did not produce a digit prefix.
rep_prefix = 14

max_id = merged_cellprops_df['unique_cell_id'].max()
num_digits = len(str(max_id))
cell_prefix = rep_prefix * (10 ** num_digits)

# The same offset must be applied to every table that carries unique_cell_id,
# otherwise the tables can no longer be joined on it.
merged_spots_df['unique_cell_id'] += cell_prefix
merged_clusters_df['unique_cell_id'] += cell_prefix
merged_cellprops_df['unique_cell_id'] += cell_prefix
cell_level_results['unique_cell_id'] += cell_prefix

max_spot_id = merged_spots_df['unique_spot_id'].max()
spot_prefix = rep_prefix * (10 ** len(str(max_spot_id)))
merged_spots_df['unique_spot_id'] += spot_prefix

max_cluster_id = merged_clusters_df['unique_cluster_id'].max()
cluster_prefix = rep_prefix * (10 ** len(str(max_cluster_id)))
merged_clusters_df['unique_cluster_id'] += cluster_prefix

# --- Save all results to CSV --------------------------------------------------
rep_string = 'DUSP1_P_Final'
output_dir = '/Users/ericron/Desktop/AngelFISH/Publications/Ron_2024/Classification'
os.makedirs(output_dir, exist_ok=True)
for _table_name, _table in (
    ("cell_level_results", cell_level_results),
    ("merged_spots_df", merged_spots_df),
    ("merged_clusters_df", merged_clusters_df),
    ("merged_cellprops_df", merged_cellprops_df),
):
    _table.to_csv(
        os.path.join(output_dir, f"{rep_string}_{_table_name}_MG{mg_threshold}_Abs{abs_threshold}_{date_str}.csv"),
        index=False,
    )

# --- Sample spot crops --------------------------------------------------------
sampler = SpotCropSampler(
    spots_df=merged_spots_df,
    clusters_df=merged_clusters_df,
    cellprops_df=merged_cellprops_df,
    mount_prefix="/Volumes/share"
)

# # — TEST RUN: only 3 displays, no files written —
# crops, meta = sampler.run(
#     save_dir=save_dir,
#     display=3,
#     save_individual=False,
#     save_summary=False,
#     pad=5,
#     cells_per_quad=1,
#     spots_per_cell=20,
#     spotChannel=0
# )

# File-name prefix for the crop summary files of this experiment.
# NOTE(review): "MG3_Abs4" is hard-coded while the CSV names above use
# mg_threshold / abs_threshold — confirm they agree.
prefix = "DUSP1_P_Final_MG3_Abs4"

# full run, no per-spot files, but summary files with prefix
crops, meta = sampler.run(
    save_dir=save_dir,
    display=0,
    save_individual=False,
    save_summary=True,
    file_prefix=prefix,
    pad=5,
    cells_per_quad=1,
    spots_per_cell=20,
    spotChannel=0
)

## Train Classifier

In [None]:
# Use interactive matplotlib backend
%matplotlib widget

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [None]:
class ManualLabeler:
    """Click-to-label tool for spot crops.

    Loads an array of crops and the matching metadata table, shows the next
    unlabeled crops in a grid, records mouse clicks as labels
    (left click = 1 / spot, right click = 0 / not a spot) and writes the
    metadata, including a ``manual_label`` column, to ``output_path``.

    Parameters
    ----------
    crops_path : str
        Path of a ``.npy`` file, loaded with ``np.load``. Crops are looked up
        by the metadata row index, so row *i* of the metadata must describe
        ``crops[i]``.
    meta_path : str
        Path of the metadata CSV.
    output_path : str, optional
        Destination CSV. Defaults to ``meta_path`` with ``_labeled`` inserted
        before the file extension.
    """

    def __init__(self, crops_path, meta_path, output_path=None):
        self.crops = np.load(crops_path)
        self.meta = pd.read_csv(meta_path)
        self.replica_name = os.path.basename(meta_path).replace("_all_crop_metadata.csv", "")

        if not output_path:
            # Only touch the final extension: str.replace(".csv", ...) would
            # also rewrite ".csv" inside directory names, and would return
            # meta_path itself (overwriting the input) if it had no ".csv".
            root, ext = os.path.splitext(meta_path)
            output_path = f"{root}_labeled{ext}"
        self.output_path = output_path

        if "manual_label" not in self.meta.columns:
            self.meta["manual_label"] = np.nan

        self.batch_size = 25
        self.current_idx = 0
        self.fig, self.axes = None, None
        self.rects = {}   # plot position -> border patch currently drawn
        self.labels = {}  # metadata row index -> label chosen in this batch

    def get_next_batch_indices(self):
        """Return the metadata indices of the next ``batch_size`` unlabeled rows."""
        return self.meta[self.meta["manual_label"].isna()].index[:self.batch_size]

    def onclick(self, event):
        """Mouse callback: left click labels the crop 1, right click labels it 0.

        Clicks outside any axes, on a blank padding axis, or with any other
        mouse button are ignored.
        """
        if event.button == 1:
            label = 1
        elif event.button == 3:
            label = 0
        else:
            # e.g. middle click; previously raised KeyError on unlabeled crops
            return

        for i, ax in enumerate(self.axes.flat):
            if event.inaxes is ax:
                # Padding axes of a short batch have no crop behind them.
                if i < len(self.indices):
                    idx = self.indices[i]
                    self.labels[idx] = label
                    self.update_border(i, label)
                break

    def update_border(self, plot_idx, label):
        """Draw a green (label 1) or red border on a crop and update its title."""
        ax = self.axes.flat[plot_idx]
        if self.rects.get(plot_idx):
            # Remove the previous border so that relabeling does not stack patches.
            self.rects[plot_idx].remove()
        color = "green" if label == 1 else "red"
        self.rects[plot_idx] = ax.add_patch(Rectangle((0, 0), 1, 1, transform=ax.transAxes,
                                                    fill=False, edgecolor=color, linewidth=3))
        symbol = "[SPOT]" if label == 1 else "[NOT]"
        idx = self.indices[plot_idx]
        ax.set_title(f"Idx {idx}\n{symbol}")
        self.fig.canvas.draw_idle()

    def label_batch(self):
        """Show the next batch, wait for ENTER, then save the recorded labels.

        Crops that were not clicked keep a missing label and are offered
        again in the next batch.
        """
        self.indices = self.get_next_batch_indices()
        if len(self.indices) == 0:
            print("All crops labeled.")
            return

        self.labels = {}
        self.rects = {}

        # Square grid large enough for batch_size crops (5 x 5 for the default 25).
        n_side = max(1, int(np.ceil(np.sqrt(self.batch_size))))
        self.fig, self.axes = plt.subplots(n_side, n_side, figsize=(10, 10), squeeze=False)
        self.fig.suptitle(f"Replica: {self.replica_name}  |  Left Click = [SPOT]  |  Right Click = [NOT]")
        cid = self.fig.canvas.mpl_connect("button_press_event", self.onclick)

        for i, idx in enumerate(self.indices):
            crop = self.crops[idx]
            ax = self.axes.flat[i]
            ax.imshow(crop, cmap="gray")
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_title(f"Idx {idx}\nNo label")

        # Blank the unused grid positions of a short (final) batch.
        for j in range(len(self.indices), self.axes.size):
            self.axes.flat[j].axis('off')

        plt.tight_layout()
        plt.show()

        # ✅ Wait for user confirmation in terminal/cell
        # NOTE(review): with the `%matplotlib widget` backend a blocking input()
        # may keep click events from reaching onclick until the cell finishes,
        # in which case no labels are recorded — verify in the target front end.
        input("👆 Done labeling? Press ENTER here to save and continue...")

        self.fig.canvas.mpl_disconnect(cid)
        plt.close(self.fig)

        labeled_count = 0
        for crop_idx in self.indices:
            if crop_idx in self.labels:
                self.meta.at[crop_idx, "manual_label"] = self.labels[crop_idx]
                labeled_count += 1

        # The whole table is rewritten on every batch, labeled or not.
        self.meta.to_csv(self.output_path, index=False)

        if labeled_count > 0:
            print(f"✅ Saved {labeled_count} manual labels to: {self.output_path}")
        else:
            print("⚠️ No labels were recorded. Did you click any crops?")

In [None]:
# Example usage
crops_path = "/Volumes/share/Users/Eric/DUSP1_SpotCrops/DUSP1_D_Final_MG3_Abs4_all_crops.npy"
meta_path = "/Volumes/share/Users/Eric/DUSP1_SpotCrops/DUSP1_D_Final_MG3_Abs4_all_crop_metadata.csv"

labeler = ManualLabeler(crops_path, meta_path)

In [None]:
# Show the next batch of unlabeled crops; after clicking, press ENTER at the
# prompt to write the labels to the labeler's output CSV. Re-run for more batches.
labeler.label_batch()

In [1]:
from ipywidgets import Button
from IPython.display import display

def on_click(b):
    """Print a confirmation when the test button is pressed."""
    print("✅ Button clicked!")


# Smoke test for widget callbacks: create a button, attach the handler, and
# render it in the cell output.
btn = Button(description="Test Button")
btn.on_click(on_click)
display(btn)

Button(description='Test Button', style=ButtonStyle())