## DUSP1 Confirmation Notebook
The purpose of this notebook is to:
1. Confirm successful segmentation.
2. Confirm successful BigFISH spot and cluster detection.
3. Refine spots and clusters through additional filtering (SNR) for gating and final dataframe preparation:  
    a. Test predefined SNR thresholds.  
    b. Test weighted SNR thresholding.  
    c. Filter `df_spots` with snr threshold if needed.  
    d. Create final dataframes ('df_cellspots').    
    e. Save the dataframes.

In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dask.array as da
import os
import sys
import logging


# Quiet down chatty third-party loggers so notebook output stays readable.
logging.getLogger('matplotlib.font_manager').disabled = True

numba_logger = logging.getLogger('numba')
matplotlib_logger = logging.getLogger('matplotlib')
for noisy_logger in (numba_logger, matplotlib_logger):
    # Only warnings and above are let through for these packages.
    noisy_logger.setLevel(logging.WARNING)

# Put the directory two levels above the working directory on the import
# path so that the `src` package imported below can be found.
two_levels_up = os.path.join(os.getcwd(), os.pardir, os.pardir)
src_path = os.path.abspath(two_levels_up)
print(src_path)
sys.path.append(src_path)

from src.Analysis import AnalysisManager, Analysis, SpotDetection_SNRConfirmation, Spot_Cluster_Analysis_WeightedSNR, GR_Confirmation

# Use the log file to search for analyses

In [None]:
# No explicit analysis location is passed; only the log directory is given.
# NOTE(review): presumably the manager discovers analyses from the log
# directory when `location` is None — confirm against AnalysisManager.
loc = None 
log_location = r'/Volumes/share/Users/Eric/AngelFISH_data'  #  r'/Volumes/share/Users/Jack/All_Analysis'
# NOTE(review): `mac=True` looks like a macOS path-handling switch (the log
# path is a /Volumes mount) — confirm.
am1 = AnalysisManager(location=loc, log_location=log_location, mac=True) 

In [None]:
# List the names of all analyses the manager reports, so that one of them
# can be chosen in the next cell.
all_analysis_names = am1.list_analysis_names()
print("All discovered analyses:", all_analysis_names)

In [None]:
# DUSP1 100nM Dex 3hr Time-sweep Replica 1
# Select that analysis by name on the manager.
am1.select_analysis('DUSP1_D_Jan2125')
# Left as the final expression so the notebook displays its return value.
am1.list_datasets()

## Analysis/confirmation

In [6]:
# Instantiate the weighted-SNR spot/cluster analysis on the manager that has
# the analysis selected above.
SD1 = Spot_Cluster_Analysis_WeightedSNR(am1)
# Load the data.
# NOTE(review): the cells below read SD1.spots, SD1.clusters and
# SD1.cellprops, so this call presumably populates them — confirm.
SD1.get_data()

In [None]:
# Display Segmentation, BF_spotdetection, SNR thresholding (basic and weighted), Summary Stats and plots
# NOTE(review): newFOV/newCell presumably request a fresh field of view and
# cell for the visual check — confirm against the class implementation.
SD1.display(newFOV=True, newCell=True)

## Filter the data to remove partial cells and high-noise spots

In [8]:
# Remove partial cells from dataset: keep only rows whose `touching_border`
# flag is 0. The explicit .copy() makes the result an independent frame, so
# adding the `unique_cell_id` column to it later does not operate on a slice
# of the original table (avoids pandas' SettingWithCopyWarning).
SD1.cellprops = SD1.cellprops[SD1.cellprops['touching_border'] == 0].copy()

# Remove spots that are less than the weighted snr threshold: keep only rows
# where the `keep_wsnr` column is truthy.
SD1.spots = SD1.spots[SD1.spots['keep_wsnr']]

In [9]:
# Columns that together identify one cell across the spot, cluster and
# cell-property tables.
cell_keys = ['NAS_location', 'cell_label', 'fov']

# Give every remaining cell a running integer id (0 .. n_cells - 1).
SD1.cellprops['unique_cell_id'] = np.arange(len(SD1.cellprops))

# Lookup table: identifying columns -> unique_cell_id.
id_lookup = SD1.cellprops[cell_keys + ['unique_cell_id']]

# Attach the id to each spot and each cluster. With a left merge, rows whose
# cell is absent from the lookup are kept and get a missing unique_cell_id.
SD1.spots = SD1.spots.merge(id_lookup, on=cell_keys, how='left')
SD1.clusters = SD1.clusters.merge(id_lookup, on=cell_keys, how='left')

In [10]:
def measure(spots, clusters, props) -> pd.DataFrame:
    """Summarise spots and clusters into one row per cell.

    Parameters
    ----------
    spots : pd.DataFrame
        One row per spot. Needs ``unique_cell_id`` and ``is_nuc``; ``replica``
        is read from here only when ``props`` has no ``replica`` column.
    clusters : pd.DataFrame
        One row per cluster. Needs ``unique_cell_id``, ``is_nuc`` and
        ``nb_spots``.
    props : pd.DataFrame
        One row per cell. Needs ``unique_cell_id``, ``nuc_area``,
        ``cyto_area``, ``nuc_intensity_mean-0``, ``cyto_intensity_mean-0``,
        ``time`` and ``Dex_Conc``; ``replica`` is used when present.

    Returns
    -------
    pd.DataFrame
        One row per row of ``props``, ordered by ``unique_cell_id`` and
        carrying the (sorted) index of ``props``. Cells without any matching
        spot or cluster get a count of 0. Spots and clusters whose
        ``unique_cell_id`` is missing, or not present in ``props``, are
        ignored.
    """
    # Every per-cell column below is aligned to this ordering.
    props = props.sort_values(by='unique_cell_id')
    cell_ids = props['unique_cell_id']

    def per_cell(grouped):
        # Align a groupby result to the cell order; absent cells count as 0.
        return grouped.reindex(cell_ids, fill_value=0).to_numpy()

    # Clusters flagged is_nuc == 1 feed the "ts" columns, is_nuc == 0 the
    # "foci" columns; the same flag splits spots into nuclear / cytoplasmic.
    ts_by_cell = clusters[clusters['is_nuc'] == 1].groupby('unique_cell_id')
    foci_by_cell = clusters[clusters['is_nuc'] == 0].groupby('unique_cell_id')
    nuc_spots_by_cell = spots[spots['is_nuc'] == 1].groupby('unique_cell_id')
    cyto_spots_by_cell = spots[spots['is_nuc'] == 0].groupby('unique_cell_id')

    # Replica must be one value per *cell*. Taking spots['replica'] directly
    # yields one value per *spot*, which does not line up with the cell rows.
    if 'replica' in props.columns:
        replica = props['replica'].to_numpy()
    else:
        replica_by_cell = (
            spots.groupby('unique_cell_id')['replica'].first().reindex(cell_ids)
        )
        # Cells with no spots have no label of their own; when the spot table
        # carries a single replica label overall, it applies to them as well.
        replica_labels = spots['replica'].dropna().unique()
        if len(replica_labels) == 1:
            replica_by_cell = replica_by_cell.fillna(replica_labels[0])
        replica = replica_by_cell.to_numpy()

    results = pd.DataFrame(
        {
            'cell_id': cell_ids.to_numpy(),
            'num_ts': per_cell(ts_by_cell.size()),
            'num_spots_ts': per_cell(ts_by_cell['nb_spots'].sum()),
            'num_foci': per_cell(foci_by_cell.size()),
            'num_spots_foci': per_cell(foci_by_cell['nb_spots'].sum()),
            'num_spots': per_cell(spots.groupby('unique_cell_id').size()),
            'num_nuc_spots': per_cell(nuc_spots_by_cell.size()),
            'num_cyto_spots': per_cell(cyto_spots_by_cell.size()),
            'nuc_area_px': props['nuc_area'].to_numpy(),
            'cyto_area_px': props['cyto_area'].to_numpy(),
            'avg_nuc_int': props['nuc_intensity_mean-0'].to_numpy(),
            'avg_cyto_int': props['cyto_intensity_mean-0'].to_numpy(),
            'time': props['time'].to_numpy(),
            'Dex_conc': props['Dex_Conc'].to_numpy(),
            'replica': replica,
        },
        index=props.index,
    )

    return results

In [11]:
# Build the per-cell summary table from the filtered spots, clusters and
# cell properties (all three carry `unique_cell_id` from the merge above).
DUSP1_TS_R1_df = measure(SD1.spots, SD1.clusters, SD1.cellprops)

In [12]:
# Sanity check: for every cell, the total spot count must equal the nuclear
# count plus the cytoplasmic count.
compartment_total = DUSP1_TS_R1_df['num_nuc_spots'] + DUSP1_TS_R1_df['num_cyto_spots']
counts_match = DUSP1_TS_R1_df['num_spots'] == compartment_total
assert counts_match.all(), "Mismatch in spot counts"

In [None]:
# Bare expression: the notebook renders the per-cell table for inspection.
DUSP1_TS_R1_df

In [14]:
# TODO move to section before removing cells 

# SD1.cellspots = SD1.cellspots.merge(SD1.cellprops[['NAS_location', 'cell_label', 'fov', 'unique_cell_id']], 
#                                     left_on=['NAS_location', 'cell_id', 'fov'], 
#                                     right_on=['NAS_location', 'cell_label', 'fov'], 
#                                     how='left')


# # Align indices before performing the assertion
# if 'cell_id' in DUSP1_TS_R1_df.keys():
#     DUSP1_TS_R1_df = DUSP1_TS_R1_df.set_index('cell_id')

# if 'unique_cell_id' in SD1.cellspots.keys():
#     SD1.cellspots = SD1.cellspots.set_index('unique_cell_id')

# aligned_nb_rna = SD1.cellspots['nb_rna']

# aligned_num_spots = DUSP1_TS_R1_df['num_spots']
# aligned_num_spots = aligned_num_spots - DUSP1_TS_R1_df['num_spots_ts']

# # Ensure aligned_num_spots only contains indices present in aligned_nb_rna
# aligned_num_spots = aligned_num_spots.loc[SD1.cellspots.index]

# not_close_indices = np.where(~np.isclose(aligned_nb_rna, aligned_num_spots, rtol = 0.05))[0]
# print("Indices where nb_rna and num_spots are not close:", len(not_close_indices))

# assert len(not_close_indices) == 0, "Mismatch in nb_rna and num_spots counts"


# print(f"{'cell_id':<10} {'my counting':<30} {'bigfish counting':<30} {'corrected':<30} {'ts spots':<30} {'foci spots':<30}")
# for cell_id, my, bf, cr, ts, foci in zip(aligned_nb_rna.iloc[not_close_indices].index, cell_spots.loc[aligned_nb_rna.index]['num_spots'].iloc[not_close_indices], 
#                                          aligned_nb_rna.iloc[not_close_indices], aligned_num_spots.iloc[not_close_indices], 
#                                          cell_spots.loc[aligned_nb_rna.index]['num_spots_ts'].iloc[not_close_indices], cell_spots.loc[aligned_nb_rna.index]['num_spots_foci'].iloc[not_close_indices] ):
#     print(f"{cell_id:<10} {my:<30} {bf:<30} {cr:<30} {ts:<30} {foci:<30}")

In [17]:
# Close the analysis manager now that the analysis is finished.
# NOTE(review): presumably releases the data files it opened — confirm.
am1.close()