##  GR DUSP1 Gating Notebook

The purpose of this notebook is to:
1) Load all analyses for final dataframe preparation
2) Filter DUSP1 data to remove partial cells and low-SNR spots
3) Filter GR data to remove partial cells
4) Estimate GR cytoplasmic area from DUSP1 data
5) Convert GR intensity to molecular counts
6) Concatenate the final GR and DUSP1 dataframes

In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dask.array as da
import os
import sys
import logging


# Quiet the chattiest third-party loggers so notebook output stays readable.
numba_logger = logging.getLogger('numba')
matplotlib_logger = logging.getLogger('matplotlib')
numba_logger.setLevel(logging.WARNING)
matplotlib_logger.setLevel(logging.WARNING)
logging.getLogger('matplotlib.font_manager').disabled = True

# Make the directory two levels above the working directory importable, so
# that `src.Analysis` can be imported below.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
sys.path.append(src_path)
print(src_path)

from src.Analysis import AnalysisManager, Analysis, SpotDetection_SNRConfirmation, Spot_Cluster_Analysis_WeightedSNR, GR_Confirmation

In [3]:
# Measure function to create final DUSP1 dataframe

def measure_DUSP1(spots, clusters, props) -> pd.DataFrame:
    """Collapse the spot, cluster and cell-property tables into one row per cell.

    Parameters
    ----------
    spots : pd.DataFrame
        Must contain 'unique_cell_id', 'is_nuc' and 'replica'.
    clusters : pd.DataFrame
        Must contain 'unique_cell_id', 'is_nuc' and 'nb_spots'.
    props : pd.DataFrame
        One row per cell; must contain 'unique_cell_id', 'nuc_area',
        'cyto_area', 'nuc_intensity_mean-0', 'cyto_intensity_mean-0',
        'time' and 'Dex_Conc'.

    Returns
    -------
    pd.DataFrame
        One row per row of `props`, ordered by 'unique_cell_id' and carrying
        the index of the sorted `props`. Cells without any matching spot or
        cluster get a count of 0 and a NaN 'replica'.
    """
    results = pd.DataFrame(columns=['cell_id', 'num_ts', 'num_spots_ts', 'num_foci', 'num_spots_foci', 'num_spots', 'num_nuc_spots', 'num_cyto_spots',
                                    'nuc_area_px', 'cyto_area_px', 'avg_nuc_int', 'avg_cyto_int', 'time', 'Dex_conc', 'replica'])

    # Order all three tables by cell id; the sorted props define the row order.
    key = 'unique_cell_id'
    spots = spots.sort_values(by=key)
    clusters = clusters.sort_values(by=key)
    props = props.sort_values(by=key)
    cell_ids = props[key]

    def aligned(per_cell, missing=0):
        """Line a per-cell Series up with `cell_ids`, filling absent cells."""
        return per_cell.reindex(cell_ids, fill_value=missing).values

    # Clusters with is_nuc == 1 feed the 'ts' columns, is_nuc == 0 the 'foci'
    # columns (presumably transcription sites vs. cytoplasmic foci).
    nuc_clusters = clusters[clusters['is_nuc'] == 1].groupby(key)
    cyto_clusters = clusters[clusters['is_nuc'] == 0].groupby(key)
    spots_by_cell = spots.groupby(key)
    nuc_spots = spots[spots['is_nuc'] == 1].groupby(key)
    cyto_spots = spots[spots['is_nuc'] == 0].groupby(key)

    per_cell_columns = {
        'num_ts': aligned(nuc_clusters.size()),
        'num_foci': aligned(cyto_clusters.size()),
        'num_spots_ts': aligned(nuc_clusters['nb_spots'].sum()),
        'num_spots_foci': aligned(cyto_clusters['nb_spots'].sum()),
        'num_spots': aligned(spots_by_cell.size()),
        'num_nuc_spots': aligned(nuc_spots.size()),
        'num_cyto_spots': aligned(cyto_spots.size()),
        'nuc_area_px': props['nuc_area'].values,
        'cyto_area_px': props['cyto_area'].values,
        'avg_nuc_int': props['nuc_intensity_mean-0'].values,
        'avg_cyto_int': props['cyto_intensity_mean-0'].values,
        'time': props['time'].values,
        'Dex_conc': props['Dex_Conc'].values,
        # Replica is read from the spots table, so spot-less cells get NaN.
        'replica': aligned(spots_by_cell['replica'].first(), missing=np.nan),
    }

    # The Series assignment must come first: on an empty frame it installs the
    # index of `props`; every later column is a positional array of equal length.
    results['cell_id'] = cell_ids
    for column, values in per_cell_columns.items():
        results[column] = values

    return results

def measure_GR(cellprops) -> pd.DataFrame:
    """Build the per-cell GR table from the cell-property table.

    Parameters
    ----------
    cellprops : pd.DataFrame
        One row per cell; must contain 'unique_cell_id', 'nuc_area',
        'cyto_area', 'nuc_intensity_mean-0', 'cyto_intensity_mean-0',
        'time' and 'Dex_Conc'. A 'replica' column is used when present.

    Returns
    -------
    pd.DataFrame
        One row per cell, ordered by 'unique_cell_id' and carrying the index
        of the sorted `cellprops`. 'normgrnuc' and 'normgrcyt' are left as
        empty placeholder columns; 'replica' is NaN when `cellprops` has no
        such column.
    """
    # Area columns are declared with the same '_px' names that are written
    # below (and that measure_DUSP1 uses), so no all-NaN 'nuc_area' /
    # 'cyto_area' columns are left behind.
    results = pd.DataFrame(columns=['cell_id', 'nuc_area_px', 'cyto_area_px', 'normgrnuc', 'normgrcyt',
                                    'nucGRint', 'cytoGRint', 'time', 'Dex_conc', 'replica'])

    # Sort cellprops by unique_cell_id so rows come out in cell-id order
    props = cellprops.sort_values(by='unique_cell_id')
    cell_ids = props['unique_cell_id']

    # This function receives no spots table, so the replica label can only
    # come from cellprops itself; fall back to NaN when it is not recorded.
    if 'replica' in props.columns:
        replica = props['replica'].values
    else:
        replica = np.full(len(props), np.nan)

    # The Series assignment must come first: on an empty frame it installs the
    # index of `props`; the remaining columns are positional arrays.
    results['cell_id'] = cell_ids
    results['nuc_area_px'] = props['nuc_area'].values
    results['cyto_area_px'] = props['cyto_area'].values
    results['nucGRint'] = props['nuc_intensity_mean-0'].values
    results['cytoGRint'] = props['cyto_intensity_mean-0'].values
    results['time'] = props['time'].values
    results['Dex_conc'] = props['Dex_Conc'].values
    results['replica'] = replica

    return results

# Use the log file to search for analyses

In [6]:
# Create the analysis manager from the analysis log directory. No explicit
# `location` is given (loc is None), so only `log_location` identifies where
# to look. NOTE(review): `mac=True` presumably switches path handling for a
# macOS network mount — confirm against AnalysisManager.
loc = None 
log_location = r'/Volumes/share/Users/Eric/AngelFISH_data'  # r'/Volumes/share/Users/Jack/All_Analysis' 
am1 = AnalysisManager(location=loc, log_location=log_location, mac=True) 

In [None]:
# List the names of all analyses the manager reports, to pick from below
all_analysis_names = am1.list_analysis_names()
print("All discovered analyses:", all_analysis_names)

In [None]:
def _open_analysis(analysis_name):
    """Return a new AnalysisManager on the shared log with one analysis selected.

    Built with the same arguments as `am1`. An empty name marks a dataset
    whose analysis name has not been filled in yet: the manager is created
    but nothing is selected on it.
    """
    manager = AnalysisManager(location=loc, log_location=log_location, mac=True)
    if analysis_name:
        manager.select_analysis(analysis_name)
    return manager


# One manager per dataset, so that each selection is kept independently.
# `am1` already exists; every other manager is created here (previously
# am2..am14 were used without ever being defined).

# GR 1, 10, 100nM Dex 3hr Time-sweep Replica 1
am1.select_analysis('GR_A_Feb0425')
# GR 1, 10, 100nM Dex 3hr Time-sweep Replica 2
am2 = _open_analysis('GR_B_Feb0425')
# GR 1, 10, 100nM Dex 3hr Time-sweep Replica 3
am3 = _open_analysis('GR_C_Feb0425')

# DUSP1 100nM Dex 3hr Time-sweep Replica 1
am4 = _open_analysis('DUSP1_D_020335')
# DUSP1 100nM Dex 3hr Time-sweep Replica 2
am5 = _open_analysis('DUSP1_E_Jan2725')
# DUSP1 100nM Dex 3hr Time-sweep Replica 3
am6 = _open_analysis('DUSP1_F_Jan2725')
# DUSP1 100nM Dex 3hr Time-sweep Replica 4
am7 = _open_analysis('DUSP1_M_020335')
# DUSP1 100nM Dex 3hr Time-sweep Replica 5
# TODO: analysis name not filled in yet
am8 = _open_analysis('')

# DUSP1 75min Concentration-sweep Replica 1
am9 = _open_analysis('DUSP1_G_Jan2725')
# DUSP1 75min Concentration-sweep Replica 2
am10 = _open_analysis('DUSP1_H_Jan2725')
# DUSP1 75min Concentration-sweep Replica 3
am11 = _open_analysis('DUSP1_I_Jan2725')

# DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 1
# TODO: analysis name not filled in yet
am12 = _open_analysis('')
# DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 2
am13 = _open_analysis('DUSP1_K_Jan2725')
# DUSP1 0.3, 1, 10nM Dex 3hr Time-sweep Replica 3
am14 = _open_analysis('DUSP1_L_Jan2725')

## Analysis/confirmation

In [9]:
# Create the weighted-SNR spot/cluster analysis on top of the selected manager
SD1 = Spot_Cluster_Analysis_WeightedSNR(am1)
# Load the data; the cells below read SD1.cellprops, SD1.spots and SD1.clusters
SD1.get_data()

In [None]:
# Display Segmentation, BF_spotdetection, SNR thresholding (basic and weighted), Summary Stats and plots
# NOTE(review): newFOV / newCell presumably request a fresh field of view and cell for the preview — confirm
SD1.display(newFOV=True, newCell=True)

## Filter DUSP1 data to remove partial cells and low SNR spots

In [11]:
# Columns that together identify one cell in cellprops, spots and clusters
_cell_key = ['NAS_location', 'cell_label', 'fov']

# Remove partial cells from dataset. The explicit copy makes the filtered
# table an independent frame, so adding a column to it below does not trigger
# pandas' SettingWithCopyWarning.
SD1.cellprops = SD1.cellprops[SD1.cellprops['touching_border'] == 0].copy()

# Remove spots that are less than the weighted snr threshold
SD1.spots = SD1.spots[SD1.spots['keep_wsnr']]

# Create unique cell id for every remaining cell
SD1.cellprops['unique_cell_id'] = np.arange(len(SD1.cellprops))

# Attach the unique cell ID to the spots and clusters dataframes.
# - how='left' keeps every spot/cluster; those whose cell was removed above
#   get a NaN unique_cell_id.
# - Any unique_cell_id left over from a previous run of this cell is dropped
#   first, so re-running does not produce unique_cell_id_x / unique_cell_id_y.
_cell_lookup = SD1.cellprops[_cell_key + ['unique_cell_id']]
SD1.spots = (SD1.spots
             .drop(columns='unique_cell_id', errors='ignore')
             .merge(_cell_lookup, on=_cell_key, how='left'))
SD1.clusters = (SD1.clusters
                .drop(columns='unique_cell_id', errors='ignore')
                .merge(_cell_lookup, on=_cell_key, how='left'))

In [12]:
# Build the per-cell DUSP1 table from the filtered spots, clusters and cell
# properties, then close the analysis manager now that the data is in memory.
DUSP1_Dex_TS_R1 = measure_DUSP1(SD1.spots, SD1.clusters, SD1.cellprops)
am1.close()