In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dask.array as da
import os
import sys
import logging


# Quiet down chatty third-party loggers so the notebook output stays readable.
logging.getLogger('matplotlib.font_manager').disabled = True

numba_logger = logging.getLogger('numba')
matplotlib_logger = logging.getLogger('matplotlib')
for noisy_logger in (numba_logger, matplotlib_logger):
    noisy_logger.setLevel(logging.WARNING)

# Make the directory two levels above the current working directory importable,
# so that the `src` package can be imported from this notebook.
src_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
print(src_path)
sys.path.append(src_path)

from src.Analysis import Analysis, AnalysisManager, GR_Confirmation, SpotDetection_Confirmation

Load the data from the specified location

In [2]:
# Location handed to AnalysisManager; None means no explicit file is given here.
# NOTE(review): how AnalysisManager treats location=None is not visible in this notebook — confirm.
# Example values that have been used for `loc`:
#   r'C:\Users\formanj\Downloads\DUSP1_Dex_0min_20220224.h5'
#   r"Y:\smFISH_images\Eric_smFISH_images\20230511\GR_ICC_3hr_R1_1nM_0min_050823\GR_ICC_3hr_R1_1nM_0min_050823.h5"
loc = None
# Change this to wherever the All_Analysis folder is mounted on your machine.
log_location = r'\\munsky-nas.engr.colostate.edu\share\Users\Jack\All_Analysis'
am = AnalysisManager(location=loc, log_location=log_location)
# This analysis is best done with the NAS mounted.

In [None]:
# List the names of all analyses that have been run, so one can be selected in the next cell.
am.list_analysis_names()

In [None]:
# Select the analysis to inspect by filtering on its name and on dates.
# NOTE(review): the two dates are presumably an inclusive [start, end] range — confirm in AnalysisManager.
am.select_analysis('ER_Dec0324_DUSP1', ['2025-01-06', '2026-01-01'])
# Show which analysis the filter resolved to.
print(am.analysis)

In [None]:
# List the datasets known to the manager.
# NOTE(review): presumably restricted to the analysis selected above — confirm.
am.list_datasets()

Run the analysis and confirmation

In [6]:
# Build the spot-detection confirmation helper for the selected DUSP1 analysis.
# It is constructed from the AnalysisManager, so the selection made on `am` above applies.
SD = SpotDetection_Confirmation(am)

In [7]:
# Load the data for the selected analysis into memory.
# The SD attributes inspected in the following cells rely on this having run.
SD.get_data()

In [None]:
# Re-run this cell to see a different randomly selected cell each time.
# NOTE(review): newFOV=False presumably keeps the current field of view while newCell=True
# draws a new cell from it — confirm against SpotDetection_Confirmation.display.
SD.display(newFOV=False, newCell=True)

In [None]:
# NOTE(review): presumably links the loaded tables (spots, clusters, cells) to one another — confirm
# what set_relationships does before relying on its side effects.
SD.set_relationships()

In [None]:
# Display SD.spots (presumably one entry per detected spot — TODO confirm).
SD.spots

In [None]:
# Display SD.clusters (presumably one entry per detected spot cluster — TODO confirm).
SD.clusters

In [None]:
# Display SD.cellprops, the per-cell property table that is compared against SD.cellspots further down.
SD.cellprops


In [None]:
# Display SD.cellspots, the per-cell spot-count table used for the histograms further down.
SD.cellspots

In [None]:
# Identify cells that have measured properties but no matching record in the cell-spot table.
allcells = SD.cellprops
cells_wSpots = SD.cellspots

# A left join keeps every row of `allcells`; the `_merge` indicator column records
# whether a matching row was found in `cells_wSpots` ('both') or not ('left_only').
merged = pd.merge(
    allcells,
    cells_wSpots,
    how='left',
    left_on=['nuc_label', 'fov', 'NAS_location'],
    right_on=['cell_id', 'fov', 'NAS_location'],
    indicator=True,
)
print(merged.shape)

# Columns that only served the join and are dropped from both result frames.
join_helper_cols = ['cell_id', '_merge']
is_matched = merged['_merge'] == 'both'
is_unmatched = merged['_merge'] == 'left_only'
same_entries = merged.loc[is_matched].drop(columns=join_helper_cols)
different_entries = merged.loc[is_unmatched].drop(columns=join_helper_cols)

print("Same entries:", same_entries.shape, sep="\n")
print("\nDifferent entries:", different_entries.shape, sep="\n")

In [None]:
import random
import dask.array as da
# Summarise how many cells lack spot records and show a few of them in the raw images.
# Relies on `allcells`, `cells_wSpots` and `different_entries` from the previous cell.
print(f'There are {allcells.shape[0]} cells in this data set')
print(f'There are {cells_wSpots.shape[0]} cells with spots')

# How many of the unmatched cells have bounding boxes touching the image border.
# The count is computed outside the f-string: reusing the same quote character inside an
# f-string expression is a SyntaxError on Python versions before 3.12.
n_touching_border = different_entries['touching_border'].sum()
print(f'{n_touching_border} cells are touching the border and are not counted')

# Root of the NAS share that the relative `NAS_location` entries are joined onto.
# TODO this will need to be updated so you dont have to find it to get it to work
nas_root = r'\\munsky-nas.engr.colostate.edu\share'

# Unmatched cells that are NOT touching the border: these are the unexpected ones.
interior_missing = different_entries[~different_entries['touching_border']]
if interior_missing.empty:
    print('No unmatched cells away from the border to display')

# Show up to two randomly selected cells (sampling an empty frame would raise).
for _ in range(min(2, len(interior_missing))):
    random_row = interior_missing.sample(n=1).iloc[0]

    # Open the h5 file that this cell came from
    h5_file = os.path.join(nas_root, random_row['NAS_location'])
    with h5py.File(h5_file, 'r') as f:
        # Lazily wrap the raw images; data is only read from disk when computed
        raw_images = da.from_array(f['/raw_images'])

        # Bounding box of the cell; used below as (row_min, col_min, row_max, col_max)
        bbox = [random_row['cell_bbox-0'], random_row['cell_bbox-1'], random_row['cell_bbox-2'], random_row['cell_bbox-3']]

        # NOTE(review): after squeeze() the image is indexed as 4-D with the channel first;
        # presumably (channel, z, y, x) — confirm against the h5 layout.
        img = raw_images[random_row['fov'], random_row['timepoint_x']].squeeze()
        for c in range(img.shape[0]):
            # Maximum projection along the first remaining axis, materialised once as a
            # NumPy array so that imshow does not have to evaluate the dask graph itself.
            projection = np.max(img[c, :, :, :], axis=0).compute()
            fig, ax = plt.subplots(1, 1, figsize=(10, 10))
            ax.imshow(projection, cmap='gray')
            # Rectangle takes (x, y) = (col_min, row_min), then width and height
            rect = plt.Rectangle((bbox[1], bbox[0]), bbox[3] - bbox[1], bbox[2] - bbox[0], edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            plt.show()
            # Release the figure so repeated runs do not accumulate open figures
            plt.close(fig)

In [None]:
# Density histograms of selected per-spot measurements
keys_to_plot = ['signal', 'snr']
for k in SD.spots.keys():
    if k not in keys_to_plot:
        continue
    fig, ax = plt.subplots(figsize=(10, 5))
    # The label gives the legend something to show; without it matplotlib warns
    # that there are no labelled artists and draws an empty legend.
    ax.hist(SD.spots[k], bins=200, density=True, label=k)
    ax.set_xlabel(k)
    # density=True normalises the histogram to unit area, so the y-axis is a density, not a count
    ax.set_ylabel('Density')
    ax.set_title(f'Histogram of {k}')
    ax.legend()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures
    plt.close(fig)

In [None]:
# List the columns available in SD.cellspots, e.g. to choose which ones to plot in the next cell.
SD.cellspots.keys()

In [None]:
# Spot counts as a function of time and dex: one figure per key, laid out as a grid with
# one row per time point and one column per Dex concentration.
keys_to_plot = ['nb_rna', 'nb_rna_in_nuc']

tp_set = sorted(set(SD.cellspots['time']))
dex_set = sorted(set(SD.cellspots['Dex_Conc']))


def _draw_count_panel(ax, values, x_max, hide_xticks):
    """Draw one histogram panel with mean and +/- one standard deviation markers.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    values : pandas Series of per-cell counts for this panel (may be empty).
    x_max : upper x-limit shared by every panel so the panels are comparable.
    hide_xticks : if True, remove the x tick marks (used for all rows but the last).
    """
    # An empty subset has no histogram to draw; density normalisation of empty data
    # only produces NaNs and warnings, so just format the panel in that case.
    if len(values) > 0:
        mean_val = values.mean()
        std_val = values.std()
        ax.hist(values, bins=200, density=True)
        ax.axvline(mean_val, color='r', linestyle='solid', linewidth=2)
        ax.axvline(mean_val + std_val, color='g', linestyle='dashed', linewidth=1)
        ax.axvline(mean_val - std_val, color='g', linestyle='dashed', linewidth=1)
    ax.set_xlim([0, x_max])
    ax.grid(True)  # Turn on grid lines
    if hide_xticks:
        ax.set_xticks([])
    ax.set_yticks([])


for k in keys_to_plot:
    # squeeze=False keeps `axs` two-dimensional even when there is only a single
    # time point or a single concentration, so axs[i_t, i_d] always works.
    fig, axs = plt.subplots(len(tp_set), len(dex_set), figsize=(15, 15), squeeze=False)
    fig.suptitle(f'{k} as a function of time and dex', fontsize=16)
    x_max = SD.cellspots[k].max()
    last_row = len(tp_set) - 1

    for i_d, d in enumerate(dex_set):
        # Title every column, including ones whose only data is the control
        axs[0, i_d].set_title(f'Dex: {d}')
        data = SD.cellspots[SD.cellspots['Dex_Conc'] == d]
        for i_t, t in enumerate(tp_set):
            axs[i_t, 0].set_ylabel(f'Time: {t}')
            temp = data[data['time'] == t]
            if d == 0 and t == 0:
                # The untreated control (Dex 0, time 0) is repeated across its whole row
                # so that every concentration column can be compared against it.
                target_axes = axs[i_t, :]
            else:
                target_axes = [axs[i_t, i_d]]
            for ax in target_axes:
                _draw_count_panel(ax, temp[k], x_max, hide_xticks=(i_t != last_row))
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures
    plt.close(fig)



In [18]:
# Close the spot-detection helper once the inspection is finished.
# NOTE(review): presumably releases open file handles — confirm in SpotDetection_Confirmation.close.
SD.close()

In [19]:
# Close the analysis manager last, after the helpers built from it have been closed.
# NOTE(review): presumably releases open file handles — confirm in AnalysisManager.close.
am.close()