# FISH - Pipeline - A Python interactive notebook to process FISH data

```
Author: Luis U. Aguilera
Contact Info: luis.aguilera@colostate.edu

Copyright (c) 2021 Munsky Group 
Colorado State University 
Licensed under BSD 3-Clause License.
```

### Notebook summary 


- Load a directory with microscope FISH images
- Establish a connection to Network-attached storage (NAS) using [pysmb](https://github.com/miketeo/pysmb)
- Perform cell segmentation using [Cellpose](https://github.com/MouseLand/cellpose)
- Perform spot detection using [Big-FISH](https://github.com/fish-quant/big-fish)
- Data management using [pandas](https://pandas.pydata.org/)

----

### Libraries

In [None]:
# Importing libraries
import sys
import pathlib
import warnings
warnings.filterwarnings("ignore")

### Path to source directories

In [None]:
# Locate the 'src' folder two levels above the current working directory.
current_dir = pathlib.Path.cwd()
fa_dir = current_dir.parents[1] / 'src'

# Make the source folder importable, then load the fish_analyses module under the alias `fa`.
source_folder = str(fa_dir)
sys.path.append(source_folder)
import fish_analyses as fa

In [None]:
# Print the banner provided by the fish_analyses module.
banner = fa.Banner()
banner.print_banner()

----

# Parameters defined by the user 

----

### Paths to images

In [None]:
# FISH_ICC=(\
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_0min_062123' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_20min_062123' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_40min_062123' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_75min_062123' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_90min_062123' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_150min_062123' \
# )

# FISH_ICC_Masks=(\
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_0min_062123/masks_DUSP1_GR_Dex100nM_0min_062123___nuc_100__cyto_200.zip' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_20min_062123/masks_DUSP1_GR_Dex100nM_20min_062123___nuc_100__cyto_200.zip' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_40min_062123/masks_DUSP1_GR_Dex100nM_40min_062123___nuc_100__cyto_200.zip' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_75min_062123/masks_DUSP1_GR_Dex100nM_75min_062123___nuc_100__cyto_200.zip' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_90min_062123/masks_DUSP1_GR_Dex100nM_90min_062123___nuc_100__cyto_200.zip' \
# 'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_150min_062123/masks_DUSP1_GR_Dex100nM_150min_062123___nuc_100__cyto_200.zip' \
# )

In [None]:
# Credentials: a 'config.yml' file expected on the user's Desktop.
desktop_path = pathlib.Path.home().joinpath('Desktop')
path_to_config_file = desktop_path / 'config.yml'

# Location of the images to process and of the zip file with the masks.
data_folder_path = pathlib.Path(
    'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_0min_062123'
)
path_to_masks_dir = pathlib.Path(
    'smFISH_images/Eric_smFISH_images/20230626_DUSP1GR_DexStep/DUSP1_GR_Dex100nM_0min_062123/masks_DUSP1_GR_Dex100nM_0min_062123___nuc_100__cyto_200.zip'
)

In [None]:
# ---------------- Segmentation ----------------
diameter_nucleus = 100        # Approximate nucleus size in pixels.
diameter_cytosol = 200        # Approximate cytosol size in pixels.
channels_with_nucleus = [3]   # Channel(s) used for nucleus segmentation.
channels_with_cytosol = [2]   # Channel(s) used for cytosol segmentation.
# Accepted values: 'default', 'intensity_segmentation', 'z_slice_segmentation_marker',
# 'gaussian_filter_segmentation', or None.
optimization_segmentation_method = 'default'

# ---------------- Spot detection ----------------
channels_with_FISH = [0, 1]   # Channel(s) used for spot detection.
psf_z = 350                   # Theoretical size of the PSF emitted by an RNA spot in the z plane, in nanometers.
psf_yx = 160                  # Theoretical size of the PSF emitted by an RNA spot in the yx plane, in nanometers.
voxel_size_z = 500            # Microscope conversion from pixels to nanometers along the z axis.
voxel_size_yx = 160           # Microscope conversion from pixels to nanometers along the xy axes.
# Use an integer for a fixed threshold, or None for automatic threshold detection.
threshold_for_spot_detection = None

# ---------------- Data transfer (NAS) ----------------
send_data_to_NAS = 0          # Use 1 to send the results back to the NAS.
download_data_from_NAS = 1    # Use 1 to download the data from the NAS.
#path_to_masks_dir='None'             # 'Test/test_dir/masks_test_dir___nuc_120__cyto_220.zip'

# ---------------- Image selection and outputs ----------------
save_all_images = 0           # Use 1 to save/show all planes (z-slices) in the FISH spot-detection plots.
save_filtered_images = 0      # Use 1 to save the filtered images.
# Use 1 to remove 2 z-slices from the top and bottom of the stack (z-slices that are out of focus).
remove_z_slices_borders = 0
# Flag to remove out-of-focus images.
# NOTE(review): this flag is not forwarded to any call in the visible cells; confirm where it is consumed.
remove_out_of_focus_images = 0
save_pdf_report = 1           # Flag forwarded to PipelineFISH and save_output_to_folder.
show_plots = 1                # Flag to show plots.

# ######### Parameters to reformat images to standard format ########
convert_to_standard_format = 0   # Use 1 only if the images need to be converted to the standard format.
number_color_channels = 0        # Only used when converting to the standard format.
number_of_fov = 0                # Only used when converting to the standard format.

### Parameters for segmentation and spot detection

In [None]:
# diameter_nucleus=100                         # Approximate nucleus size in pixels
# diameter_cytosol=180                         # Approximate cytosol size in pixels
# psf_z=350                                    # Theoretical size of the PSF emitted by a [rna] spot in the z plan, in nanometers
# psf_yx=160                                   # Theoretical size of the PSF emitted by a [rna] spot in the yx plan, in nanometers
# voxel_size_z=500                             # Microscope conversion px to nanometers in the z axis.
# voxel_size_yx=160                            # Microscope conversion px to nanometers in the xy axis.
# channels_with_nucleus= [2]                   # Channel to pass to python for nucleus segmentation
# channels_with_cytosol=[1]                    # Channel to pass to python for cytosol segmentation
# channels_with_FISH=[0]                       # Channel to pass to python for spot detection
# optimization_segmentation_method='default'   # optimization_segmentation_method = 'default', 'intensity_segmentation' 'z_slice_segmentation', 'gaussian_filter_segmentation' , None
# save_all_images=0                            # To save all images for all the z-slices
# save_filtered_images = False                 # To save filtered images.
# Subset of images to process: use an integer for a subset, or None to process all images.
number_of_images_to_process = None
# Minimal number of spots used to define a cluster.
minimum_spots_cluster = 4
# threshold_for_spot_detection=None            # To select an specific threshold for spot detection. The default is None, and use automated threshold detection.
# show_plots=1                                 # Flag to show plots
# send_data_to_NAS=0                           # If data sent back to NAS use 1
# download_data_from_NAS=1                     # If data downloaded from NAS use 1
# remove_z_slices_borders=0                    # Use this flag to remove 2 z-slices from the top and bottom of the stack. This is needed to remove z-slices that are out of focus.
# save_pdf_report=False


## Downloading data from NAS

In [None]:
%%capture
# Download data from NAS
convert_to_standard_format = False
if convert_to_standard_format == False:
    local_data_dir, masks_dir, _, _, _,list_images = fa.Utilities().read_images_from_folder( path_to_config_file, data_folder_path, path_to_masks_dir,  download_data_from_NAS)
    number_color_channels = list_images[0].shape[-1]
else:
    ####################################################################
    #### Parameters to reformat images from the terminator scope ####
    number_color_channels = 0                    # Only use if it is needed to convert to standard format
    number_of_fov = 0                            # Only use if it is needed to convert to standard format     
    ####################################################################
    local_data_dir, masks_dir, _, _, _= fa.Utilities().convert_to_standard_format(data_folder_path=data_folder_path, 
                                                                                path_to_config_file=path_to_config_file, 
                                                                                download_data_from_NAS = download_data_from_NAS,
                                                                                number_color_channels=number_color_channels)

## Running the pipeline

----

In [None]:
# Keyword options forwarded to the pipeline constructor.
pipeline_keyword_options = dict(
    masks_dir=masks_dir,
    voxel_size_z=voxel_size_z,
    voxel_size_yx=voxel_size_yx,
    psf_z=psf_z,
    psf_yx=psf_yx,
    show_plots=show_plots,
    file_name_str=data_folder_path.name,
    optimization_segmentation_method=optimization_segmentation_method,
    save_all_images=save_all_images,
    threshold_for_spot_detection=threshold_for_spot_detection,
    save_filtered_images=save_filtered_images,
    number_of_images_to_process=number_of_images_to_process,
    remove_z_slices_borders=remove_z_slices_borders,
    save_pdf_report=save_pdf_report,
)

# Build the pipeline from the downloaded data and the user parameters.
fish_pipeline = fa.PipelineFISH(
    local_data_dir,
    channels_with_cytosol,
    channels_with_nucleus,
    channels_with_FISH,
    diameter_nucleus,
    diameter_cytosol,
    minimum_spots_cluster,
    **pipeline_keyword_options,
)

# Run it; only the first (dataframe) and last (identification string) returned values are kept.
dataframe_FISH, _, _, _, output_identification_string = fish_pipeline.run()

## Extracting data from the dataframe

----

In [None]:
# Preview the first rows of the results.
# Note: cells without spots are still tracked; in the data frame, their fields are populated with -1 values.
dataframe_FISH.head(n=5)

## Plots

In [None]:
# Plot all distributions; the returned value is later passed to save_output_to_folder.
list_files_distributions = fa.Plots().plot_all_distributions(
    dataframe_FISH,
    channels_with_cytosol,
    channels_with_nucleus,
    channels_with_FISH,
    minimum_spots_cluster,
    output_identification_string,
)

In [None]:
# Bleed-through scatter plot; the returned value is later passed to save_output_to_folder.
file_plots_bleed_thru = fa.Plots().plot_scatter_bleed_thru(
    dataframe_FISH,
    channels_with_cytosol,
    channels_with_nucleus,
    output_identification_string,
)

In [None]:
# Plots for the cytosol/nucleus intensity ratios.
# The cytosol-based plot is skipped when no cytosol channel is defined.
if fa.Utilities().is_None(channels_with_cytosol):
    file_plots_int_ratio = None
else:
    file_plots_int_ratio = fa.Plots().plot_nuc_cyto_int_ratio_distributions(
        dataframe_FISH,
        output_identification_string=None,
        plot_for_pseudo_cytosol=False,
    )

# The pseudo-cytosol version of the plot is always generated.
file_plots_int_pseudo_ratio = fa.Plots().plot_nuc_cyto_int_ratio_distributions(
    dataframe_FISH,
    output_identification_string=None,
    plot_for_pseudo_cytosol=True,
)


## Saving data

In [None]:
# Step 1: save the data and the plot files produced above.
fa.Utilities().save_output_to_folder(
    output_identification_string,
    data_folder_path,
    list_files_distributions=list_files_distributions,
    file_plots_bleed_thru=file_plots_bleed_thru,
    file_plots_int_ratio=file_plots_int_ratio,
    file_plots_int_pseudo_ratio=file_plots_int_pseudo_ratio,
    channels_with_FISH=channels_with_FISH,
    save_pdf_report=save_pdf_report,
)

# Step 2: send the data to the NAS (controlled by the `send_data_to_NAS` flag).
analysis_folder_name, mask_dir_complete_name = fa.Utilities().sending_data_to_NAS(
    output_identification_string,
    data_folder_path,
    path_to_config_file,
    path_to_masks_dir,
    diameter_nucleus,
    diameter_cytosol,
    send_data_to_NAS,
    masks_dir,
)

# Step 3: move the complete analysis folder to the final analyses folder.
fa.Utilities().move_results_to_analyses_folder(
    output_identification_string,
    data_folder_path,
    mask_dir_complete_name,
    path_to_masks_dir,
    save_filtered_images,
    download_data_from_NAS,
)

In [1]:
# Stop execution here so that "Run All" does not continue into the cells below.
# NOTE(review): presumably this is a deliberate barrier before the exploratory cells — confirm.
# A bare `raise` with no active exception only produces the misleading
# "RuntimeError: No active exception to reraise"; the same exception type is raised
# explicitly with a message that states the intent.
raise RuntimeError('Notebook execution stopped intentionally. Run the cells below manually if needed.')

RuntimeError: No active exception to reraise

In [None]:
# Calculating TS intensity: work on an independent copy so dataframe_FISH is left untouched.
dataframe = dataframe_FISH.copy(deep=True)
# Display the available column names.
dataframe.columns

In [None]:
# Preview the first rows of the working copy.
dataframe.head(n=5)

In [None]:
# Show only the rows flagged as clusters (is_cluster equal to 1).
cluster_rows_mask = dataframe['is_cluster'] == 1
dataframe.loc[cluster_rows_mask]

In [None]:
# Extracting TS intensity for all cells.

# Number of distinct cell identifiers in the data frame.
number_cells = dataframe['cell_id'].nunique()

# NOTE(review): this re-assigns the `minimum_spots_cluster` parameter defined earlier (4).
# Re-running earlier cells after this one will use the value 2.
minimum_spots_cluster = 2

# Row filters: clusters located in the nucleus, with at least `minimum_spots_cluster` spots,
# excluding rows where `is_cell_fragmented` equals -1.
is_cluster_mask = dataframe['is_cluster'] == True
in_nucleus_mask = dataframe['is_nuc'] == True
large_enough_mask = dataframe['cluster_size'] >= minimum_spots_cluster
not_flagged_mask = dataframe['is_cell_fragmented'] != -1
selected_rows = is_cluster_mask & in_nucleus_mask & large_enough_mask & not_flagged_mask

# Cluster sizes of the selected rows, as an array.
ts_size = dataframe.loc[selected_rows, 'cluster_size'].values

ts_size

In [None]:
#list_ts = sorted(dataframe.loc[ (dataframe['cell_id']==df_index) & (dataframe['is_cluster']==True) &   (dataframe['cluster_size']>=minimum_spots_cluster)  ].cluster_size.values, reverse=True)
#min_length = min(len(ts_array), len(list_ts))
#ts_array[:min_length] = list_ts[:min_length]
# Size of the nucleus of each cell
#nuc_area = dataframe.loc[   (dataframe['cell_id']==df_index) ].nuc_area_px.values[0]   