# FISH - Pipeline - A Python interactive notebook to process FISH data

```
Author: Luis U. Aguilera
Contact Info: luis.aguilera@colostate.edu

Copyright (c) 2021 Munsky Group 
Colorado State University 
Licensed under BSD 3-Clause License.
```

### Notebook summary 


- Load a directory with microscope FISH images
- Establish a connection to Network-attached storage (NAS) using [pysmb](https://github.com/miketeo/pysmb)
- Perform cell segmentation using [Cellpose](https://github.com/MouseLand/cellpose)
- Perform spot detection using [Big-FISH](https://github.com/fish-quant/big-fish)
- Data management using [pandas](https://pandas.pydata.org/)

----

<img src="/home/luisub/Desktop/FISH_Processing/docs/images/code_architecture.png" alt="drawing" width="1000"/>

### Libraries

In [None]:
# Importing libraries
import sys
import pathlib
import warnings
warnings.filterwarnings("ignore")

### Path to source directories

In [None]:
# Resolve the 'src' directory that sits next to the current working directory's parent.
current_dir = pathlib.Path().absolute()
fa_dir = current_dir.parents[0] / 'src'

# Make the source directory importable, then load the fish_analyses module.
sys.path.append(str(fa_dir))
import fish_analyses as fa

In [None]:
# Print the banner provided by the fish_analyses module.
fa.Banner().print_banner()

----

# Parameters defined by the user 

----

### Paths to images

In [None]:
# The credentials file (config.yml) is expected on the current user's Desktop.
desktop_path = pathlib.Path.home() / 'Desktop'
path_to_config_file = desktop_path / 'config.yml'
# Path to images and masks
#data_folder_path =pathlib.Path('smFISH_images/Eric_smFISH_images/20220131/DUSP1_Dex_75min')
#path_to_masks_dir = None 

#data_folder_path =pathlib.Path('smFISH_images/Linda_smFISH_images/Confocal/20220714/MS2-CY5_Cyto543_560_5hTPL_5uM')
#path_to_masks_dir = pathlib.Path('smFISH_images/Linda_smFISH_images/Confocal/20220714/MS2-CY5_Cyto543_560_5hTPL_5uM/masks_MS2-CY5_Cyto543_560_5hTPL_5uM___nuc_70__cyto_0.zip') 

In [None]:

# Experimental condition to analyze. Valid values: 'wo', '18min', '5h'.
time_point = '18min'

# All conditions share the same experiment directory; only the dataset folder changes.
experiment_dir = pathlib.Path('smFISH_images/Linda_smFISH_images/Confocal/20220714')
dataset_folder_by_time_point = {
    'wo': 'MS2-CY5_Cyto543_560_woStim',
    '18min': 'MS2-CY5_Cyto543_560_18minTPL_5uM',
    '5h': 'MS2-CY5_Cyto543_560_5hTPL_5uM',
}

# Fail loudly on an unknown condition instead of leaving the paths undefined
# (or silently reusing the values from a previous run of this cell).
if time_point not in dataset_folder_by_time_point:
    raise ValueError(
        f"Unknown time_point {time_point!r}; "
        f"expected one of {sorted(dataset_folder_by_time_point)}"
    )

dataset_folder = dataset_folder_by_time_point[time_point]

# Path to the images of the selected condition.
data_folder_path = experiment_dir / dataset_folder
# Path to the zip file with the masks, stored inside the dataset folder.
# NOTE(review): 'nuc_70' / 'cyto_0' presumably mirror diameter_nucleus and
# diameter_cytosol used when the masks were generated -- confirm.
path_to_masks_dir = data_folder_path / f'masks_{dataset_folder}___nuc_70__cyto_0.zip'

In [None]:
# Flags forwarded to the fa.Utilities helpers that read the input data and
# send the results to the NAS.
# NOTE: both flags are assigned again (to 1) in the parameters cell further down.
download_data_from_NAS, send_data_to_NAS = True, True

### Parameters for segmentation and spot detection

In [None]:
# Reference template listing the available parameters with example values.
# The whole template is wrapped in a string literal, so none of these
# assignments are executed; the values actually used are set in the next cell.
'''
# Parameters fo cell segmentation 
channels_with_cytosol = [2]                  # list or int indicating the channels where the cytosol is detectable. Use None to not segment the cytosol.
channels_with_nucleus = [0]                  # list or int indicating the channels where the nucleus is detectable. Use None to not segment the cytosol.
channels_with_FISH = [1]                     # list or int with the channels with FISH spots that are used for the quantification
diameter_nucleus = 122                       # approximate nucleus size in pixels
diameter_cytosol = 222                       # approximate cytosol size in pixels
voxel_size_z = 500                           # Microscope conversion px to nanometers in the z axis.
voxel_size_yx = 160                          # Microscope conversion px to nanometers in the xy axis.
psf_z = 350                                  # Theoretical size of the PSF emitted by a [rna] spot in the z plan, in nanometers.
psf_yx = 120                                 # Theoretical size of the PSF emitted by a [rna] spot in the yx plan, in nanometers.
minimum_spots_cluster = 3                    # The number of spots in a neighborhood for a point to be considered as a core point (from which a cluster is expanded). This includes the point itself.
show_plots = True                            # Flag to display plots
optimization_segmentation_method =  'z_slice_segmentation' #  # optimization_segmentation_method = 'intensity_segmentation' 'z_slice_segmentation', 'gaussian_filter_segmentation' , 'z_slice_segmentation_marker' , 'center_slice', None
save_all_images=False                        # If true, it shows a all planes for the FISH plot detection. 
threshold_for_spot_detection = None          # Intensity threshold to detect  spots. If None, it will automatically detect the spots. Pass a list if you have more than one FISH channel.
save_filtered_images = False                 # Flag to save images after the gaussian-laplace filter
list_selected_z_slices = None                # [6,7,8,9] # subsection of slices to process.
number_of_images_to_process = 5              # This section allows the user to select a subset of images to process. Use an integer to indicate the n images to process.
'''

In [None]:
# --- Computing resources ---
NUMBER_OF_CORES = 1

# --- Data transfer with the NAS (1 = enabled) ---
download_data_from_NAS = 1                 # Use 1 if the data is downloaded from the NAS
send_data_to_NAS = 1                       # Use 1 if the data is sent back to the NAS

# --- Channels ---
channels_with_nucleus = [0]                # Channel used for nucleus segmentation
channels_with_cytosol = None               # Channel used for cytosol segmentation
channels_with_FISH = [1, 3]                # Channels used for spot detection

# --- Segmentation ---
diameter_nucleus = 70                      # Approximate nucleus size, in pixels
diameter_cytosol = 0                       # Approximate cytosol size, in pixels
# Options: 'intensity_segmentation', 'z_slice_segmentation', 'gaussian_filter_segmentation', None
optimization_segmentation_method = 'z_slice_segmentation'

# --- Microscope and point spread function (PSF) ---
voxel_size_z = 500                         # Microscope conversion px to nanometers in the z axis
voxel_size_yx = 160                        # Microscope conversion px to nanometers in the xy axis
psf_z = 350                                # Theoretical PSF size of an RNA spot in the z plane, in nanometers
psf_yx = 160                               # Theoretical PSF size of an RNA spot in the yx plane, in nanometers

# --- Spot detection ---
threshold_for_spot_detection = [550, 400]  # Intensity thresholds used to detect spots
minimum_spots_cluster = 2

# --- Output and subsetting ---
show_plots = 0
save_all_images = 0
save_filtered_images = False
number_of_images_to_process = None         # None

## Downloading data from NAS

In [None]:
# Read the images and masks of the selected dataset; the download flag is
# forwarded to the helper. Only the local data directory, the masks directory
# and the list of file names are kept from the returned values.
local_data_dir, masks_dir, _, _, list_files_names = fa.Utilities.read_images_from_folder(
    path_to_config_file,
    data_folder_path,
    path_to_masks_dir,
    download_data_from_NAS,
)


## Running the pipeline

----

In [None]:
# Build the pipeline with the user-defined parameters and run it right away.
# From the returned values only the results dataframe and the output
# identification string are kept.
dataframe_FISH, _, _, _, output_identification_string = fa.PipelineFISH(
    local_data_dir,
    channels_with_cytosol,
    channels_with_nucleus,
    channels_with_FISH,
    diameter_nucleus,
    diameter_cytosol,
    minimum_spots_cluster,
    masks_dir=masks_dir,
    voxel_size_z=voxel_size_z,
    voxel_size_yx=voxel_size_yx,
    psf_z=psf_z,
    psf_yx=psf_yx,
    show_plots=show_plots,
    file_name_str=data_folder_path.name,
    optimization_segmentation_method=optimization_segmentation_method,
    save_all_images=save_all_images,
    threshold_for_spot_detection=threshold_for_spot_detection,
    save_filtered_images=save_filtered_images,
    number_of_images_to_process=number_of_images_to_process,
).run()

## Extracting data from the dataframe

----

In [None]:
# Display the tail (last rows) of the dataframe returned by the pipeline.
# Be aware that the code keeps track of cells without spots. In the data frame, the fields for cells without spots are populated with -1 values.
dataframe_FISH.tail()

In [None]:
#import matplotlib.pyplot as plt
#import numpy as np
#counter_total_images = np.max( dataframe_FISH['image_id'].values)
#for i in range (counter_total_images):
#    print(str(i)+' ',np.unique(dataframe_FISH.loc[ (dataframe_FISH['image_id']==i) & (dataframe_FISH['spot_type']==1) ].cell_id.values))
#plt.plot(dataframe_FISH['cell_id'].values )

In [None]:
# Number of FISH channels: the length of the list, or 1 when a single
# (non-list) channel was given.
number_fish_channels = len(channels_with_FISH) if isinstance(channels_with_FISH, list) else 1

## Plots

In [None]:
# Values returned by each plotting helper (presumably the names of the saved
# plot files), collected with one entry per spot type.
list_file_plots_spot_intensity_distributions = []
list_file_plots_distributions = []
list_file_plots_cell_size_vs_num_spots = []
list_file_plots_cell_intensity_vs_num_spots = []

# One iteration per FISH channel; the loop index is passed as the spot type.
for i in range(number_fish_channels):
    # Extract the per-cell quantities for this spot type.
    (
        number_of_spots_per_cell,
        number_of_spots_per_cell_cytosol,
        number_of_spots_per_cell_nucleus,
        number_of_TS_per_cell,
        ts_size,
        cell_size,
        number_cells,
        nuc_size,
        cyto_size,
    ) = fa.Utilities.dataframe_extract_data(
        dataframe_FISH,
        spot_type=i,
        minimum_spots_cluster=minimum_spots_cluster,
    )

    # Generate the plots for this spot type.
    file_plots_cell_intensity_vs_num_spots = fa.Plots.plot_cell_intensity_spots(
        dataframe_FISH,
        number_of_spots_per_cell_nucleus,
        number_of_spots_per_cell_cytosol,
        output_identification_string,
        spot_type=i,
    )
    file_plots_spot_intensity_distributions = fa.Plots.plot_spot_intensity_distributions(
        dataframe_FISH,
        output_identification_string,
        spot_type=i,
    )
    file_plots_distributions = fa.Plots.plotting_results_as_distributions(
        number_of_spots_per_cell,
        number_of_spots_per_cell_cytosol,
        number_of_spots_per_cell_nucleus,
        ts_size,
        number_of_TS_per_cell,
        minimum_spots_cluster,
        output_identification_string=output_identification_string,
        spot_type=i,
    )
    file_plots_cell_size_vs_num_spots = fa.Plots.plot_cell_size_spots(
        channels_with_cytosol,
        channels_with_nucleus,
        cell_size,
        number_of_spots_per_cell,
        cyto_size,
        number_of_spots_per_cell_cytosol,
        nuc_size,
        number_of_spots_per_cell_nucleus,
        output_identification_string=output_identification_string,
        spot_type=i,
    )

    # Keep track of what each plotting helper returned.
    list_file_plots_spot_intensity_distributions.append(file_plots_spot_intensity_distributions)
    list_file_plots_distributions.append(file_plots_distributions)
    list_file_plots_cell_size_vs_num_spots.append(file_plots_cell_size_vs_num_spots)
    list_file_plots_cell_intensity_vs_num_spots.append(file_plots_cell_intensity_vs_num_spots)

    # Drop the per-spot-type arrays before the next iteration.
    del (
        number_of_spots_per_cell,
        number_of_spots_per_cell_cytosol,
        number_of_spots_per_cell_nucleus,
        number_of_TS_per_cell,
        ts_size,
    )

In [None]:
# Scatter plot produced by fa.Plots.plot_scatter_bleedthru for the segmented channels.
file_plots_bleedthru = fa.Plots.plot_scatter_bleedthru(
    dataframe_FISH,
    channels_with_cytosol,
    channels_with_nucleus,
    output_identification_string,
)

## Saving data

In [None]:
# Step 1: save the data and the plots generated above to the output folder.
fa.Utilities.save_output_to_folder(
    output_identification_string,
    data_folder_path,
    file_plots_distributions=list_file_plots_distributions,
    file_plots_cell_size_vs_num_spots=list_file_plots_cell_size_vs_num_spots,
    file_plots_cell_intensity_vs_num_spots=list_file_plots_cell_intensity_vs_num_spots,
    file_plots_spot_intensity_distributions=list_file_plots_spot_intensity_distributions,
    file_plots_bleedthru=file_plots_bleedthru,
)

# Step 2: send the data to the NAS (the send_data_to_NAS flag is forwarded to the helper).
analysis_folder_name, mask_dir_complete_name = fa.Utilities.sending_data_to_NAS(
    output_identification_string,
    data_folder_path,
    path_to_config_file,
    path_to_masks_dir,
    diameter_nucleus,
    diameter_cytosol,
    send_data_to_NAS,
    masks_dir,
)

# Step 3: move the results to the analyses folder.
fa.Utilities.move_results_to_analyses_folder(
    output_identification_string,
    data_folder_path,
    mask_dir_complete_name,
    path_to_masks_dir,
    save_filtered_images,
    download_data_from_NAS,
)