# Integrated Simulations

In [None]:
import os; from os import listdir; from os.path import isfile, join
import re  
from skimage.io import imread
from skimage.exposure import rescale_intensity
import numpy as np 
from tqdm.notebook import tqdm
from timeit import default_timer as timer
import scipy
import pandas as pd
import shutil
import pathlib
import sys
import seaborn as sns
import rsnapsim as rss
import scipy.stats as stats
import matplotlib.pyplot as plt 

In [None]:
# Switch matplotlib to its "dark_background" style for the figures that follow.
plt.style.use("dark_background")

In [None]:
# Project locations, all resolved relative to the folder two levels above the
# current working directory.
current_dir = pathlib.Path().absolute()
sequences_dir, video_dir, masks_dir = (
    current_dir.parents[1] / 'DataBases' / folder
    for folder in ('gene_files', 'videos_for_sim_cell', 'masks_for_sim_cell')
)
rsnaped_dir = current_dir.parents[1] / 'rsnaped'
# Gene sequence used by the simulation; it lives inside the gene_files folder.
gene_file = sequences_dir / 'KDM5B_withTags.txt'

In [None]:
# Importing rSNAPed
# rsnaped is imported from the local folder rsnaped_dir, so that folder has to
# be on the module search path before the import statement runs.
sys.path.append(str(rsnaped_dir))
import rsnaped as rsp

In [None]:
# Print the rSNAPed banner.
rsp.Banner().print_banner()

In [None]:
# Parameters to vary when testing the simulation. The "PLEASE TEST" notes give the range each one should be tested over.
number_of_simulated_cells = 1     # PLEASE TEST MIN 1 MAX 10
number_spots_per_cell = 60           # PLEASE TEST MIN 5 MAX 200
# NOTE(review): the value below (200) is above the MAX 100 stated on the same line -- confirm which one is intended.
simulation_time_in_sec = 200          # PLEASE TEST MIN 10 MAX 100
step_size_in_sec = 1              # forwarded to rsp.simulate_cell as step_size_in_sec

min_percentage_time_tracking = 0.3   # (normalized) minimum time to consider a trajectory.
average_cell_diameter = 400       # forwarded to rsp.image_processing; presumably in pixels -- TODO confirm

In [None]:
diffusion_coefficient = 0.2       # Must be given in square micrometers per second (um^2/s).
microns_per_pixel = 0.13          # Pixel size; forwarded to rsp.simulate_cell and rsp.ParticleMotion.

In [None]:
intensity_calculation_method = 'disk_donut'  # options are: 'total_intensity', 'disk_donut' and 'gaussian_fit'
mask_selection_method = 'max_area' # options are: 'max_spots' and 'max_area'
use_optimization_for_tracking = 1 # 0 not using, 1 is using optimization

selected_channel_tracking = 0      # channel index given to rsp.image_processing for tracking
selected_channel_segmentation = 1  # channel index given to rsp.image_processing for segmentation

frame_selection_empty_video = 'gaussian' # options are: 'constant', 'shuffle', 'loop', 'linear_interpolation', 'gaussian' and 'poisson'
dataframe_format = 'long' # options are: 'short' and 'long'

store_videos_in_memory = False  # forwarded to rsp.simulate_cell
save_as_gif = False 
show_plot = False  # Flag to show plots for the detection and tracking process.


In [None]:
spot_size = 5 # spot size for the simulation and tracking.
spot_sigma = 1            # forwarded to rsp.simulate_cell as spot_sigma
elongation_rate = 10      # forwarded as list_elongation_rates; units are not stated here -- TODO confirm
initiation_rate = 0.03    # forwarded as list_initiation_rates; units are not stated here -- TODO confirm
simulated_RNA_intensities_method = 'random'  # forwarded to rsp.simulate_cell under the same name

In [None]:
# Settings for the background (base) video; both are forwarded to rsp.simulate_cell under the same names.
basal_intensity_in_background_video = 50000
scale_intensity_in_base_video=False

In [None]:
# Per-channel intensity scale factors forwarded to rsp.simulate_cell.
intensity_scale_ch0 = 1
intensity_scale_ch1 = 3
intensity_scale_ch2 = None  # NOTE(review): presumably None means the channel is not scaled -- confirm in rsp.simulate_cell

In [None]:
# The detection step uses the same spot size as the simulation.
particle_detection_size = spot_size

## Running the simulations

In [None]:
# Run the simulation with rsp.simulate_cell. Every tunable value comes from the
# parameter cells above, including dataframe_format, so the simulated dataframes
# use the same format that is later given to rsp.image_processing.
(
    list_videos,
    list_dataframe_simulated_cell,
    merged_dataframe_simulated_cells,
    ssa_trajectories,
    list_files_names,
    video_path,
    dataframe_path,
) = rsp.simulate_cell(
    video_dir,
    list_gene_sequences=gene_file,
    list_number_spots=number_spots_per_cell,
    list_target_channels_proteins=1,
    list_target_channels_mRNA=0,
    list_diffusion_coefficients=diffusion_coefficient,
    list_elongation_rates=elongation_rate,
    list_initiation_rates=initiation_rate,
    masks_dir=masks_dir,
    list_label_names=1,
    number_cells=number_of_simulated_cells,
    simulation_time_in_sec=simulation_time_in_sec,
    step_size_in_sec=step_size_in_sec,
    save_as_gif=save_as_gif,
    frame_selection_empty_video=frame_selection_empty_video,
    spot_size=spot_size,
    spot_sigma=spot_sigma,
    intensity_scale_ch0=intensity_scale_ch0,
    intensity_scale_ch1=intensity_scale_ch1,
    intensity_scale_ch2=intensity_scale_ch2,
    dataframe_format=dataframe_format,  # was hard-coded to 'long'; now follows the parameter cell
    simulated_RNA_intensities_method=simulated_RNA_intensities_method,
    store_videos_in_memory=store_videos_in_memory,
    scale_intensity_in_base_video=scale_intensity_in_base_video,
    basal_intensity_in_background_video=basal_intensity_in_background_video,
    microns_per_pixel=microns_per_pixel,
)

In [None]:
# Number of videos returned in memory by rsp.simulate_cell.
number_images = len(list_videos)

In [None]:
# Read the results folder and list the simulated videos (.tif files).
list_files_names = sorted([f for f in listdir(video_path) if isfile(join(video_path, f)) and ('.tif') in f], key=str.lower)  # reading all tif files in the folder
# Sort by the digits found in each file name. The pattern is a raw string because
# '\D' is not a valid escape sequence in a normal string literal.
list_files_names.sort(key=lambda f: int(re.sub(r'\D', '', f)))  # sorting the index in numerical order
# Build the full paths in the same order as the names. They are deliberately not
# sorted again: a sort keyed on the digits of the whole path can order the paths
# differently from the names (e.g. zero-padded indexes), pairing names with the wrong video.
path_files = [ str(video_path.joinpath(f).resolve()) for f in list_files_names ] # creating the complete path for each file

# One video is expected per simulated cell.
number_images = number_of_simulated_cells
number_images

# Display simulations for a single time point

## <span style="color:red">Channel 0</span>

In [None]:
# Showing the simulated images
# Every video is read from disk; the four slices index each array along four axes without dropping any data.
list_videos = [imread(f)[:,:,:,:] for f in  path_files] # List with all the videos
# Plot channel 0 at time point 0 for every video.
rsp.VisualizerImage(list_videos,list_files_names=list_files_names,selected_channel =0,selected_time_point= 0,normalize=0,individual_figure_size=7).plot()

## <span style="color:lightgreen">Channel 1</span>

In [None]:
# Showing the simulated images
# Plot channel 1 at time point 0 for every video.
rsp.VisualizerImage(list_videos,list_files_names=list_files_names,selected_channel =1,selected_time_point= 0,normalize=0,individual_figure_size=7).plot()
# Drop the loaded videos: the processing step is given the folder (video_path), not this list.
del list_videos

In [None]:
# Process every video stored in video_path with rsp.image_processing. The
# tracking, segmentation and intensity settings come from the parameter cells,
# and the simulated dataframes are handed over as real_positions_dataframe.
(
    list_DataFrame_particles_intensities,
    list_array_intensities,
    list_time_vector,
    list_selected_mask,
) = rsp.image_processing(
    files_dir_path_processing=video_path,
    particle_size=particle_detection_size,
    selected_channel_tracking=selected_channel_tracking,
    selected_channel_segmentation=selected_channel_segmentation,
    intensity_calculation_method=intensity_calculation_method,
    mask_selection_method=mask_selection_method,
    show_plot=show_plot,
    use_optimization_for_tracking=use_optimization_for_tracking,
    real_positions_dataframe=list_dataframe_simulated_cell,
    average_cell_diameter=average_cell_diameter,
    print_process_times=True,
    min_percentage_time_tracking=min_percentage_time_tracking,
    dataframe_format=dataframe_format,
)

In [None]:
# Preview the first rows of the dataframe produced for the first video.
list_DataFrame_particles_intensities[0].head()

In [None]:
def df_to_array(dataframe_simulated_cell,selected_field):
    '''
    Convert one column of a trajectory dataframe into a [particle, frame] array.

    Every (cell_number, particle) pair found in the dataframe becomes one row.
    Rows are ordered by cell number and then by particle number.

    Input
        dataframe_simulated_cell : pandas dataframe
            Must contain the columns 'cell_number', 'particle', 'frame' and the
            column named by selected_field. 'frame' holds non-negative frame indexes.
        selected_field : str,
            Name of the column to extract.

    Returns

        field_as_array : NumPy array with dimensions [number_particles, max_frame + 1].
            Element [k, t] is the value of selected_field for trajectory k at frame t.
            Frames where a trajectory has no data are NaN. An empty dataframe
            gives an array with shape (0, 0).
    '''
    # Nothing to extract: there are neither trajectories nor frames.
    if len(dataframe_simulated_cell) == 0:
        return np.full((0, 0), np.nan)
    # One group per trajectory. Grouping by cell AND particle matters because the
    # same particle number can appear in several cells, and cells can hold
    # different numbers of particles.
    trajectories = dataframe_simulated_cell.groupby(['cell_number', 'particle'], sort=True)
    number_frames = int(dataframe_simulated_cell['frame'].max()) + 1
    # Pre-allocate with NaN so missing time points stay distinguishable from real zeros.
    field_as_array = np.full((trajectories.ngroups, number_frames), np.nan)
    for row, (_, trajectory) in enumerate(trajectories):
        # Frame numbers are used as column indexes, so they must be integers.
        frame_values = trajectory['frame'].to_numpy().astype(int)
        field_as_array[row, frame_values] = trajectory[selected_field].to_numpy()
    return field_as_array

In [None]:
def extract_field_from_dataframe(dataframe_path=None, dataframe_name=None,selected_time=None,selected_field='green_int_mean'):
    '''
    Extract one column from a trajectory dataframe.

    Input
        dataframe_path : path or None
            CSV file to load with pandas. When given, it takes precedence over dataframe_name.
        dataframe_name : pandas dataframe or None
            Dataframe already in memory. Used only when dataframe_path is None.
        selected_time : int or None
            Frame to extract. If None, all frames are returned.
        selected_field : str
            Name of the column to extract.

    Returns
        extracted_data : NumPy array.
            If selected_time is given, a vector with the values of selected_field
            for the rows whose 'frame' equals selected_time. If selected_time is
            None, the output of df_to_array for the whole dataframe.

    Raises
        ValueError : if both dataframe_path and dataframe_name are None.
    '''
    if dataframe_path is not None:
        temporal_dataframe = pd.read_csv(dataframe_path)
    elif dataframe_name is not None:
        temporal_dataframe = dataframe_name
    else:
        # Fail early with a clear message instead of an obscure error further down.
        raise ValueError('Either dataframe_path or dataframe_name must be provided.')
    if selected_time is not None:
        rows_in_frame = temporal_dataframe['frame'] == selected_time
        return temporal_dataframe.loc[rows_in_frame, selected_field].values
    return df_to_array(temporal_dataframe, selected_field)

In [None]:
# Channel index used for the background and spot analysis in the cells below.
selected_channel = 1

In [None]:
# Calculate background intensity distribution.
# The videos are read from disk again and the analysis works on the first one.
list_videos = [imread(f)[:,:,:,:] for f in  path_files] # List with all the videos
image = list_videos[0]
#df = list_DataFrame_particles_intensities[0].copy()
# Spot data comes from the first simulated dataframe; the commented line above would use the first tracked dataframe instead.
df = list_dataframe_simulated_cell[0].copy()
print(image.shape)
# Show the first frame of the selected channel.
plt.imshow(image[0,:,:,selected_channel])

In [None]:
def remove_spots_from_image(img, x_values, y_values,spot_size):
    '''
    Return a copy of a 2D image where a square window around every spot is set to zero.

    Input
        img : NumPy array indexed as [y, x]. It is not modified.
        x_values, y_values : sequences with the x and y coordinate of each spot center.
            Non-integer coordinates are rounded to the nearest pixel.
        spot_size : int
            Side length of the square window. The window spans spot_size//2 pixels
            on each side of the center.

    Returns
        img_removed_spots : copy of img with the spot windows set to 0. Windows
            that extend past the image border are cropped to the image.
    '''
    img_removed_spots = img.copy()
    half_size = spot_size // 2
    for x, y in zip(x_values, y_values):
        x, y = int(round(x)), int(round(y))
        # Clamp at 0: a negative index would wrap around to the opposite side of
        # the image, so the intended window would not be cleared.
        row_start, row_stop = max(y - half_size, 0), max(y + half_size + 1, 0)
        col_start, col_stop = max(x - half_size, 0), max(x + half_size + 1, 0)
        img_removed_spots[row_start:row_stop, col_start:col_stop] = 0
    return img_removed_spots

In [None]:
# Apply mask to image.
# The mask is the one rsp.image_processing returned for the first video; it is copied before use.
selected_mask= list_selected_mask[0].copy()
img_removed_mask = rsp.MaskingImage(video=image,mask=selected_mask).apply_mask()

In [None]:
# Show the first frame of the masked video. The channel follows selected_channel
# (it was hard-coded to 1), so this preview matches the channel analyzed below.
plt.imshow(img_removed_mask[0,:,:,selected_channel])

In [None]:
# Build a copy of the masked video (selected channel only) and, frame by frame,
# zero the pixels around the spot positions listed in df for that frame.
test_image = img_removed_mask[:,:,:,selected_channel].copy()
for i in range(test_image.shape[0]):
    spots_in_frame = df.loc[df['frame'] == i]  # rows of df that belong to frame i
    x_values = spots_in_frame['x'].values
    y_values = spots_in_frame['y'].values
    test_image[i] = remove_spots_from_image(test_image[i], x_values, y_values, spot_size)

In [None]:
# Show frame 5 after the spots were removed (requires a video with at least 6 frames).
plt.imshow(test_image[5])

In [None]:
# Two pixel populations, both restricted to values strictly between 0 and 20000:
#   data_wo_spots : pixels of the video where the spots were set to zero (background only)
#   data_w_spots  : pixels of the masked video, spots included
# Dropping the zeros discards the removed spot windows (and presumably the pixels outside the mask -- TODO confirm).
data_wo_spots = test_image[(test_image > 0) & (test_image < 20000)]
data_w_spots = img_removed_mask[:,:,:,selected_channel]
data_w_spots = data_w_spots[(data_w_spots > 0) & (data_w_spots < 20000)]

In [None]:
# Pixel intensity distribution for the background
# From here on the figures use the 'default' + 'fivethirtyeight' styles instead of the dark one.
plt.style.use(['default', 'fivethirtyeight'])
plt.figure(figsize=(7,5))
# Both histograms are drawn on the same axes: all pixels first, background-only pixels on top.
plt.hist(data_w_spots, bins=50, alpha=0.5, label="with_spots", stacked=True, histtype='barstacked',edgecolor='orangered',linewidth=1)
plt.hist(data_wo_spots, bins=50, alpha=0.8, label="background", stacked=True, histtype='barstacked',edgecolor='k',linewidth=1)

plt.xlabel("Intensity", size=14)
plt.ylabel("Count", size=14)
plt.title("Pixel intensity distribution")
plt.legend(loc='upper right')
#plt.xlim((0,10000))
plt.show()

In [None]:
# SNR of the green channel for every particle and frame, as a [particle, frame] array.
snr_green_channel = extract_field_from_dataframe( dataframe_name = df, selected_time = None, selected_field = 'SNR_green')
# Dataframe with fields [cell_number, particle, frame, red_int_mean, green_int_mean, blue_int_mean, red_int_std, green_int_std, blue_int_std, x, y, SNR_red,SNR_green,SNR_blue].
data = snr_green_channel.flatten()
# Frames without data are NaN in the array; they must be dropped because the
# histogram cannot compute its bin range when NaN values are present.
data = data[~np.isnan(data)]
plt.figure(figsize=(7,5))
plt.hist(data, bins=50,histtype='barstacked',edgecolor='orangered',linewidth=2,color = '#1C00FE', alpha=0.8)
plt.xlabel("SNR", size=14)
plt.ylabel("Count", size=14)
plt.title("SNR distribution")
#plt.legend(loc='upper right')
#plt.xlim((0,35000))
plt.show()

In [None]:
# Mean green-channel intensity for every particle and frame, as a [particle, frame] array.
int_green_channel = extract_field_from_dataframe( dataframe_name = df, selected_time = None, selected_field = 'green_int_mean')
# Dataframe with fields [cell_number, particle, frame, red_int_mean, green_int_mean, blue_int_mean, red_int_std, green_int_std, blue_int_std, x, y, SNR_red,SNR_green,SNR_blue].
data = int_green_channel.flatten()
# Frames without data are NaN in the array; they must be dropped because the
# histogram cannot compute its bin range when NaN values are present.
data = data[~np.isnan(data)]
plt.figure(figsize=(7,5))
plt.hist(data, bins=30,histtype='barstacked',edgecolor='orangered',linewidth=2,color = '#1C00FE', alpha=0.8)
plt.xlabel("Intensity", size=14)
plt.ylabel("Count", size=14)
plt.title("Spot intensity distribution")
#plt.legend(loc='upper right')
#plt.xlim((0,35000))
plt.show()

In [None]:
# Autocovariance of the 'green_int_mean' field of df, computed with rsp.Covariance
# for lag times up to max_lagtime=150; the result is also plotted (show_plot=True).
mean_acf_data,err_acf_data,lags, decorrelation_time, auto_correlation_matrix=rsp.Covariance(dataframe_particles=df,selected_field='green_int_mean', max_lagtime= 150, show_plot= True,figure_size=(6,4)).calculate_autocovariance()

In [None]:
# Mean square displacement of the trajectories in df, computed with rsp.ParticleMotion.
# The time step is taken from step_size_in_sec (it was hard-coded to 1.), so the
# result stays consistent if the simulation step is changed. It is passed as a
# float, as the original literal was.
calculated_diffusion_coefficient, MSD_series, trackpy_df = rsp.ParticleMotion(
    trackpy_dataframe=df,
    microns_per_pixel=microns_per_pixel,
    step_size_in_sec=float(step_size_in_sec),
    max_lagtime=50,
    show_plot=True,
    remove_drift=False,
).calculate_msd()