### Set user-defined parameters

In [None]:
### User-defined parameters for this analysis ###

# JSON file holding the analysis parameters.
# Paths in this notebook may be given as relative or absolute paths.
settings_file = '../data/analysis_settings/Fig3sup4_SASPT_MSD_analysis.json'

# Supporting files and output locations
plot_settings = '../src/plotting_settings.py' # Script with matplotlib settings (executed via %run)
save_dir_reports = '../reports/figures' # Base directory for saving figures

# Plotting switches
plot_figs = True
plot_all_tracks = False # warning - takes forever when enabled!

# Output switches
save_figs = True # Save output figures
save_data = True # Save summary statistics (CSV / TXT files) computed in later cells

# Analysis switches
split_by_color = False # If True, the color channel is appended to each condition name

### Load modules and data

In [None]:
### Load modules ###

# Enable automatic module reloading for debugging (comment out the following two lines to disable)
%load_ext autoreload
%autoreload 2

# Import general Python modules
import os, sys, inspect
import matplotlib
import matplotlib.pylab as plt
import numpy as np
import math
import pandas as pd
import seaborn as sns

# Import relevant parts of the SASPT module (see ref in manuscript)
from saspt import StateArray, RBME, StateArrayDataset, normalize_2d

# Add source code directory (src) to the module search path to enable user
# module import. The already-imported `sys` module is used directly instead
# of reaching it through the undocumented `os.sys` attribute.
module_dir = '../src'
sys.path.insert(0, module_dir)

# Import user modules from source code directory
import parse_trackmate as pt
import correlation_analysis as corr
import diffusion as dif

In [None]:
### Read the JSON settings file that specifies analysis details ###
# Note: for diffusion analysis the JSON settings file has to provide the keys
# 'diff_dim', 'min_averages_for_msd', 'dc_fit_nframes' and
# 'frame_interval_sec' (the frame interval, given in seconds).

analysis_settings = corr.read_analysis_params(settings_file, save_dir_reports,
                                              print_summary=True)
conditions, params = analysis_settings



In [None]:
### Set up figure save directories and load plotting style ###

save_dir = params['save_dir']
save_dir_data = params['save_dir_data']

# Prepare directory for saving CSV exports of trajectories
save_dir_csv = os.path.join(save_dir_data, 'csv_for_saspt')
if not os.path.exists(save_dir_csv):
        os.makedirs(save_dir_csv)

if save_figs: # Save figure files
    %matplotlib
    %run $plot_settings save_large
    
    # Make directories for saving figures
    dir_sum_figs = os.path.join(save_dir, 'summary_figures')
    dir_examples = os.path.join(save_dir, 'examples')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(dir_sum_figs):
        os.makedirs(dir_sum_figs)
    if not os.path.exists(dir_examples):
        os.makedirs(dir_examples)    
else: # Plot interactively
    %matplotlib
    %run $plot_settings plot_only
    
if save_data: # Save filtered TrackMate trajectories
    if not os.path.exists(save_dir_data):
        os.makedirs(save_dir_data)

In [None]:
### Parse TrackMate xml track data into Pandas dataframes ###

data_parsed = {} # Maps each condition name to the data parsed for that condition
for condition in conditions:
    print("Now processing condition: " + condition)
    parsed = pt.read_2color_data(
        conditions[condition],
        do_int_analysis=params['do_int_analysis'],
        int_settings=params['int_settings'],
    )
    data_parsed[condition] = parsed
print('Done parsing. Data loading is now complete.')

## Beginning of SASPT analysis

In [None]:
# Analysis of trajectories pooled by condition (e.g. treatment or construct type).
# Export all trajectories as CSV files that can be read by SASPT

# Create organizer CSV file to keep track of CSV files storing individual trajectories.
# splitext removes only the final extension, so a settings file name that
# contains extra dots (e.g. 'analysis_v1.2.json') is not cut at the first dot.
csv_organizer_base_name = os.path.splitext(os.path.basename(settings_file))[0]
csv_organizer_filename = 'org_'+csv_organizer_base_name+'.csv'
csv_organizer_path = os.path.join(save_dir_csv, csv_organizer_filename)

# Save trajectories as CSV files with columns "trajectory", "frame", "x", and "y".
# These files can then be directly read by the saspt module.
csv_paths = []
csv_conditions = []
for condition, data in data_parsed.items():
    for row in data.itertuples(index=False):
        # Rows are accessed by position: field 0 is used as the source file
        # name, field 2 as the color label and field 4 as the track table.
        # NOTE(review): positions presumably match the column order produced
        # by pt.read_2color_data - confirm if that function changes.
        filename_source = row[0]
        data_to_save = row[4][['track_ID','t','x','y']]
        # Rename columns to the names used in the exported CSV
        df = data_to_save.rename(columns={"track_ID": "trajectory", "t": "frame"})

        # NOTE(review): if two rows share the same source file name, the later
        # CSV overwrites the earlier one - verify names are unique per row.
        csv_filename = filename_source + '.csv'
        csv_path = os.path.join(save_dir_csv, csv_filename)
        df.to_csv(csv_path, index=False)

        # Append color to the condition if needed
        if split_by_color:
            color = row[2]
            csv_condition = condition + '_' + color
        else:
            csv_condition = condition
        csv_paths.append(csv_path)
        csv_conditions.append(csv_condition)

# One organizer row per exported CSV: its path and the condition it belongs to
csv_organizer = pd.DataFrame({"filepath":csv_paths, "condition":csv_conditions})
csv_organizer.to_csv(csv_organizer_path, index=False)

In [None]:
# Initialize state array dataset (all trajectories from all conditions)

paths = pd.read_csv(csv_organizer_path)
#print(paths) # Uncomment for debugging

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to a single worker in that case.
num_workers = os.cpu_count() or 1
print(num_workers)

# Load settings from the params file if possible, otherwise use defaults.
# Only lookup failures are caught, so unrelated errors are not silently hidden.
try:
    frame_interval = params['raw_json']['frame_interval_sec']
except (KeyError, TypeError):
    print('Frame interval not loaded, reverting to defaults')
    frame_interval = 0.06
# Report the value actually used. Printing it before the try block (as was
# done previously) raised KeyError and made the fallback unreachable.
print(frame_interval)

try:
    focal_depth = params['raw_json']['focal_depth_um']
except (KeyError, TypeError):
    print('Focal depth not loaded, reverting to defaults')
    focal_depth = 0.2

settings = dict(
    likelihood_type = RBME,
    # NOTE(review): pixel size of 1 presumably means the exported x/y
    # coordinates are already in micrometers - confirm against the tracking output.
    pixel_size_um = 1,
    frame_interval = frame_interval,
    focal_depth = focal_depth,
    diff_coefs = np.logspace(-2.0,0.0,num=100), # 100 log-spaced values from 0.01 to 1
    loc_errors = np.linspace(0.0, 0.05, num=20), # 20 evenly spaced values from 0 to 0.05
    path_col = 'filepath', # column names written to the organizer CSV
    condition_col = 'condition',
    progress_bar = True,
    num_workers = num_workers,
)

SAD = StateArrayDataset.from_kwargs(paths, **settings)
print(SAD)

In [None]:
# Write the posterior heat map and the posterior line plot as PDF files
# into the summary figures directory.
# This can take a while for large datasets.

heat_map_path = os.path.join(dir_sum_figs, 'posterior_heat_map.pdf')
line_plot_path = os.path.join(dir_sum_figs, 'posterior_line_plot.pdf')

SAD.posterior_heat_map(heat_map_path)
SAD.posterior_line_plot(line_plot_path)

In [None]:
# Pool all files of each condition and infer the posterior per condition

aggregated = SAD.infer_posterior_by_condition('condition')
posterior_occs, condition_names = aggregated

# Report the array shape and the condition names, one per line
print(posterior_occs.shape, condition_names, sep='\n')

In [None]:
# Plot posterior occupations for aggregated trajectories

limit_conditions_to_plot = None
# Optionally, select a subset of conditions for a plot below:
#limit_conditions_to_plot = ['HaloTag-1x_C1', 'HaloTag-GST_C1','EndoIRE1-NoStress_C1','EndoIRE1_4h-Tm_C1']

posterior_occs = normalize_2d(posterior_occs, axis=1)
diff_coefs = SAD.likelihood.diff_coefs

# Use an explicit, fresh figure so that re-running the cell does not draw
# on top of curves left over from a previous run.
fig, ax = plt.subplots()
for c in range(posterior_occs.shape[0]):
    if limit_conditions_to_plot is None or condition_names[c] in limit_conditions_to_plot:
        ax.plot(diff_coefs, posterior_occs[c,:], label=condition_names[c])
ax.set_xscale('log')
# Raw string: '\m' is not a valid escape sequence in a normal string literal
ax.set_xlabel(r'Diff. coef. ($\mu$m$^{2}$ s$^{-1}$)')
ax.set_ylabel('Mean posterior occupation')
ax.set_ylim((0, ax.get_ylim()[1])) # pin lower limit to 0, keep automatic upper limit

# Custom x-limits if needed
ax.set_xlim((1e-4, ax.get_xlim()[1]))

ax.legend()

# Save figures if needed. Saving happens before plt.show(): depending on the
# backend, show() can release the figure, and saving afterwards would then
# write an empty file.
if save_figs:
    fig_name = 'Aggregated_posterior_occupations_by_condition'
    full_fig_path = os.path.join(dir_sum_figs, fig_name+'.pdf')
    fig.savefig(full_fig_path)

plt.show()

In [None]:
# Per file, sum the posterior occupation of all states whose diffusion
# coefficient lies below a cutoff, then summarize these sums per condition.
occupations = SAD.marginal_posterior_occs_dataframe
diff_cutoff = 0.17
is_below_cutoff = occupations['diff_coef'] < diff_cutoff

# Summed posterior occupation below the cutoff, one row per file
fraction_by_file = (
    occupations.loc[is_below_cutoff]
    .groupby('filepath')['posterior_occupation']
    .sum()
    .reset_index()
    .rename(columns={"posterior_occupation": "fraction_cutoff"})
)

# First row of every file; used to carry the per-file 'condition' label along
first_row_by_file = occupations.groupby('filepath').first().reset_index()
result = first_row_by_file.merge(fraction_by_file, how='inner', on='filepath')

# Summary statistics across files within each condition
by_cond = result.groupby('condition')['fraction_cutoff'].agg(['mean', 'count', 'std', 'sem'])
print(by_cond)

if save_data:
    data_file_name = 'Fraction_of_trajectories_below_diff_'+str(diff_cutoff)
    by_cond.to_csv(os.path.join(dir_sum_figs, data_file_name+'.csv'), index=True)

In [None]:
# Mean diffusion coefficient per file, summarized per condition.

# Weight each diffusion coefficient by its posterior occupation; the per-file
# sum of these products is taken as the mean diffusion coefficient of that file.
# NOTE(review): this presumes the posterior occupations of a file sum to 1 - confirm.
weighted = occupations.assign(
    weighed_dc=occupations['diff_coef'] * occupations['posterior_occupation']
)
mean_dc_by_file = (
    weighted.groupby('filepath')['weighed_dc']
    .sum()
    .reset_index()
    .rename(columns={"weighed_dc": "mean_dc"})
)

# First row of every file; used to carry the per-file 'condition' label along
first_row_by_file = occupations.groupby('filepath').first().reset_index()
result = first_row_by_file.merge(mean_dc_by_file, how='inner', on='filepath')

# Summary statistics across files within each condition
by_cond = result.groupby('condition')['mean_dc'].agg(['mean', 'count', 'std', 'sem'])
print(by_cond)

if save_data:
    data_file_name = 'Mean_diffusion_coeff_by_condition'
    by_cond.to_csv(os.path.join(dir_sum_figs, data_file_name+'.csv'), index=True)

In [None]:
# Run pairwise Monte-Carlo permutation tests on the per-file mean diffusion
# coefficients ('mean_dc') to check for differences between conditions
test_results = corr.pairwise_perm_tests(result, 'mean_dc', 'condition', num_iter=1000)

# Write the test output to a text file if requested
if save_data:
    stats_path = os.path.join(dir_sum_figs, 'Pairwise_correlations_diff_const'+'.txt')
    with open(stats_path, "w") as stats_file:
        # writelines() adds no line separators of its own.
        # NOTE(review): test_results presumably already contains them - confirm.
        stats_file.writelines(test_results)

print('finished')

In [None]:
# Uncomment this block to check the sample dataset.
# Use this only to troubleshoot your saspt module's installation.
#
# The block is kept as real comments rather than a bare triple-quoted string:
# a string expression at the end of a cell is evaluated and echoed as the
# cell's output. Note that uncommenting it rebinds `settings`, which the
# state array dataset cell also defines.

# from saspt import sample_detections
#
# detections = sample_detections()
#
# settings = dict(
#     likelihood_type = RBME,
#     pixel_size_um = 0.122,
#     frame_interval = 0.01,
#     focal_depth = 0.7,
#     progress_bar = True,
# )