## Correlated trajectory analysis for dual-color single-particle data

This notebook takes the tracks of diffusing molecules in two channels and calculates the fraction of trajectories that are correlated between the channels, which can in turn be used to estimate the oligomeric state of diffusing molecules.

### Set user-defined parameters

In [None]:
### Set user-defined parameters for analysis ###

# JSON file holding the analysis parameters. Like the other paths in this
# notebook, it may be relative or absolute. To analyze a different data set,
# swap which of the lines below is active.
#settings_file = '../data/analysis_settings/2021_12_03_reviewerExpts.json'
#settings_file = '../data/analysis_settings/FigS1B_GST_dimer_rev1.json'
settings_file = '../data/analysis_settings/FigXX_Labeling_Pre_Post_stress.json'

# Locations of the plotting style file and of the figure output
save_dir_reports = '../reports/figures' # Base directory for saving figures
plot_settings = '../src/plotting_settings.py' # File containing matplotlib settings

# Switches for plotting and for saving figures / data
save_data = True # Save filtered tracks to see which ones were identified as correlated.
save_figs = True # Save output figures
plot_all_tracks = False # warning - takes forever!
plot_figs = True

### Load modules and data

In [None]:
### Load modules ###

# Automatically reload user modules before each execution (useful for debugging).
# Comment out the following two lines to disable this.
%load_ext autoreload
%autoreload 2

# Import general Python modules
import os, sys, inspect
import matplotlib
import matplotlib.pylab as plt
import numpy as np
import math
import pandas as pd

# Add source code directory (src) to the front of the module search path to
# enable import of the user modules below. Use sys.path directly: reaching it
# through os.sys relies on an undocumented attribute of the os module.
module_dir = '../src'
sys.path.insert(0, module_dir)

# Import user modules from source code directory
import parse_trackmate as pt
import correlation_analysis as corr

In [None]:
### Load the JSON settings file that specifies analysis details ###

# Read the settings file chosen in the parameters cell. The call returns two
# objects: `conditions`, which is iterated and indexed by condition when the
# data is parsed, and `params`, which is indexed by setting name (e.g.
# 'save_dir', 'window', 'pcc_cutoff') in the cells that follow.
conditions, params = corr.read_analysis_params(
    settings_file, save_dir_reports, print_summary=True)

In [None]:
### Set up figure save directories and load plotting style ###

save_dir = params['save_dir']
save_dir_data = params['save_dir_data']

if save_figs: # Save figure files
    %matplotlib
    %run $plot_settings save_large
    
    # Make directories for saving figures
    dir_sum_figs = os.path.join(save_dir, 'summary_figures')
    dir_examples = os.path.join(save_dir, 'examples') 
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(dir_sum_figs):
        os.makedirs(dir_sum_figs)
    if not os.path.exists(dir_examples):
        os.makedirs(dir_examples)    
else: # Plot interactively
    %matplotlib
    %run $plot_settings plot_only
    
if save_data: # Save filtered TrackMate trajectories
    if not os.path.exists(save_dir_data):
        os.makedirs(save_dir_data)

In [None]:
### Parse track data from TrackMate xml files into Pandas dataframes ###

# Dictionary keyed by condition, holding the parsed two-color data returned
# by pt.read_2color_data for that condition.
data_parsed = {}
for cond_name in conditions:
    print("Now processing condition: " + cond_name)
    parsed = pt.read_2color_data(
        conditions[cond_name],
        do_int_analysis=params['do_int_analysis'],
        int_settings=params['int_settings'],
    )
    data_parsed[cond_name] = parsed
print('Done parsing. Data loading is now complete.')

### Analyze data

In [None]:
### Compute correlations between trajectories of different colors ###

# Always bind corr_results, so that the plotting cell can reference it even
# when the correlation analysis is switched off in the settings file (it then
# stays an empty dataframe instead of being undefined or stale).
corr_results = pd.DataFrame()

if params['do_corr_analysis']:
    per_condition_results = []

    for condition in data_parsed:
        result = corr.corr_analysis(data_parsed[condition], params['window'],
                        params['pcc_cutoff'], params['save_dir_data'],
                        corr_by_window=False, plot_all_tracks=plot_all_tracks)
        # Record which condition these results belong to
        result['Condition'] = condition
        per_condition_results.append(result)

    # Concatenate once at the end: calling pd.concat inside the loop copies
    # the accumulated results again on every iteration. pd.concat raises on
    # an empty list, so keep the empty dataframe when nothing was analyzed.
    if per_condition_results:
        corr_results = pd.concat(per_condition_results, ignore_index=True)

print('Done')

### Plot and save results

In [None]:
#### Plot correlation summary and optionally save the figure and statistics ###
cond_to_plot_1 = 'frac_corr_C1' # Parameter to plot in the first plot
cond_to_plot_2 = 'num_tot_C1' # Parameter to plot in the second plot
# Typical parameters are the fraction of correlated trajectories in channel 1
# 'frac_corr_C1' and the total number of trajectories in channel 1 'num_tot_C1'

if plot_figs:
    fig, f2 = corr.plot_correlation_results(corr_results,
                                            plot_1=cond_to_plot_1,
                                            plot_2=cond_to_plot_2)

# Calculate t-test statistics (for the parameter shown in the first plot)
t_test_summaries = corr.pairwise_t_tests(corr_results, cond_to_plot_1)
print(t_test_summaries)

# Save figures and statistics if needed
if save_figs:
    fig_name = 'All_conditions_correlation'
    data_file_name = 'All_conditions_correlation'
    full_fig_path = os.path.join(dir_sum_figs, fig_name+'.pdf')
    f2_path = os.path.join(dir_sum_figs, fig_name+'_linreg.pdf')
    full_data_file_path = os.path.join(dir_sum_figs, data_file_name+'.txt')

    # The figures only exist when plot_figs is set. Without this guard,
    # save_figs=True with plot_figs=False fails on the undefined fig/f2
    # before the statistics file below is written.
    if plot_figs:
        fig.savefig(full_fig_path)
        f2.savefig(f2_path)

    # Save the settings and statistics associated with this figure
    with open(full_data_file_path, "w") as f:
        if params['do_corr_analysis']:
            f.write("PCC cutoff: " + str(params['pcc_cutoff']) + '\n')
            f.write("PCC window, in frames: " + str(params['window']) + '\n')
        f.writelines(t_test_summaries)