#### Workflow example:
<ol>
    <li>Data Correction:
        <ol>
            <li>Find the absolute value of the minimal extension for each DNA molecule (z0-files)</li>
            <li>Correct the absolute extensions by subtracting the minimal extension value from each data point</li>
            <li>Plot the data from z0-files, corrected ID-files and corrected experiment-files with respect to time, for visual evaluation.</li>
            <li>Visually evaluate each bead timecourse. Define which beads are to be excluded from data processing.</li>
            Criteria to exclude beads:
            <ul>
                <li>Tethers with multiple DNA molecules (evaluated as described in literature)</li>
                <li>DNA molecules that were not fully extended from the beginning of the assay.</li>
                <li>Beads that got stuck to the surface and/or to the bead (delta_z smaller than -0.5 um) for half of the duration of the assay.</li>
                <li>Beads lost while tracking.</li>
            </ul>
        </ol>
    </li><br>
    <li>Data Collection:
        <ol>
            <li>For each bead of each area:
                <ol>
                    <li>Get baseline extension (maximum extension, step = 1)</li>
                    <li>Classify force cycles (from step 2 forward)</li>
                    <li>For each point of the dataset, subtract the baseline</li>
                    <li>Plot each timecourse aligned to deltaZ for visual evaluation</li>
                </ol>
            </li>
            <li>Create (or import) the pickle file in which all the data points from different beads, areas, and dates are stored for a particular protein/RNA combination.
                <ol>
                    <li>If it exists, import it. If not, create the pickle file for the protein/RNA combination.</li>
                    <li>For each new area, add beads to dictionary.</li>
                    <li>Export updated pickle.</li>
               </ol>
            </li>
        </ol>
    </li><br>
    <li>Data Processing:
        <ol>
            <li>Import the data for the conditions of choice</li>
            <li>Process the data per condition:
               <ol>
                   <li>Merge the extension data points for all beads</li>
                   <li>Extract values of interest: total number of molecules and number of molecules with events</li>
                   <li>For each cycle and molecule, extract the dwell time, initial extension and final extension</li>
               </ol>
            </li>
            <li>Plot the extension distributions and the dwell-time distributions</li>
        </ol>
    </li>
</ol>



### Data Correction 

In [None]:
# Use the interactive notebook backend so the figures produced below can be inspected in place.
%matplotlib notebook
import corrections as corr
# Project helper from `corrections`; presumably applies the default figure styling — confirm in corrections.py.
corr.def_fig_parameters()

In [None]:
# Folder holding the raw files, and the label of the experimental condition.
folder = r'2021_12_15'
condition = '71G+Ku+X4L4+NIHCO'

# File numbers written as one (z0, ID, R) triple per row, then split back into
# the three parallel lists that `correction_by_areas` takes.
# Presumably each row is one area: its z0 file, its ID file and its experiment
# (results) file — confirm against corrections.py.
file_numbers_by_area = [
    (0, 1, 2),
    (4, 5, 6),
    (7, 8, 9),
    (14, 15, 16),
    (17, 18, 19),
    (21, 22, 23),
]
z0, ID, R = (list(column) for column in zip(*file_numbers_by_area))

corr.correction_by_areas(folder, z0, ID, R)

### Data Processing

In [1]:
%matplotlib notebook
import processing as proc
import ipywidgets as widgets
cm = 1/2.54 # conversion: 1 centimeter in inches 

#### 01 - Merge all the data for each protein/RNA condition into a pickle file.

Choose the folder, label the condition, define the files to be processed and list the beads to be excluded. 

In [2]:
date = '2021_12_15' # folder where the data is located
condition = '71G+Ku+X4L4+NIHCO' # experiment label

# One entry per results file: [results_file_number, [bead numbers to exclude]].
# The bead lists come from the visual evaluation done in the Data Correction step.
results = [[2, [5,11,22]],
           [6, [18,20,21]],
           [9, [7,18,19,21]],
           [16, [11,16,20]],
           [19, [10,14]],
           [23, [14,15,21]]]
# for line in results: results_file_number, list_of_bead_numbers_to_exclude = line

# `expment` is indexed by area in the plotting cell below (expment[area]['aligned'], ['beads_labels']).
# NOTE(review): the output recorded for this cell is
# "NameError: name 'values_align_to' is not defined". That name does not appear in this
# notebook, so it is presumably raised inside processing.py — confirm and fix it there.
expment = proc.align_classify_savepickle(date, condition, results)

NameError: name 'values_align_to' is not defined

Plot the individual timecourses aligned to deltaz=0.

In [None]:
# `plt` is used in this cell but matplotlib.pyplot is first imported much further down
# the notebook, so on a fresh kernel (Restart & Run All) this cell raised NameError.
import matplotlib.pyplot as plt

# Two stacked panels sharing the time axis: axs[0] (tall) shows the bead extension,
# axs[1] (short) shows the applied force.
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(6,4), sharex=True,
                        gridspec_kw={'width_ratios':[1], 'height_ratios':[5, 1],
                                     'top':.91, 'bottom':.13, 'hspace':0.05, 'left':.15, 'right':.99})

def get_force(expment_aligned, time):
    """Draw the force trace on the lower panel (``axs[1]``).

    The force is computed from the ``'Shift pos (mm)'`` column of
    ``expment_aligned`` as ``10**(0.7344 - 0.31104*position)`` and plotted
    against ``time`` in blue. Nothing is returned; the function only plots.

    NOTE(review): the two constants look like an empirical magnet-position to
    force calibration (pN, per the axis label) — confirm their origin.
    """
    magnet_position = expment_aligned['Shift pos (mm)']
    exponent = 0.7344-0.31104*magnet_position
    axs[1].plot(time, 10**exponent, color='blue')

areas = list(expment.keys())


def update_area(area):
    """Build the bead selector for the chosen area.

    Looks up the area's entry in ``expment`` and wires a bounded integer box
    (0 .. number of beads - 1) to ``update_bead``, which redraws the figure.
    """
    area_data = expment[f'{area}']
    n_beads = len(area_data['beads_labels'])

    def update_bead(number):
        """Redraw both panels for the bead at index ``number`` of this area."""
        # Start from empty panels so traces from the previous bead do not pile up.
        for panel in (axs[0], axs[1]):
            panel.clear()

        # --- data for the selected bead
        bead_label = area_data['beads_labels'][number]
        bead_name = bead_label.split('-')[0]
        aligned = area_data['aligned']
        extension = aligned[bead_label]
        time = aligned['Time (sg)']

        # --- title
        fig.suptitle(f'{date}_{condition}_{area}_{bead_name}')

        # --- upper panel: aligned extension versus time
        axs[0].scatter(time, extension, c='tomato', s=1, marker='.', alpha=0.5,
                       label=f'{condition}')
        axs[0].set(ylabel='∆Z (µm)', ylim=(-1.40, 0.2))
        # Dotted guide lines at ∆Z = 0 and ∆Z = -0.21 µm.
        for level in (0, -0.21):
            axs[0].axhline(level, 0, 1, ls=':', color='black', ms=1)

        # --- lower panel: applied force versus time
        get_force(aligned, time)
        axs[1].set(xlim=(time.min(), time.max()), xlabel='Time (s)',
                   ylim=(-0.2,2.4), ylabel='F (pN)')

    bead_box = widgets.BoundedIntText(min=0, max=n_beads-1, step=1, description='bead:')
    widgets.interact(update_bead, number=bead_box)


area_menu = widgets.Dropdown(options=areas, value=areas[0], description='area:')
widgets.interact(update_area, area=area_menu)

Create or import a pickle file for the protein/RNA combination and save the data to the pickle file.

In [None]:
# Process the files listed in `results` for this date/condition. With save=True the data is
# presumably written to the pickle file of this protein/RNA combination (see processing.py).
# The return value is not needed here, hence `_`.
_ = proc.process_and_save_all(date, condition, results, save=True)

#### 02 - Import the pickle files for the conditions we want to process and compare.

In [None]:
%matplotlib notebook
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import processing as proc
cm = 1/2.54 # conversion: 1 centimeter in inches 

Import the data within the dictionaries grouped by condition:

In [None]:
# Folder containing the pickle files, relative to the notebook's working directory.
pickle_path = os.path.join('Pickles')
# Load the stored data; the keys of the returned mapping are used as the condition labels.
dataset = proc.manage_data_from_pickles(folder_path=pickle_path)
conditions = list(dataset.keys())
print('Finished!')

Process the data and extract parameters of interest (times and deltas_z). Merge the data points for each condition.

In [None]:
# Both results are indexed by condition downstream: `dataset_processed[condition]['time_rupture_merged']`
# feeds the dwell-time plot, and both objects are passed to `proc.plot_distribution`.
dataset_processed, dataset_merged = proc.process_and_merge(dataset, conditions)

Define the figure parameters (file name, size, and the condition to plot with its display label).

In [None]:
# FIGURE Example
# Base file name (without extension) used when the figure is saved.
fig_name = 'Fig_example'
# (width, height): centimetre values converted to inches with the `cm` factor.
figsize = (16*cm,11*cm)
# [condition key, display label]; unpacked as `condition, label` in the plotting cell.
line = ['71G+Ku+X4L4+NIHCO', '1 nM Ku + 10 nM X4L4 + 10nM NIHCOLE']

Plot the relative extension reduction and the dwell time distributions.

In [None]:
# PLOT FIGURES WITH COMPARATIVE TAU PLOT
# Two stacked panels: axs[0] (tall) holds the extension distribution,
# axs[1] (short) holds the dwell-time violin.
fig, axs = plt.subplots(figsize=figsize, nrows=2, sharex=False, sharey=False,
                        gridspec_kw={'width_ratios':[1], 'height_ratios':[3,1], 'wspace':0.25, 'hspace':0.05,
                                     'left':0.1, 'right':0.95, 'bottom':0.13, 'top':0.93})
# -- first row
xmin, xmax = -0.65, 0.15
ymin, ymax = 0.01, 3.8
coords = [xmin, ymin, xmax, ymax]
axs[0].set(ylim=(ymin, ymax), xlim=(xmin, xmax), xlabel='Reduction in DNA extension, ∆z (µm)')
proc.plot_distribution(dataset_merged, dataset_processed, line, coords, axs)

# -- second row
condition, label = line
# NOTE(review): 120 is presumably the acquisition rate in Hz (frames -> seconds) — confirm.
tau = np.array(dataset_processed[condition]['time_rupture_merged'])/120
mean = tau.mean()

# A single violin is drawn, so it sits at categorical position 0 on the x axis.
violin_x = 0
# Remember what is already on the panel so that only the new violin is restyled.
n_existing = len(axs[1].collections)
# Draw explicitly on axs[1] (ax=...) rather than on whatever the current axes happens to be.
# NOTE: seaborn >= 0.13 renames `scale` to `density_norm`; `scale` is kept for older versions.
sns.violinplot(y=tau, scale='width', inner=None, cut=0, color='tomato', ax=axs[1])
for violin in axs[1].collections[n_existing:]:
    violin.set_linewidth(1)
    violin.set_alpha(0.7)
    violin.set_edgecolor('black')

# Mark and annotate the mean dwell time on top of the violin.
axs[1].scatter(x=violin_x, y=mean, marker='o', s=25, color='black', edgecolor='white')
axs[1].annotate(f'{round(mean,2)} s', xycoords='data', xy=(violin_x, mean+0.5),
                size=10, ha='center', va='center')
axs[1].set(ylim=(None, None), ylabel='Dwell time, t (s)')
# One tick per violin, labelled with the display label of the condition.
axs[1].set_xticks([violin_x])
axs[1].set_xticklabels([label])

Save the figure.

In [None]:
# Save the figure built in the previous cell as a PDF in ./figures.
# The folder is created on first use; saving into a missing folder raises FileNotFoundError.
os.makedirs('figures', exist_ok=True)
path = os.path.join('figures', fig_name)
# Save through `fig` so the intended figure is written even if another one is currently active.
fig.savefig(path+'.pdf', transparent=True)
# For a raster copy, additionally call: fig.savefig(path+'.png', transparent=True)