In [None]:
import os, glob, pickle

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

## Load Data

In [None]:
directory = 'INSERT_DIRECTORY' # Load simulated data created by "CreateData.ipynb"
# glob.glob returns paths in a file-system-dependent order; sort them so the
# class order (and therefore plot colours and legend entries) is the same on
# every run and machine.
files = sorted(glob.glob(os.path.join(directory, '*.pkl')))
files

In [None]:
# Class label for each file: the second-to-last '_'-separated token of its path.
# NOTE(review): presumably filenames look like "<prefix>_<molecule>_<suffix>.pkl";
# confirm against the naming used by the data-creation notebook.
molecules = [path.split('_')[-2] for path in files]
molecules

In [None]:
# Load one pickled dataset per class, in the same order as `files`/`molecules`.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point `directory` at files you generated yourself.
all_data = []
for molecule, file in zip(molecules, files):
    print(molecule)

    with open(file, 'rb') as f:
        all_data.append(pickle.load(f))

## Event Analysis

### Duration
Calculate the duration of each event for each class

In [None]:
# all_event_durs[k] holds the duration (end index - start index) of every
# non-empty event of class k, pooled over all of that class's traces.
all_event_durs = []
for mol_idx in range(len(molecules)):
    mol_events = all_data[mol_idx][1]  # per-trace lists of events for this class
    spans = (
        evt[1] - evt[0]
        for trace_evts in mol_events
        for evt in trace_evts
    )
    # Zero-length events carry no data points, so they are dropped.
    all_event_durs.append([span for span in spans if span != 0])

Plot the distribution of event durations grouped by class

In [None]:
# Pool the durations of every class so all classes share one set of bin edges.
all_event_durs_flattened = [dur for mol_durs in all_event_durs for dur in mol_durs]
bin_edges = np.histogram_bin_edges(all_event_durs_flattened, bins=60)
# Bin centres: each right-hand edge shifted back by half the width of the first bin.
half_bin_width = (bin_edges[1] - bin_edges[0]) / 2
bin_mids = bin_edges[1:] - half_bin_width

In [None]:
# Overlaid histograms of event duration, one semi-transparent bar series per class.
fig, ax = plt.subplots()
bar_widths = np.diff(bin_edges)  # same for every class, so compute once
for durs in all_event_durs:
    counts = np.histogram(durs, bins=bin_edges)[0]
    ax.bar(bin_mids, counts, width=bar_widths, alpha=0.7)
# Durations are still index differences here (not yet converted to time).
ax.set_xlabel('Event Duration / datapoints')
ax.set_ylabel('Count')
ax.legend(molecules)

### Height
Calculate the height of each event for each class as the median of the data points within the event

In [None]:
# all_event_heights[k] holds the median signal value of every non-empty event
# of class k, in the same order as all_event_durs[k].
all_event_heights = []
for mol_idx in range(len(molecules)):
    mol_events = all_data[mol_idx][1]
    mol_heights = []
    for trace_idx, trace_evts in enumerate(mol_events):
        # Column `trace_idx` of the class's signal array is the trace these events index into.
        trace = all_data[mol_idx][0][:, trace_idx]
        mol_heights.extend(
            np.median(trace[evt[0]:evt[1]])
            for evt in trace_evts
            if evt[1] - evt[0] != 0  # skip zero-length events
        )
    all_event_heights.append(mol_heights)

Plot the distribution of event heights grouped by class

In [None]:
# Pool the heights of every class so all classes share one set of bin edges.
all_event_heights_flattened = [h for mol_heights in all_event_heights for h in mol_heights]
bin_edges = np.histogram_bin_edges(all_event_heights_flattened, bins=60)
# Bin centres: each right-hand edge shifted back by half the width of the first bin.
half_bin_width = (bin_edges[1] - bin_edges[0]) / 2
bin_mids = bin_edges[1:] - half_bin_width

In [None]:
# Overlaid histograms of event height, one semi-transparent bar series per class.
fig, ax = plt.subplots()
bar_widths = np.diff(bin_edges)  # same for every class, so compute once
for heights in all_event_heights:
    counts = np.histogram(heights, bins=bin_edges)[0]
    ax.bar(bin_mids, counts, width=bar_widths, alpha=0.7)
# Unit follows the 'Event Height / nA' label used by the joint plot in this notebook.
ax.set_xlabel('Event Height / nA')
ax.set_ylabel('Count')
ax.legend(molecules)

### Scatter

Plot a scatter of event height vs duration grouped by class

In [None]:
# One scatter series per class; durations and heights are index-aligned because
# both lists skip the same zero-length events in the same traversal order.
fig, ax = plt.subplots()
for i in range(len(molecules)):
    ax.scatter(all_event_durs[i], all_event_heights[i], s=3, alpha=0.7, label=molecules[i])
# Durations are still index differences here (not yet converted to time).
ax.set_xlabel('Event Duration / datapoints')
# Unit follows the 'Event Height / nA' label used by the joint plot in this notebook.
ax.set_ylabel('Event Height / nA')
ax.legend()

## Intraevent Current

Calculate the standard deviation of values within each event

In [None]:
# all_event_devs[k] holds the standard deviation of the signal inside every
# non-empty event of class k.
all_event_devs = []
for mol_idx in range(len(molecules)):
    mol_events = all_data[mol_idx][1]
    mol_devs = []
    for trace_idx, trace_evts in enumerate(mol_events):
        # Column `trace_idx` of the class's signal array is the trace these events index into.
        trace = all_data[mol_idx][0][:, trace_idx]
        mol_devs.extend(
            trace[evt[0]:evt[1]].std()
            for evt in trace_evts
            if evt[1] - evt[0] != 0  # skip zero-length events
        )
    all_event_devs.append(mol_devs)
    

Plot the distribution of event deviations grouped by class

In [None]:
# Pool the deviations of every class so all classes share one set of bin edges.
all_event_devs_flattened = [dev for mol_devs in all_event_devs for dev in mol_devs]
bin_edges = np.histogram_bin_edges(all_event_devs_flattened, bins=60)
# Bin centres: each right-hand edge shifted back by half the width of the first bin.
half_bin_width = (bin_edges[1] - bin_edges[0]) / 2
bin_mids = bin_edges[1:] - half_bin_width

In [None]:
# Overlaid histograms of intra-event standard deviation, one bar series per class.
fig, ax = plt.subplots()
bar_widths = np.diff(bin_edges)  # same for every class, so compute once
for devs in all_event_devs:
    counts = np.histogram(devs, bins=bin_edges)[0]
    ax.bar(bin_mids, counts, width=bar_widths, alpha=0.7)
# Same unit as the signal itself; 'nA' follows the joint plot's height label.
ax.set_xlabel('Intraevent Standard Deviation / nA')
ax.set_ylabel('Count')
ax.legend(molecules)

## Duration and Height

Organise the calculated properties into a Pandas DataFrame for easier handling

In [None]:
columns = ['Molecule', 'Event Height', 'Event Duration']

# Collect one (molecule, height, duration) record per non-empty event and build
# the frame once at the end. Growing the frame with pd.concat inside the loop
# copies every existing row on each event (quadratic time) and starts from an
# empty frame whose columns are object-dtype rather than numeric.
records = []
for i in range(len(molecules)):
    all_mol_evts = all_data[i][1]
    for j, trace_evts in enumerate(all_mol_evts):
        trace_data = all_data[i][0][:, j]
        for evt in trace_evts:
            dur = evt[1] - evt[0]
            if dur == 0:
                continue  # zero-length events have no data points to summarise
            height = np.median(trace_data[evt[0]:evt[1]])
            records.append((molecules[i], height, dur))

# Passing `columns` keeps the expected schema even when no events were found.
df = pd.DataFrame(records, columns=columns)
df

In [None]:
# Convert durations from datapoints to milliseconds. Assumes 25kHz sampling rate
# NOTE: this overwrites the column in place, so run this cell exactly once after
# building `df`; running it again would rescale the durations a second time.
SAMPLE_RATE_HZ = 25000
MS_PER_SECOND = 1000
df['Event Duration'] = df['Event Duration'] / SAMPLE_RATE_HZ * MS_PER_SECOND

Display the marginal distributions and a scatter plot of event height against event duration on a single joint plot

In [None]:
# Joint figure: scatter of height vs duration in the centre, with per-class
# histograms of each variable on the top and right margins.
g = sns.JointGrid(height=5)
g.figure.set_dpi(600)

joint_ax, top_ax, right_ax = g.ax_joint, g.ax_marg_x, g.ax_marg_y

sns.scatterplot(data=df, x='Event Duration', y='Event Height', hue='Molecule', ax=joint_ax)
sns.histplot(data=df, x='Event Duration', hue='Molecule', ax=top_ax, bins=59)
sns.histplot(data=df, y='Event Height', hue='Molecule', ax=right_ax)

joint_ax.set_xlabel('Event Duration / ms', weight='bold')
joint_ax.set_ylabel('Event Height / nA', weight='bold')

# The central scatter already carries the class legend; drop the duplicates.
for marginal_ax in (top_ax, right_ax):
    marginal_ax.get_legend().remove()
# Fixed count ranges for the marginal histograms.
top_ax.set_ylim(0, 1500)
right_ax.set_xlim(0, 3000)

# Thicker ticks and frame on the central axes.
joint_ax.tick_params(width=1.5)
for side in ('top', 'right', 'bottom', 'left'):
    joint_ax.spines[side].set_linewidth(1.5)

g.figure.tight_layout()

# g.figure.savefig('SimEvts.png')