In [None]:
import os, pickle

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

This notebook merges the analysis steps that were trialled previously to produce the final results for molecules 1 and 2.

# Load Data

In [None]:
# Directories containing the cluster and cleaned data files for each molecule
ope_directory = 'INSERT DIRECTORY'  # molecule 1
bpy_directory = 'INSERT DIRECTORY'  # molecule 2

# The same file names are used inside both directories
dfile = 'all_traces_clean_df_2_1.pkl'  # cleaned data file
cfile = 'clean_clust_df_2_1.pkl'  # cluster data file

# NOTE: unpickling can execute code stored in the file; only load pickles from a trusted source.

# Cleaned data frames, re-indexed from 0 (molecule 1 first, then molecule 2)
ope_df_all, bpy_df_all = (
    pd.read_pickle(os.path.join(folder, dfile)).reset_index(drop=True)
    for folder in (ope_directory, bpy_directory)
)

# Cluster data frames, loaded with their stored index
ope_clust_df, bpy_clust_df = (
    pd.read_pickle(os.path.join(folder, cfile))
    for folder in (ope_directory, bpy_directory)
)

In [None]:
# Select the rows used for the final figure: only traces with passed == 1,
# excluding 'Trial4' for molecule 1 and keeping only 'Trial6' for molecule 2
ope_df = ope_df_all[ope_df_all.trial.ne('Trial4') & ope_df_all.passed.eq(1)]
bpy_df = bpy_df_all[bpy_df_all.trial.eq('Trial6') & bpy_df_all.passed.eq(1)]

In [None]:
# Reduction that worked best for each molecule: pca for ope, umap for bpy
ope_red = 'pca'
bpy_red = 'umap'

# Cluster labels (from the 'kmeans' column) to keep for each molecule
ope_clusts = [1, 4]
bpy_clusts = [0, 1, 2, 3, 5, 6, 7, 9]

# Boolean selection: True where the cluster label is one of the kept clusters
ope_selected = ope_clust_df.loc[ope_red, 'kmeans'].isin(ope_clusts)
bpy_selected = bpy_clust_df.loc[bpy_red, 'kmeans'].isin(bpy_clusts)

# Rows of the subset frames that belong to the kept clusters.
# The boolean arrays are applied positionally, so they must be as long as the subset frames.
ope_kept = ope_df.loc[ope_selected.values]
bpy_kept = bpy_df.loc[bpy_selected.values]

# Stack the per-row values into 2-D arrays
ope_logG = np.vstack(ope_kept.logG.values)
bpy_logG = np.vstack(bpy_kept.logG.values)

ope_lens = np.vstack(ope_kept.PlateauLen.values)
bpy_lens = np.vstack(bpy_kept.PlateauLen.values)


# Plot Figure

In [None]:
# Histogram constants read by the plotting helpers defined in this cell
logG_bins = 128  # number of bins used by plot_logG_hist
z_bins = 116  # number of bins used by plot_len_hist
logG_min = -6  # lower x-axis limit set by plot_logG_hist
logG_max = 0.8  # upper x-axis limit set by plot_logG_hist

from scipy.optimize import curve_fit

def gauss(x, A, mu, sig):
    '''
    Evaluate a gaussian with amplitude A, centre mu and width sig at x.
    '''
    squared_offset = (x - mu) ** 2
    exponent = (-1 * squared_offset) / (2 * sig**2)
    return A * np.exp(exponent)
    
def gauss2(x, A, mu1, sig1, B, mu2, sig2):
    '''
    Evaluate the sum of two gaussians at x: (A, mu1, sig1) plus (B, mu2, sig2).
    '''
    first_peak = gauss(x, A, mu1, sig1)
    second_peak = gauss(x, B, mu2, sig2)
    return first_peak + second_peak

def fit_dist(func, x, y, p0, lims):
    '''
    Fit ``func`` to the part of the (x, y) data that lies inside a window.

    Parameters
    ----------
    func : callable
        Model of the form ``func(x, *params)``, as expected by
        ``scipy.optimize.curve_fit``.
    x, y : array-like
        Sample positions and the values observed at them.
    p0 : sequence of float or None
        Initial guess for the parameters of ``func``. ``None`` falls back to
        the ``curve_fit`` default starting point.
    lims : (float, float)
        Lower and upper x bounds of the fitting window; both are exclusive.

    Returns
    -------
    numpy.ndarray
        The optimised parameters returned by ``curve_fit``.
    '''
    x = np.asarray(x)
    y = np.asarray(y)
    in_window = (x > lims[0]) & (x < lims[1])
    # Forward the caller's initial guess: without it curve_fit starts every
    # parameter at 1, which is far from the peaks being fitted here.
    params, _ = curve_fit(func, x[in_window], y[in_window], p0=p0)
    return params

def plot_logG_hist(logGs, logG_max_counts, fit_func, p0, lims, ax, p0_double=None, lims_double=None, double_fit=False):
    '''
    Draw a conductance histogram on ``ax`` and overlay the fitted curve(s) in red.

    The flattened values are binned into ``logG_bins`` bins. ``p0`` and ``lims`` are
    handed to ``fit_dist`` together with ``fit_func``; when ``double_fit`` is true a
    second, independent fit using ``p0_double`` and ``lims_double`` is drawn as well.
    The fitted parameters are printed. Finally the x-axis is limited to
    (logG_min, logG_max) and the y-axis to (0, logG_max_counts).
    '''
    values = logGs.flatten()
    edges = np.histogram_bin_edges(values, bins=logG_bins)
    widths = np.diff(edges)
    centres = edges[:-1] + (widths / 2)
    counts = np.histogram(values, bins=edges)[0]

    ax.bar(centres, counts, width=widths)

    # One (initial guess, window) pair per curve to fit
    fit_specs = [(p0, lims)]
    if double_fit:
        fit_specs.append((p0_double, lims_double))

    # Run every fit first, then report, then draw
    fitted = [fit_dist(fit_func, centres, counts, guess, window) for guess, window in fit_specs]
    for params in fitted:
        print(params)
    for params in fitted:
        ax.plot(centres, fit_func(centres, *params), color='red')

    ax.set(xlim=(logG_min, logG_max), ylim=(0, logG_max_counts))
    
def plot_len_hist(lens, z_min, z_max, fit_func, p0, lims, ax):
    '''
    Draw a plateau length histogram on ``ax`` and overlay the fitted curve in red.

    The values are binned into ``z_bins`` equal-width bins between ``z_min`` and
    ``z_max``. ``p0`` and ``lims`` are handed to ``fit_dist`` together with
    ``fit_func``, and the fitted parameters are printed.
    '''
    edges = np.linspace(z_min, z_max, z_bins + 1)
    widths = np.diff(edges)
    centres = edges[:-1] + (widths / 2)
    counts = np.histogram(lens, bins=edges)[0]

    params = fit_dist(fit_func, centres, counts, p0, lims)
    print(params)
    fitted_curve = fit_func(centres, *params)
    ax.plot(centres, fitted_curve, color='red')

    ax.bar(centres, counts, width=widths)

In [None]:
# 2 x 2 figure: top row is molecule 1 (ope), bottom row is molecule 2 (bpy);
# left column is the conductance histogram, right column the plateau length histogram,
# each with gaussian fits applied
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(6, 4), dpi=600)

# Conductance histograms (molecule 2 gets two separate gaussian fits)
plot_logG_hist(
    ope_logG, 3000,
    fit_func=gauss, p0=(2000, -4, 0.5), lims=(-4, -1),
    ax=axs[0, 0],
)
plot_logG_hist(
    bpy_logG, 15000,
    fit_func=gauss, p0=(3000, -3.7, 0.05), lims=(-4, -3.5),
    double_fit=True, p0_double=(6000, -3, 0.5), lims_double=(-3.5, -2.5),
    ax=axs[1, 0],
)

# Plateau length histograms
plot_len_hist(
    ope_lens, 0, 5E-3,
    fit_func=gauss, p0=(7, 0.0021, 0.0002), lims=(0, 0.005),
    ax=axs[0, 1],
)
plot_len_hist(
    bpy_lens, 0, 5E-3,
    fit_func=gauss, p0=(20, 0.001, 0.0001), lims=(0, 0.002),
    ax=axs[1, 1],
)

# Top row: no x tick labels and no y ticks; bottom row: no y ticks
for top_ax in axs[0]:
    top_ax.set(xticklabels=[], yticks=[])
for bottom_ax in axs[1]:
    bottom_ax.set(yticks=[])

# Axis titles: y on the left column, x on the bottom row
for left_ax in axs[:, 0]:
    left_ax.set_ylabel("Counts", weight='bold')
axs[1, 0].set_xlabel("log(G/G0)", weight='bold')
axs[1, 1].set_xlabel("Displacement / {}m".format(chr(956)), weight='bold')  # chr(956) is the micro sign


# Panel letters just above the top-left corner of each axes, plus thicker frames
labels = ['(a)', '(b)', '(c)', '(d)']
for panel_idx, panel in enumerate(axs.flatten()):
    panel.annotate(
        labels[panel_idx],
        xy=(-0.1, 1.05), xycoords='axes fraction',
        xytext=(0, 0), textcoords='offset pixels',
        weight='bold',
    )
    for side in ('top', 'right', 'bottom', 'left'):
        panel.spines[side].set_linewidth(1.5)


fig.tight_layout()
# fig.savefig("Molecule Results.png", bbox_inches='tight')