In [None]:
import os, sys

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# Load Data

In [None]:
# Location of the cleaned Rotaxane data generated by BJ_filter.ipynb
directory = 'INSERT DIRECTORY'
file = 'INSERT FILENAME'

# Load DataFrame and renumber the rows from zero.
# NOTE(review): read_pickle can execute arbitrary code -- only load files you trust.
pickle_path = os.path.join(directory, file)
all_df = pd.read_pickle(pickle_path).reset_index(drop=True)
all_df.head()

# Data Preprocessing

In [None]:
# Select data subset and extract BJ properties
trial_mask = all_df.trial == 'Trial 2'
df = all_df.loc[trial_mask]

# Stack the per-trace entries of each column into one array per property
logGs, Zs, lens = (np.vstack(df[column].values) for column in ('logG', 'Z', 'PlateauLen'))
logGs.shape

In [None]:
# Analysing clean or raw data?
clean = True
if clean:
    # Keep only the traces flagged as passed, then rebuild the stacked arrays
    passed_mask = df.passed == 1
    df = df.loc[passed_mask]
    logGs, Zs, lens = (np.vstack(df[column].values) for column in ('logG', 'Z', 'PlateauLen'))
logGs.shape

# Dimensionality Reduction

In [None]:
# Fit PCA on the stacked logG traces (one row per trace) and project them.
# No n_components is passed, so scikit-learn's default is used; the seed is fixed
# so that any randomised solver gives repeatable results.
pca = PCA(random_state=42)
pca_red = pca.fit_transform(logGs)

In [None]:
# Quick look at every trace in the plane of the first two PCA output columns
plt.scatter(pca_red[:, 0], pca_red[:, 1], s=1)

# Manually Select Cluster

In [None]:
# Interactive lasso selection on the PCA embedding.
# The Qt backend is activated first (presumably because the lasso needs an
# interactive window), then the scatter is drawn and handed to the selector.
from LassoSelector import SelectFromCollection
%matplotlib qt

data_to_cluster = pca_red
fig, ax = plt.subplots()
pts = ax.scatter(data_to_cluster[:, 0], data_to_cluster[:, 1], s=5)
idxs = []  # NOTE(review): never updated -- `accept` assigns a local `idxs`; later cells read `selector.ind` directly

selector = SelectFromCollection(ax, pts)

def accept(event):
    '''
    Key-press callback: when "enter" is pressed, preview the currently selected traces.

    Reads the selected row positions from `selector.ind`, gathers the matching
    logG and Z traces from `df`, and draws their 2D histogram in a new figure.
    The names `idxs`, `fig` and `ax` assigned here are local to the callback and
    do not modify the module-level variables of the same name.
    '''
    if event.key == "enter":
        idxs = selector.ind
        passed_traces = np.vstack(df.iloc[idxs].logG.values)
        passed_Zs = np.vstack(df.iloc[idxs].Z.values)
                              
        fig, ax = plt.subplots()
        h = ax.hist2d(passed_Zs.flatten(), passed_traces.flatten(), bins=128)

# Register the callback on the selection figure and open the window
fig.canvas.mpl_connect('key_press_event', accept)
ax.set_title("Press enter to accept selected points.")
plt.show()

In [None]:
# Keep the indices currently held by the lasso selector; the following cells use
# them to index logGs / Zs / lens (run this after finishing the manual selection).
selected = selector.ind

In [None]:
# Visualise selected data points
%matplotlib inline
clusts = np.zeros(len(logGs))
clusts[selected] = 1
fig, ax = plt.subplots()
ax.scatter(pca_red[:, 0], pca_red[:, 1], s=1, c=clusts)

In [None]:
# Extract the rows belonging to the manual selection from each stacked array
selected_logG, selected_Z, selected_lens = (
    stacked[selected, :] for stacked in (logGs, Zs, lens)
)

# Plot Histograms

In [None]:
# Histogram settings for the joint figure

# Number of bins along each axis
logG_bins = 128
z_bins = 115

# Plotted ranges (lower bound first)
logG_min, logG_max = -6, 0.8
z_min, z_max = -1.0E-3, 30E-3

# Upper limits for the count axis (1D histogram) and colour scale (2D histogram)
logG_max_counts = 40000
logGz_max_counts = 1500

In [None]:
# Joint figure: (a) PCA embedding coloured by the manual selection, (b) 2D
# conductance-displacement histogram, (c) 1D conductance histogram and
# (d) plateau-length histogram of the selected traces.
fig = plt.figure(figsize=(6, 3), dpi=600)

# The outer grid halves the figure; the right half is subdivided 2x2 and its
# bottom-right cell is left unused.
gs0 = gridspec.GridSpec(1, 2, figure=fig, wspace=0.3)
gs1 = gridspec.GridSpecFromSubplotSpec(2, 2, subplot_spec=gs0[1], width_ratios=(2, 1), height_ratios=(2, 1))
ax1 = fig.add_subplot(gs0[0])
ax2, ax3, ax4 = (fig.add_subplot(gs1[cell]) for cell in ((0, 0), (1, 0), (0, 1)))

flat_Z = selected_Z.flatten()
flat_logG = selected_logG.flatten()

# (b) 2D histogram on a fixed colour scale
norm = plt.Normalize(0, logGz_max_counts)
h1 = ax2.hist2d(flat_Z, flat_logG, bins=(z_bins, logG_bins), range=((z_min, z_max), (logG_min, logG_max)), cmap='Blues', norm=norm)

# Colour bar placed in an inset so that it sits inside panel (b)
sm = plt.cm.ScalarMappable(cmap='Blues', norm=norm)
sm.set_array([])
cax = ax2.inset_axes([0.75, 0.48, 0.05, 0.47])
cbar = plt.colorbar(sm, ax=ax2, cax=cax)
cax.tick_params(axis='y', labelsize=6, right=False, length=-2)

# (c) 1D conductance histogram; bin edges are derived from the data range here,
# not from (logG_min, logG_max)
logG_edges = np.histogram_bin_edges(flat_logG, bins=logG_bins)
logG_widths = np.diff(logG_edges)
logG_counts, _ = np.histogram(flat_logG, bins=logG_edges)
ax4.barh(logG_edges[:-1] + (logG_widths / 2), logG_counts, height=logG_widths)
ax4.set(xlim=(0, logG_max_counts), ylim=(logG_min, logG_max))

# (d) plateau-length histogram over fixed edges, ignoring missing values
len_edges = np.linspace(z_min, z_max, z_bins+1)
len_widths = np.diff(len_edges)
valid_lens = selected_lens[~pd.isnull(selected_lens)]
len_counts, _ = np.histogram(valid_lens, bins=len_edges)
ax3.bar(len_edges[:-1] + (len_widths / 2), len_counts, width=len_widths)
ax3.set(xlim=(z_min, z_max))

# (a) embedding coloured by selection membership
ax1.scatter(pca_red[:, 0], pca_red[:, 1], c=clusts, s=2)

# Hide the tick labels on the edges shared between the histogram panels
ax2.set(xticklabels=[])
ax4.set(xticklabels=[], yticklabels=[], xticks=[])
ax3.set(yticklabels=[], yticks=[])

ax2.set_ylabel('log(G/G0)', weight='bold')
ax3.set_ylabel('Counts', weight='bold')
ax3.set_xlabel('Displacement /' + chr(956) +  'm', weight='bold')
ax4.set_xlabel('Counts', weight='bold')

# Thicker frame on every panel
for panel in (ax1, ax2, ax3, ax4):
    for side in ('top', 'right', 'bottom', 'left'):
        panel.spines[side].set_linewidth(1.5)

# Panel letters, positioned just outside the top-left corner of each axes
panel_tags = ((ax1, '(a)', -0.2), (ax2, '(b)', -0.35), (ax4, '(c)', -0.3), (ax3, '(d)', -0.35))
for panel, tag, x_frac in panel_tags:
    panel.annotate(tag, xy=(x_frac, 1.05), xytext=(0, 0), xycoords='axes fraction', textcoords='offset pixels', weight='bold')

fig.savefig("Rot Clean+ PCA.png", bbox_inches='tight')

# Alignment

In [None]:
def compute_gradients_two(x, y, windows_size):
    '''
    Slide two adjacent windows of `windows_size` samples along a trace and, at each
    position, return the slope of the leading ("future") window minus the slope of
    the trailing ("past") window.

    Each slope comes from a least-squares straight-line fit of y against x.

    Parameters
    ----------
    x, y : sequences of equal length supporting slicing (e.g. 1D arrays)
    windows_size : int, number of samples in each window

    Returns
    -------
    list with len(y) - 2 * windows_size slope differences (empty when the trace
    is too short to hold both windows).
    '''
    def _fitted_slope(xs, ys):
        # Fit ys = m * xs + c and keep only the gradient m
        design = np.vstack([xs, np.ones(len(xs))]).T
        slope, _intercept = np.linalg.lstsq(design, ys, rcond=None)[0]
        return slope

    n_positions = len(y) - 2 * windows_size
    diffs = []
    for start in range(n_positions):
        mid = start + windows_size
        stop = mid + windows_size
        past_m = _fitted_slope(x[start:mid], y[start:mid])
        future_m = _fitted_slope(x[mid:stop], y[mid:stop])
        diffs.append(future_m - past_m)
    return diffs
        

In [None]:
windows_size = 100

# Calculate the numerical 2nd derivative of every selected trace (one row per trace)
all_diffs = np.array([
    compute_gradients_two(selected_Z[row, :], selected_logG[row, :], windows_size)
    for row in range(selected_logG.shape[0])
])
all_diffs.shape

In [None]:
# Visualise the numerical 2nd derivatives on 2D histogram
fig, ax = plt.subplots()
# ax.hist2d(selected_Z[:, windows_size:-windows_size].flatten(), all_diffs.flatten(), bins=400, cmin=0, cmax=200) # x axis as displacement

# Build the x axis from the actual shape of all_diffs instead of a hard-coded
# 1800, so the cell keeps working when the trace length or windows_size changes.
n_traces, n_points = all_diffs.shape
sample_axis = np.tile(np.linspace(0, n_points, n_points), n_traces)
ax.hist2d(sample_axis, all_diffs.flatten(), bins=400, cmin=0, cmax=200) # x axis as indices
# The x axis holds sample positions, not displacements, so label it accordingly
ax.set(xlabel="Sample index", ylabel="d^2log(G/G0)/dx^2")
# ax.set(ylim=(-1000, 1000), xlim=(0, 0.015))

## Perform Alignment
Aligns all 2nd-derivative traces at the first point where they exceed the chosen threshold.

In [None]:
plat_starts = []
search_start = 250 # skip parameter: number of leading 2nd-derivative samples to ignore
threshold = 100 # y value to align at

# For each trace, find the first 2nd-derivative sample at or after search_start
# that exceeds the threshold and record the displacement at that point.
for trace, x in zip(all_diffs, selected_Z):
    hits = np.flatnonzero(trace[search_start:] > threshold)
    if hits.size:
        j = search_start + hits[0]
        # all_diffs is shorter than the trace by 2 * windows_size; adding
        # windows_size maps the index to the boundary between the two windows
        plat_starts.append(x[j + windows_size])
    else:
        # No crossing found (including when search_start lies beyond the end of
        # the trace): fall back to the first displacement so that every trace
        # still gets exactly one offset and plat_starts stays aligned with the rows.
        plat_starts.append(x[0])

# alignment x offsets
plat_starts = np.array(plat_starts)

In [None]:
# Shift each trace's displacement axis by its own alignment offset
align_xs = np.array([
    selected_Z[row] - plat_starts[row]
    for row in range(selected_logG.shape[0])
])
align_xs.shape

In [None]:
# Visualise alignment on 2D histogram
fig, ax = plt.subplots()

# Drop windows_size samples from both ends so the displacement axis has the same
# number of columns as all_diffs
inner = slice(windows_size, -windows_size)
aligned_x = align_xs[:, inner].flatten()
ax.hist2d(aligned_x, all_diffs.flatten(), bins=400, cmin=0, cmax=100)
ax.set(xlabel="Displacement / um", ylabel="d^2log(G/G0)/dx^2")

In [None]:
# Side-by-side comparison of the numerical 2nd derivative before (a) and after (b) alignment
fig, axs = plt.subplots(1, 2, figsize=(6, 4), dpi=600, sharey=True)

# Each panel gets its own (identical) fixed colour scale
norm1 = plt.Normalize(0, 60)
norm2 = plt.Normalize(0, 60)

# Trim windows_size samples from both ends so x matches the width of all_diffs
inner = slice(windows_size, -windows_size)
flat_diffs = all_diffs.flatten()

# The y axis is shared, so only the left panel carries its label
axs[0].set_ylabel('d\u00b2log(G/G\u2080) / dx\u00b2 / {}m\u207b\u00b2'.format(chr(956)), weight='bold')

panels = (
    (axs[0], selected_Z, norm1, '(a)'),
    (axs[1], align_xs, norm2, '(b)'),
)
histograms = []
for panel_ax, panel_x, panel_norm, tag in panels:
    histograms.append(
        panel_ax.hist2d(panel_x[:, inner].flatten(), flat_diffs, bins=400, cmap='Blues', norm=panel_norm)
    )
    panel_ax.set_xlabel('Displacement / {}m'.format(chr(956)), weight='bold')
    panel_ax.annotate(tag, xy=(-0.15, 1.05), xytext=(0, 0), xycoords='axes fraction', textcoords='offset pixels', weight='bold')

    for side in ('top', 'right', 'bottom', 'left'):
        panel_ax.spines[side].set_linewidth(1.5)

    # Colour bar drawn in an inset inside the panel
    mappable = plt.cm.ScalarMappable(cmap='Blues', norm=panel_norm)
    mappable.set_array([])
    inset = panel_ax.inset_axes([0.80, 0.7, 0.05, 0.25])
    plt.colorbar(mappable, ax=panel_ax, cax=inset)
    inset.tick_params(axis='y', labelsize=6, right=False, length=-2)

h1, h2 = histograms

# fig.savefig("Rot Second Diff.png", bbox_inches='tight')

In [None]:
# Visualise the alignment back in conductance space
fig, ax = plt.subplots()

# Cap the colour scale at one count per plotted trace. The count is taken from
# the plotted data rather than from the live lasso selector, so the figure does
# not change if the interactive selection is touched after `selected` was stored.
n_selected = selected_logG.shape[0]
ax.hist2d(align_xs.flatten(), selected_logG.flatten(), bins=400, cmin=0, cmax=n_selected)
ax.set(xlabel="Displacement / um", ylabel="log(G/G0)")
# ax.set(xlim=(-0.01, 0.02))

# ROI Focussing

In [None]:
def extract_currents(x, y, xlims, resampling=False):
    '''
    Keep, row by row, the samples whose x lies strictly inside (xlims[0], xlims[1]).

    Parameters
    ----------
    x, y : 2D arrays of the same shape (one trace per row)
    xlims : (lower, upper) bounds on x; both bounds are exclusive
    resampling : accepted for compatibility, currently unused

    Returns
    -------
    (new_xs, new_ys) : the retained x and y samples of every row, passed through
    np.array. NOTE(review): the result is only a regular 2D array when every row
    keeps the same number of samples -- confirm for your data.
    '''
    in_range = (x > xlims[0]) & (x < xlims[1])
    rows = range(in_range.shape[0])

    new_xs = [x[row, in_range[row]] for row in rows]
    new_ys = [y[row, in_range[row]] for row in rows]
    return np.array(new_xs), np.array(new_ys)

In [None]:
# Displacement window (relative to the aligned origin) to keep
zlims = (0, 0.010)

# Conductance and displacement samples of every aligned trace inside the window
new_Zs, new_logGs = extract_currents(x=align_xs, y=selected_logG, xlims=zlims)

# Plateau lengths shifted by the same per-trace alignment offset
plats = selected_lens.ravel() - plat_starts

# Visualise Result of alignment and ROI focussing

In [None]:
# Histogram settings for the aligned, ROI-focussed figure

# Number of bins along each axis
logG_bins = 128
z_bins = 115

# Plotted ranges (lower bound first)
logG_min, logG_max = -6, 0.8
z_min, z_max = -0.0E-3, 12E-3

# Upper limits for the count axis (1D histogram) and colour scale (2D histogram)
logG_max_counts = 30000
logGz_max_counts = 1000

In [None]:
# Break Junction histograms using the ROI and alignment:
# (a) 2D conductance-displacement histogram, (b) 1D conductance histogram,
# (c) histogram of the offset-corrected plateau lengths.
fig, axs = plt.subplots(2, 2, figsize=(3, 3), dpi=600, gridspec_kw={'width_ratios':[2, 1], 'height_ratios':[2, 1]})
axs[1, 1].remove()  # bottom-right cell is not used
ax_2d, ax_logG, ax_len = axs[0, 0], axs[0, 1], axs[1, 0]

flat_Z = new_Zs.flatten()
flat_logG = new_logGs.flatten()

# (a) 2D histogram on a fixed colour scale
norm = plt.Normalize(0, logGz_max_counts)
h1 = ax_2d.hist2d(flat_Z, flat_logG, bins=(z_bins, logG_bins), range=((z_min, z_max), (logG_min, logG_max)), cmap='Blues', norm=norm)

# (b) 1D conductance histogram; bin edges are derived from the data range
logG_edges = np.histogram_bin_edges(flat_logG, bins=logG_bins)
logG_widths = np.diff(logG_edges)
logG_counts, _ = np.histogram(flat_logG, bins=logG_edges)
ax_logG.barh(logG_edges[:-1] + (logG_widths / 2), logG_counts, height=logG_widths)
ax_logG.set(xlim=(0, logG_max_counts), ylim=(logG_min, logG_max))

# (c) plateau-length histogram over fixed edges, ignoring missing values
len_edges = np.linspace(z_min, z_max, z_bins+1)
len_widths = np.diff(len_edges)
valid_plats = plats[~pd.isnull(plats)]
len_counts, _ = np.histogram(valid_plats, bins=len_edges)
ax_len.bar(len_edges[:-1] + (len_widths / 2), len_counts, width=len_widths)
ax_len.set(xlim=(z_min, z_max))

# Hide the tick labels on the edges shared between the panels
ax_2d.set(xticklabels=[])
ax_logG.set(xticklabels=[], yticklabels=[], xticks=[])
ax_len.set(yticklabels=[], yticks=[])

ax_2d.set_ylabel('log(G/G0)', weight='bold')
ax_len.set_ylabel('Counts', weight='bold')
ax_len.set_xlabel('Displacement /' + chr(956) +  'm', weight='bold')
ax_logG.set_xlabel('Counts', weight='bold')

# Colour bar drawn in an inset so that it sits inside panel (a)
sm = plt.cm.ScalarMappable(cmap='Blues', norm=norm)
sm.set_array([])
cax = ax_2d.inset_axes([0.75, 0.48, 0.05, 0.47])
cbar = plt.colorbar(sm, ax=ax_2d, cax=cax)
cax.tick_params(axis='y', labelsize=6, right=False, length=-2)

# Thicker ticks and frame on every panel
for panel in (ax_2d, ax_logG, ax_len):
    panel.tick_params(width=1.5)
    for side in ('top', 'right', 'bottom', 'left'):
        panel.spines[side].set_linewidth(1.5)

# Panel letters, positioned just outside the top-left corner of each axes
tag_style = dict(xytext=(0, 0), xycoords='axes fraction', textcoords='offset pixels', weight='bold')
ax_2d.annotate('(a)', xy=(-0.35, 1.05), **tag_style)
ax_logG.annotate('(b)', xy=(-0.3, 1.05), **tag_style)
ax_len.annotate('(c)', xy=(-0.35, 1.05), **tag_style)

# plt.subplots_adjust(left=-0.2, right=1.2, bottom=0, top=1)

# fig.savefig("Rot Clean ROI.png", bbox_inches='tight')

# Save the aligned ROI focussed dataset

In [None]:
# Assemble the aligned, ROI-focussed dataset: one row per trace, with the logG
# and Z arrays stored as objects next to the offset-corrected plateau length.
columns = ['logG', 'Z', 'PlateauLen']

# Build the frame in a single call instead of growing it with pd.concat inside
# a loop. The loop was quadratic in the number of traces, concatenated onto an
# empty frame (deprecated behaviour in recent pandas) and left every row with
# index label 0; this version yields a regular 0..n-1 index.
clean_df = pd.DataFrame(
    {
        'logG': list(new_logGs),   # list of 1D row arrays -> one array per cell
        'Z': list(new_Zs),
        'PlateauLen': plats,
    },
    columns=columns,
)
clean_df.head()

In [None]:
# Write the aligned, ROI-focussed DataFrame to disk (replace the placeholder with the output path)
clean_df.to_pickle('INSERT FILENAME')