In [None]:
import os, pickle

import numpy as np
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP

from sklearn.cluster import KMeans

import matplotlib as mpl
import matplotlib.pyplot as plt

# Load Data

In [None]:
# Folder that holds the outputs of both BJ_filter.ipynb and BJ_ML_filter.ipynb
directory = 'INSERT DIRECTORY'

dfile = 'INSERT FILENAME'  # output file of BJ_filter.ipynb
cfile = 'INSERT FILENAME'  # output file of BJ_ML_filter.ipynb

# Read both pickled DataFrames; only the trace table gets its index reset.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
trace_path = os.path.join(directory, dfile)
cluster_path = os.path.join(directory, cfile)
df_all = pd.read_pickle(trace_path).reset_index()
clust_df = pd.read_pickle(cluster_path)

# Keep the rows of 'Trial6' whose `passed` flag equals 1
keep_rows = (df_all.trial == 'Trial6') & (df_all.passed == 1)
df = df_all.loc[keep_rows]
df.shape

# Data Preprocessing

In [None]:
# Keep the traces whose k-means label, for the DR method chosen in
# BJ_ML_filter.ipynb, is one of the clusters listed in `clusts`.
reducer = 'umap'
clusts = [0, 1, 2, 3, 5, 6, 7, 9]

kmeans_labels = clust_df.loc[reducer, 'kmeans']
selected = kmeans_labels.isin(clusts)
# The mask is applied positionally (via .values), not aligned on the index.
selected_df = df.loc[selected.values]
selected_df.head()

In [None]:
# Stack the per-trace conductance (logG) and displacement (Z) arrays of the
# selected traces into 2D arrays, one row per trace.
logGs = np.vstack(selected_df['logG'].values)
Zs = np.vstack(selected_df['Z'].values)

# 2D histogram of every (Z, logG) point of the selected traces
fig, ax = plt.subplots()
h = ax.hist2d(
    Zs.ravel(),
    logGs.ravel(),
    bins=(133, 128),
    range=((0, 0.002), (-6, 0.3)),
    cmin=0,
    cmax=300,
)

In [None]:
def reduce_dims(traces, perp):
    '''
    Run PCA, t-SNE and UMAP on the same data and return the three embeddings.

    Parameters
    ----------
    traces : array-like
        Data passed unchanged to each reducer's ``fit_transform``.
    perp : number
        Used both as the t-SNE ``perplexity`` and as the UMAP ``n_neighbors``.

    Returns
    -------
    tuple
        ``(pca_red, tsne_red, umap_red)`` in that order. PCA is explicitly
        two-component; t-SNE and UMAP use their library default dimensionality.
    '''
    # All three reducers are seeded with random_state=42.
    reducers = (
        PCA(n_components=2, random_state=42),
        TSNE(perplexity=perp, random_state=42),
        UMAP(n_neighbors=perp, min_dist=0.0, random_state=42),
    )
    pca_red, tsne_red, umap_red = (model.fit_transform(traces) for model in reducers)
    return pca_red, tsne_red, umap_red

In [None]:
def _stack_rows(rows):
    '''Stack 1D rows into a 2D array, or into a 1D object array when their lengths differ.'''
    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)
    # np.array() refuses ragged input on recent NumPy, so fill an object array
    # element by element instead.
    ragged = np.empty(len(rows), dtype=object)
    for i, row in enumerate(rows):
        ragged[i] = row
    return ragged


def extract_currents(x, y, xlims, resampling=False):
    '''
    Extract, row by row, the x and y values whose x lies strictly inside xlims.

    Parameters
    ----------
    x, y : 2D array-like
        One row per trace; ``y[i]`` is masked with the selection computed
        from ``x[i]``.
    xlims : sequence of two numbers
        ``(lower, upper)``; both bounds are exclusive.
    resampling : bool, optional
        Accepted for interface compatibility; this function does not use it.

    Returns
    -------
    (new_xs, new_ys) : tuple of numpy.ndarray
        2D arrays when every row keeps the same number of points, otherwise
        1D object arrays holding one 1D array per row.
    '''
    x = np.asarray(x)
    y = np.asarray(y)
    inside = (x > xlims[0]) & (x < xlims[1])

    num_rows = inside.shape[0]
    new_xs = [x[i, inside[i]] for i in range(num_rows)]
    new_ys = [y[i, inside[i]] for i in range(num_rows)]
    return _stack_rows(new_xs), _stack_rows(new_ys)
        
    

## Alignment and ROI Focussing

In [None]:
# Region of interest on the displacement axis; only points whose Z lies
# strictly between these limits are kept.
start_z = 0.000
end_z = 0.002

new_Zs, new_logGs = extract_currents(
    x=Zs,
    y=logGs,
    xlims=(start_z, end_z),
    resampling=True,
)

In [None]:
# When enabled, the ROI-restricted arrays replace logGs/Zs for the rest of the
# notebook; the previous arrays stay reachable as old_logGs/old_Zs.
# NOTE: running this cell a second time overwrites old_* with the replaced arrays.
aligning = True
if aligning:
    old_logGs, logGs = logGs, new_logGs
    old_Zs, Zs = Zs, new_Zs

## Dimensionality Reduction

In [None]:
# Embed the current logGs (the aligned ROI when `aligning` is on) with all
# three DR methods; perp drives both the t-SNE perplexity and UMAP n_neighbors.
pca_red, tsne_red, umap_red = reduce_dims(traces=logGs, perp=1100)

In [None]:
# One panel per DR method, left to right: PCA, t-SNE, UMAP
fig, axs = plt.subplots(1, 3, figsize=(6, 2), dpi=600)
for panel, embedding in zip(axs, (pca_red, tsne_red, umap_red)):
    panel.scatter(embedding[:, 0], embedding[:, 1], s=0.5)

fig.tight_layout()

In [None]:
# Use a Lasso Selector to manually inspect a cluster.
# NOTE(review): LassoSelector is imported here rather than in the import cell;
# presumably a project-local module - confirm it is importable from this notebook.
from LassoSelector import SelectFromCollection
# Switch matplotlib to the Qt backend before the figure below is created.
%matplotlib qt

# Embedding drawn in the scatter plot that the selector operates on
data_to_cluster = tsne_red
fig, ax = plt.subplots()
pts = ax.scatter(data_to_cluster[:, 0], data_to_cluster[:, 1], s=5)
idxs = []

# Bind the selector to the scatter collection drawn on this axes
selector = SelectFromCollection(ax, pts)

def accept(event):
    '''
    Key-press callback: on Enter, plot the mean logG trace of the selected points.

    Parameters
    ----------
    event : key-press event
        Only ``event.key`` is read; any key other than "enter" is ignored.

    Notes
    -----
    Reads the notebook-level ``selector`` (for ``selector.ind``) and
    ``selected_df``. A new figure is opened for the averaged trace.
    '''
    if event.key != "enter":
        return

    picked = selector.ind
    if len(picked) == 0:
        # Nothing selected yet: np.vstack would raise on an empty sequence.
        return

    # The scatter points were embedded from the rows of selected_df, so the
    # selector's positions index that frame (as on_pick does), not df.
    passed_traces = np.vstack(selected_df.iloc[picked].logG.values)
    avg_trace = passed_traces.mean(axis=0)

    fig, ax = plt.subplots()
    ax.plot(avg_trace)
    # Alternative views of the same selection (e.g. a 2D histogram of Z against
    # logG, or a 1D logG histogram) can be drawn on `ax` here.

# Call accept() on every key press in the scatter figure; accept() itself
# only reacts to the Enter key.
fig.canvas.mpl_connect('key_press_event', accept)
ax.set_title("Press enter to accept selected points.")
plt.show()

In [None]:
# Use a Data Picker to manually inspect a scatter point.
# Switch matplotlib to the Qt backend before the figure below is created.
%matplotlib qt
# Embedding drawn in the pickable scatter plot
data_to_cluster = tsne_red
fig, ax = plt.subplots()
# Passed to scatter() as `picker`; presumably the pick radius in points - confirm against the matplotlib docs.
tolerance = 1
pts = ax.scatter(data_to_cluster[:, 0], data_to_cluster[:, 1], s=5, picker=tolerance)
idxs = []

def on_pick(event):
    '''
    Pick callback: plot logG against Z for every scatter point under the click.

    Parameters
    ----------
    event : pick event
        ``event.ind`` holds the positions of the picked scatter points; they
        are used to index the notebook-level ``selected_df``.

    Notes
    -----
    Opens a new figure per pick, with the x-axis limited to (0, 0.002).
    '''
    picked_rows = selected_df.iloc[event.ind]
    traces = np.vstack(picked_rows.logG.values)
    # Named `displacements` so the notebook-level `Zs` array is not shadowed.
    displacements = np.vstack(picked_rows.Z.values)

    fig, ax = plt.subplots()
    # Transposed so that each picked trace is drawn as its own line.
    ax.plot(displacements.T, traces.T)
    ax.set(xlim=(0, 0.002))

# Call on_pick() whenever a scatter point of this figure is picked
fig.canvas.mpl_connect('pick_event', on_pick)

## Clustering

Cluster the 2D embeddings from the DR performed on aligned ROIs

In [None]:
# Return to inline rendering after the Qt-backend cells above.
%matplotlib inline

In [None]:
# Fit one 10-cluster KMeans (random_state=42) to each 2D embedding
p_kmeans, t_kmeans, u_kmeans = (
    KMeans(n_clusters=10, random_state=42).fit(embedding)
    for embedding in (pca_red, tsne_red, umap_red)
)

In [None]:
# One panel per DR method (PCA, t-SNE, UMAP), each point coloured by the
# k-means label fitted on that embedding.
fig, axs = plt.subplots(1, 3, figsize=(6, 2), dpi=600)
panels = zip(axs, (pca_red, tsne_red, umap_red), (p_kmeans, t_kmeans, u_kmeans))
for panel, embedding, fitted in panels:
    panel.scatter(embedding[:, 0], embedding[:, 1], s=0.5, c=fitted.labels_)

# Same layout call as the other three-panel figures in this notebook.
fig.tight_layout()

In [None]:
# Highlight a chosen set of k-means clusters on one of the embeddings
reducer = 'pca'
labels = [2, 6, 1]
fig, ax = plt.subplots()

# reducer name -> (2D embedding, fitted KMeans); any other name leaves the axes empty
by_reducer = {
    'pca': (pca_red, p_kmeans),
    'tsne': (tsne_red, t_kmeans),
    'umap': (umap_red, u_kmeans),
}
if reducer in by_reducer:
    embedding, fitted = by_reducer[reducer]
    kmean_clusts = fitted.labels_
    # True for points whose cluster is listed in `labels`; used as the colour value
    selected = np.isin(kmean_clusts, labels)
    ax.scatter(embedding[:, 0], embedding[:, 1], c=selected)

In [None]:
# Inspect grouping of clusters (example groupings shown)
kmean_clusts = p_kmeans.labels_

# Alternative four-group example, with group ids 1, 2, 3 and 4 respectively:
#   clust1_labels = [0, 5]
#   clust2_labels = [3, 6]
#   clust3_labels = [4, 8]
#   clust4_labels = [2, 7, 9]
clust1_labels = [0, 3, 4, 8]
clust2_labels = [1, 5, 7, 9]
clust3_labels = [2, 6]

num_traces = tsne_red.shape[0]
new_labels = np.zeros(num_traces)

# Each trace receives the id (0, 1, 2) of the group holding its k-means
# cluster. Group 0 adds nothing, so a cluster absent from every group also
# ends up with label 0.
for group_id, members in enumerate((clust1_labels, clust2_labels, clust3_labels)):
    new_labels = new_labels + (group_id * np.isin(kmean_clusts, members))

plt.scatter(pca_red[:, 0], pca_red[:, 1], c=new_labels)

## Average BJ Traces
Creates average BJ traces based on how clusters were grouped above

In [None]:
# One mean logG trace and one mean Z trace per group label, stored in the
# order returned by np.unique(new_labels).
mean_traces = []
mean_Zs = []
for group in np.unique(new_labels):
    members = new_labels == group
    mean_traces.append(logGs[members].mean(axis=0))
    mean_Zs.append(Zs[members].mean(axis=0))

In [None]:
unique_labels = np.unique(new_labels)
num_clusters = len(unique_labels)

# plt.get_cmap is available across matplotlib versions, whereas
# mpl.cm.get_cmap was removed in matplotlib 3.9.
cmap = plt.get_cmap('viridis')
# One colour per group, evenly spaced over the colormap
colors = [cmap(x) for x in np.linspace(0, 1, num_clusters)]
artists = []
fig, ax = plt.subplots(figsize=(6, 4), dpi=600)
# mean_Zs / mean_traces hold one entry per unique label, in sorted label
# order, so they are indexed by position rather than by the label's value.
for i in range(num_clusters):
    art = ax.plot(mean_Zs[i], mean_traces[i], color=colors[i])
    artists.append(art)
ax.set(xlim=(0, 0.001));

# Figures

In [None]:
# Plots the grouped clusters (a) alongside their corresponding average BJ traces (b)
fig, axs = plt.subplots(1, 2, figsize=(6, 3), dpi=600)

# To show the t-SNE embedding instead, scatter tsne_red[:, 0] / tsne_red[:, 1] here.
axs[0].scatter(pca_red[:, 0], pca_red[:, 1], c=new_labels, s=1)

# mean_Zs / mean_traces / colors each hold one entry per group, in the same
# order, so the i-th mean trace is drawn with the i-th colour.
for i in range(num_clusters):
    axs[1].plot(mean_Zs[i], mean_traces[i], color=colors[i])

axs[1].set(xlim=(-0.000, 0.0015), ylim=(-6, 0.3))
axs[1].set_ylabel('log(G/G0)', weight='bold')
# chr(956) is the Greek letter mu
axs[1].set_xlabel('Displacement / {}m'.format(chr(956)), weight='bold')

# Thicken the frame of both panels
for panel in axs:
    for side in ['top', 'right', 'bottom', 'left']:
        panel.spines[side].set_linewidth(1.5)

# Panel tags, placed just outside the top-left corner of each axes
for panel, tag in zip(axs, ('(a)', '(b)')):
    panel.annotate(tag, xy=(-0.2, 1.05), xytext=(0, 0), xycoords='axes fraction', textcoords='offset pixels', weight='bold')

fig.tight_layout()
fig.savefig("BPY Clean AlignROI DR Avg Traces.png")


In [None]:
# Plot of DR outputs after alignment and ROI focussing
fig, axs = plt.subplots(1, 3, figsize=(6, 2), dpi=600)

labels = ['(a)', '(b)', '(c)']
# Panels left to right: PCA, t-SNE, UMAP. Pass c=<fitted kmeans>.labels_ to
# scatter() to colour the points by cluster.
for panel, tag, embedding in zip(axs, labels, (pca_red, tsne_red, umap_red)):
    panel.scatter(embedding[:, 0], embedding[:, 1], s=1)
    panel.tick_params(width=1.5)
    for side in ('top', 'right', 'bottom', 'left'):
        panel.spines[side].set_linewidth(1.5)
    panel.annotate(tag, xy=(-0.25, 1.05), xytext=(0, 0), xycoords='axes fraction', textcoords='offset pixels', weight='bold')

fig.tight_layout()
# fig.savefig("BPY Clean AlignROI DR.png")