In [1]:
import logging
logging.basicConfig(level=logging.INFO)

import sys
sys.path.insert(0, "D:/IIT/DDP/DDP/B-SOID/")
import random
import numpy as np
import seaborn as sns 

from BSOID.bsoid import *
from BSOID.preprocessing import *
from BSOID.features import *
from behavelet import wavelet_transform



In [2]:
%matplotlib notebook
def plot(X, **kwargs):
    """Scatter-plot the first two columns of ``X`` and show the figure.

    Any keyword arguments are passed straight through to ``plt.scatter``.
    """
    first_col, second_col = X[:, 0], X[:, 1]
    plt.scatter(first_col, second_col, **kwargs)
    plt.show()

# Embedding for Visualization

In [None]:
def extract_feats(filtered_data, fps, stride_window):
    """Build pairwise link-angle and link-length features from x/y coordinates.

    Parameters
    ----------
    filtered_data : mapping
        Must provide 2-D arrays of identical shape under the keys ``'x'`` and
        ``'y'``. Each column is smoothed independently and every pair of
        columns forms one "link".
    fps : number
        Currently unused: the fps-derived smoothing window is disabled in
        favour of a fixed window (see ``win_len`` below).
    stride_window : int or None
        When greater than 1, link lengths are aggregated with
        ``windowed_feats(..., mode="mean")`` and link angles with
        ``windowed_feats(..., mode="sum")``. ``None`` or a value <= 1 leaves
        the per-row features untouched.

    Returns
    -------
    np.ndarray
        Link angles followed (column-wise) by link lengths, i.e. the first
        half of the columns are angles and the second half are lengths.
    """
    x_raw, y_raw = filtered_data['x'], filtered_data['y']
    assert x_raw.shape == y_raw.shape
    # Unpacking two values also enforces that the inputs are 2-D.
    _, n_dpoints = x_raw.shape

    # Fixed smoothing window. The previous fps-based choice was:
    #     win_len = np.int(np.round(0.05 / (1 / fps)) * 2 - 1) if stride_window  is None else stride_window // 2
    win_len = 3
    x, y = np.zeros_like(x_raw), np.zeros_like(y_raw)
    for i in range(n_dpoints):
        x[:,i] = smoothen_data(x_raw[:,i], win_len)
        y[:,i] = smoothen_data(y_raw[:,i], win_len)

    # One (rows, 2) array of x/y differences per unordered pair of columns.
    links = [np.array([x[:,i] - x[:,j], y[:,i] - y[:,j]]).T for i, j in combinations(range(n_dpoints), 2)]
    link_lens = np.vstack([np.linalg.norm(link, axis=1) for link in links]).T
    link_angles = np.vstack([np.arctan2(link[:,1], link[:,0]) for link in links]).T

    # Guard against None so a missing stride window means "no windowing"
    # instead of a TypeError from comparing None with an int.
    if stride_window is not None and stride_window > 1:
        link_lens = windowed_feats(link_lens, stride_window, mode="mean")
        link_angles = windowed_feats(link_angles, stride_window, mode="sum")

    return np.hstack((link_angles, link_lens))

def subsample(data, num_points):
    """Pick ``num_points`` evenly spaced rows from ``data``.

    Parameters
    ----------
    data : array-like with a ``shape`` attribute
        Rows are sampled along the first axis.
    num_points : int
        Number of rows to keep.

    Returns
    -------
    A slice of ``data``. When ``data`` has more rows than ``num_points`` the
    result has exactly ``num_points`` rows, taken every
    ``N // num_points`` rows and offset so the last stride ends at the end of
    the data. When ``data`` has ``num_points`` rows or fewer, all rows are
    returned.
    """
    N = data.shape[0]
    if N <= num_points:
        # Not enough rows to subsample: keep everything. (The original code
        # assigned the clamp to a misspelt variable and returned an empty
        # slice in this case.)
        return data[:]

    skip_length = int(N // num_points)
    # Skip the leading remainder so that exactly num_points strides fit.
    first_frame = N % num_points
    return data[first_frame:first_frame + num_points * skip_length:skip_length]

def strided_subsample(data, stride_window):
    """Return every ``stride_window``-th row of ``data``, starting at row 0.

    The final row of ``data`` is never included, because the slice stops one
    row short of the end.
    """
    last_row = data.shape[0] - 1
    return data[0:last_row:stride_window]

In [3]:
# Build the BSOID object from the project config and load its filtered data.
bsoid = BSOID("../config/config.yaml")
fdata = bsoid.load_filtered_data()
# Alternative: pick one key at random instead of a fixed one.
# fdata = fdata[random.sample(list(fdata.keys()), 1)[0]]
# Keep only the entries stored under the "C57BL/6NJ" key.
fdata = fdata["C57BL/6NJ"]
# Extract features for every item in parallel and stack the results row-wise.
# NOTE(review): extract_comb_feats is not defined in this notebook; it
# presumably comes from one of the BSOID star imports, and the trailing 3 is
# presumably its stride window -- confirm against that module.
feats = np.vstack(Parallel(n_jobs=-1)(delayed(extract_comb_feats)(data, bsoid.fps, 3) for data in fdata))
# Drop the reference to the loaded data now that the features are built.
del fdata
feats.shape

    Run ID       : dis
 Save Location   : D:/IIT/DDP/data\dis/output
      FPS        : 30
 Min. Confidence : 0.3
  Stride Window  : 333ms



(179990, 56)

In [None]:
def par_wav_trans(x, numPoints):
    """Wavelet-transform one feature column and subsample the result.

    ``x`` is reshaped to a single column, passed to ``wavelet_transform``
    (25 frequencies between 0.15 and 15, sample rate 30), and the third
    element of the returned tuple is kept. Despite its name, ``numPoints`` is
    forwarded to ``strided_subsample`` as the stride.
    """
    column = x.reshape(-1, 1)
    transform_out = wavelet_transform(column, n_freqs=25, fmax=15, fmin=0.15, fsample=30)
    wav_feats = transform_out[2]
    return strided_subsample(wav_feats, numPoints)
# Wavelet-transform feature columns 28 onwards (one job per column, 3 workers)
# with a stride of 3, then place the per-column results side by side.
# NOTE(review): columns 28+ are presumably the link-length half of the
# features -- confirm against the feature extractor used above.
wav = np.hstack(Parallel(n_jobs=3)(delayed(par_wav_trans)(feats[:,i], 3) for i in range(28, feats.shape[1])))
wav.shape

In [None]:
from numba import njit
@njit(fastmath=True)
def KL(x, y):
    """Squared Kullback-Leibler divergence (in bits) between two 1-D arrays.

    Both inputs are first normalised to sum to 1. Entries that are not
    strictly positive contribute a log of 0, so zero entries in either array
    do not produce infinities.

    Note that the return value is the *square* of the divergence, not the
    divergence itself.
    """
    x, y = x / x.sum(), y / y.sum()

    # Only take logs of strictly positive entries. The earlier approach
    # (compute log everywhere, then zero the non-finite results via
    # np.isfinite) is unsafe under fastmath=True, which allows the compiler
    # to assume that NaN and Inf never occur.
    logx, logy = np.zeros_like(x), np.zeros_like(y)
    for i in range(x.shape[0]):
        if x[i] > 0:
            logx[i] = np.log(x[i])
    for i in range(y.shape[0]):
        if y[i] > 0:
            logy[i] = np.log(y[i])

    D = np.sum(x * logx) - np.sum(x * logy)
    # Convert from nats to bits.
    D /= np.log(2)
    return np.square(D)

In [5]:
# from openT
# UMAP settings; the same dict is reused for the 2-D visualisation embedding.
# NOTE(review): no random_state is set, so the embedding (and therefore the
# clustering below) may differ from run to run.
umap_params = {"n_neighbors": 90, "min_dist": 0.0, "metric": "euclidean", "verbose": True}
# Standardise the first 28 feature columns and embed them into 12 dimensions.
embed = umap.UMAP(n_components=12, **umap_params).fit_transform(StandardScaler().fit_transform(feats[:,:28]))
# Cluster only the rows of `embed` that contain no NaN. The recorded log shows
# min_sample_prop going from 0.4 to 1.2 in 9 steps, so [0.4, 1.2, 9] is
# presumably (start, stop, count) -- confirm against cluster_with_hdbscan.
assignments, _, soft_assignments, _ = cluster_with_hdbscan(embed[~np.isnan(embed).any(axis=1)], [0.4, 1.2, 9], bsoid.hdbscan_params)

UMAP(dens_frac=0.0, dens_lambda=0.0, min_dist=0.0, n_components=12,
     n_neighbors=90, verbose=True)
Construct fuzzy simplicial set
Wed May 12 23:01:22 2021 Finding Nearest Neighbors
Wed May 12 23:01:22 2021 Building RP forest with 26 trees
Wed May 12 23:01:23 2021 NN descent for 17 iterations
	 1  /  17
	 2  /  17
	 3  /  17
	Stopping threshold met -- exiting after 3 iterations
Wed May 12 23:03:16 2021 Finished Nearest Neighbor Search
Wed May 12 23:03:21 2021 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Wed May 12 23:06:45 2021 Finished embedding


INFO:root:identified 4 clusters (max is 4) with min_sample_prop=0.4 and entropy_ratio=0.153
INFO:root:identified 4 clusters (max is 4) with min_sample_prop=0.5 and entropy_ratio=0.153
INFO:root:identified 4 clusters (max is 4) with min_sample_prop=0.6 and entropy_ratio=0.153
INFO:root:identified 4 clusters (max is 4) with min_sample_prop=0.7 and entropy_ratio=0.153
INFO:root:identified 4 clusters (max is 4) with min_sample_prop=0.8 and entropy_ratio=0.153
INFO:root:identified 3 clusters (max is 4) with min_sample_prop=0.9 and entropy_ratio=0.182
INFO:root:identified 3 clusters (max is 4) with min_sample_prop=1.0 and entropy_ratio=0.182
INFO:root:identified 2 clusters (max is 4) with min_sample_prop=1.1 and entropy_ratio=0.209
INFO:root:identified 2 clusters (max is 4) with min_sample_prop=1.2 and entropy_ratio=0.209


In [None]:
# 2-D UMAP embedding of all standardised feature columns, used for plotting.
vis_embed = umap.UMAP(n_components=2, **umap_params).fit_transform(StandardScaler().fit_transform(feats))
# The row mask is computed from `embed` (the 12-D embedding), not from
# `vis_embed`: soft_assignments was produced from those same masked rows, so
# this presumably keeps points and colours aligned -- confirm that
# soft_assignments is one value per row.
plot(vis_embed[~np.isnan(embed).any(axis=1)], c=soft_assignments, s=0.1)