In [1]:
import morphs
%matplotlib inline
import seaborn as sns
import pandas as pd
import numpy as np
import itertools



In [2]:
# Load the accuracy tables from morphs, then derive `good_recs` from the
# per-cluster accuracies (the same call is used again when setting the
# 'block_path' categories at the end of the notebook).
# NOTE(review): the return types of these morphs helpers are not visible here.
accuracies, cluster_accuracies = morphs.load.cluster_accuracies()
good_recs = morphs.data.accuracies.good_recs(cluster_accuracies)

In [3]:
# Prefer the block the rest of this notebook was developed against; if it is
# not among the available blocks, fall back to the last block morphs lists.
preferred_block = 'Pen01_Lft_AP2500_ML750__Site04_Z2300__B1101_cat_P01_S04_3'
available_blocks = list(morphs.paths.blocks())
if not available_blocks:
    # Without this guard `block_path` would never be bound and the lines
    # below would fail with an uninformative NameError.
    raise RuntimeError('morphs.paths.blocks() returned no blocks; cannot choose a block_path')
# First path containing the preferred name, otherwise the last available path.
block_path = next((path for path in available_blocks if preferred_block in path),
                  available_blocks[-1])
subj = morphs.data.parse.bird_id(block_path)
good_clusters = morphs.data.accuracies.good_clusters(block_path)

In [4]:
# Load the ephys data for the chosen block, restricted via `good_clusters`.
# NOTE(review): the structure of `spikes` is defined inside morphs and not visible here.
spikes = morphs.load.ephys_data(block_path, good_clusters=good_clusters)

In [None]:
# Build the neural representation `X` and its stimulus-id `labels` from the spikes,
# then compute a reduced representation `X_red` from X and the labels.
# Later cells index rows of X / X_red by positions in `labels`, so the two are
# assumed to be row-aligned — TODO confirm against morphs.spikes.create_neural_rep.
X, labels = morphs.spikes.create_neural_rep(spikes, max_id_len=5)
X_red = morphs.data.neurometric.logistic_dim_reduction(X, labels)
# psychometric_params = morphs.load.psychometric_params()

In [None]:
# Load the morph spectrograms. The next cell reads them as
# spects[l][g][i][:, :, 0], i.e. a three-level nested lookup of 3-D arrays.
spects = morphs.load.morph_spectrograms()

In [None]:
# Flatten the nested spectrogram lookup into one dict keyed by
# '<lesser><greater><3-digit position>' (the same key format that is rebuilt
# from morph_dim + morph_pos when computing spectrogram distances below).
# Only the first slice along the last axis of each spectrogram is kept.
spect_reps = {
    '%s%s%03d' % (lesser, greater, pos): spects[lesser][greater][pos][:, :, 0]
    for lesser in spects
    for greater in spects[lesser]
    for pos in spects[lesser][greater]
}

In [None]:
def parse_morph_dim(df, morph_dim='morph_dim', lesser_dim='lesser_dim', greater_dim='greater_dim'):
    """Split a two-character morph dimension column into its two characters.

    Adds (or overwrites) two columns on ``df`` in place: ``lesser_dim`` gets the
    first character of each ``morph_dim`` string and ``greater_dim`` the second.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding a string column named by ``morph_dim``. Modified in place.
    morph_dim, lesser_dim, greater_dim : str
        Names of the source column and of the two output columns.

    Returns
    -------
    None
    """
    # Position 0 -> lesser_dim, position 1 -> greater_dim. The source column is
    # re-read on every iteration so the assignments happen in the same order
    # and against the same data as two separate statements would.
    for position, target in enumerate((lesser_dim, greater_dim)):
        df[target] = df[morph_dim].str.get(position)

In [None]:
# One row per entry of `labels`, stored under 'stim_id'.
label_df = pd.DataFrame(data={'stim_id':labels})
# Later cells read 'morph_dim' and 'morph_pos' from label_df, so this call
# presumably adds those columns in place — TODO confirm in morphs.data.parse.
morphs.data.parse.stim_id(label_df)

In [None]:
# For every morph dimension, enumerate all unordered pairs of rows of label_df
# that belong to it. itertools.combinations keeps the input order, so within a
# pair 'lesser_index' always comes before 'greater_index' in the group's index.
pair_columns = ('morph_dim', 'lesser_index', 'greater_index')
df_list = []
for morph_dim, group in label_df.groupby('morph_dim'):
    index_pairs = itertools.combinations(group.index.values, 2)
    records = [(morph_dim, i1, i2) for i1, i2 in index_pairs]
    df_list.append(pd.DataFrame.from_records(records, columns=pair_columns))
# Stack the per-dimension frames into one frame with a fresh 0..n-1 index.
pair_df = pd.concat(df_list, ignore_index=True)
# Adds 'lesser_dim' / 'greater_dim' columns to pair_df in place.
parse_morph_dim(pair_df)

In [None]:
# Look up the morph position of each member of every pair. `.values` is used on
# both sides so the assignment is positional and ignores index alignment.
for index in ['greater', 'lesser']:
    row_labels = pair_df[index + '_index'].values
    morph_positions = label_df.loc[row_labels, 'morph_pos'].values
    pair_df[index + '_morph_pos'] = morph_positions

In [None]:
# Difference in morph position between the two members of each pair.
# NOTE(review): this is only non-negative if the 'greater' member really has the
# larger morph_pos; the pairing above is by index order — confirm.
pair_df['morph_dist'] = pair_df['greater_morph_pos'] - pair_df['lesser_morph_pos']

In [None]:
# Rebuild the spect_reps key ('<morph_dim><3-digit morph_pos>') for both members
# of every pair, fetch the two spectrograms and take the norm of their difference.
greater_keys = pair_df['morph_dim'].str.cat(pair_df['greater_morph_pos'].map(lambda pos: '%03d' % (pos)))
lesser_keys = pair_df['morph_dim'].str.cat(pair_df['lesser_morph_pos'].map(lambda pos: '%03d' % (pos)))
# Element-wise subtraction of two object Series holding one array per row.
spect_diffs = greater_keys.map(spect_reps) - lesser_keys.map(spect_reps)
# np.linalg.norm with default arguments on a 2-D array is the Frobenius norm.
pair_df['spect_euclidean_dist'] = spect_diffs.apply(np.linalg.norm)

In [None]:
def blocked_norm(arr, block_size=2000, out=None):
    """Row-wise Euclidean norm of a 2-D array, computed a block of rows at a time.

    Gives the same values as ``np.linalg.norm(arr, axis=1)`` but only ever passes
    ``block_size`` rows to ``np.linalg.norm`` at once (presumably to bound the
    size of temporaries on very large inputs).

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose rows are the vectors to measure.
    block_size : int, optional
        Number of rows handled per call to ``np.linalg.norm``. Must be >= 1.
    out : numpy.ndarray, optional
        Pre-allocated 1-D buffer with at least ``arr.shape[0]`` entries to write
        into. When omitted a new float array of length ``arr.shape[0]`` is made.

    Returns
    -------
    numpy.ndarray
        ``out`` if it was given, otherwise the newly allocated result array.

    Raises
    ------
    ValueError
        If ``block_size`` is smaller than 1.
    """
    # A negative step would make the range below empty and hand back the
    # uninitialised buffer unchanged, so reject it up front.
    if block_size < 1:
        raise ValueError('block_size must be a positive integer, got %r' % (block_size,))
    n_rows = arr.shape[0]
    ret = np.empty(n_rows) if out is None else out
    for start in range(0, n_rows, block_size):
        # The final block may be shorter than block_size.
        stop = min(start + block_size, n_rows)
        ret[start:stop] = np.linalg.norm(arr[start:stop], axis=1)
    return ret

In [None]:
def test_blocked_norm():
    """Check blocked_norm against np.linalg.norm(..., axis=1) on random data.

    Uses a seeded generator so the check is reproducible, and a row count that
    is not a multiple of the block size so the shorter final block is exercised
    as well as the full ones. The array is kept small (about 4 MB) so the
    check is cheap to re-run.
    """
    rng = np.random.RandomState(0)
    a = rng.rand(5003, 100)
    expected = np.linalg.norm(a, axis=1)
    # Default block size: two full blocks plus a ragged tail.
    assert np.allclose(expected, blocked_norm(a))
    # A block size that does not divide the row count either.
    assert np.allclose(expected, blocked_norm(a, block_size=7))
    # Writing into a caller-supplied buffer returns that same buffer.
    buf = np.empty(a.shape[0])
    assert blocked_norm(a, out=buf) is buf
    assert np.allclose(expected, buf)
test_blocked_norm()

In [None]:
# Euclidean distance between the two rows of X that make up each pair, computed
# in blocks of rows by blocked_norm.
# Note: this column is spelled 'euclidian', unlike 'spect_euclidean_dist' above;
# the name is left as is because downstream code may depend on it.
pair_df['neural_euclidian_dist'] = blocked_norm(X[pair_df['greater_index'].values, :] - X[pair_df['lesser_index'].values, :])

In [None]:
# Same pairwise row distance as for X, but on the reduced representation X_red.
pair_df['red_neural_euclidian_dist'] = blocked_norm(X_red[pair_df['greater_index'].values, :] - X_red[pair_df['lesser_index'].values, :])

In [None]:
# The index and single-character dim columns were only needed to build the
# distance columns; remove them from pair_df in place.
helper_columns = ['lesser_index', 'greater_index', 'lesser_dim', 'greater_dim']
for col in helper_columns:
    pair_df.drop(col, axis=1, inplace=True)

In [None]:
# Shrink the small-integer columns to one byte each to save memory.
for col in ['greater_morph_pos', 'lesser_morph_pos', 'morph_dist']:
    # astype('uint8') does not range-check: values outside 0..255 (for example
    # a negative morph_dist) would wrap around silently, so fail loudly instead.
    if not pair_df[col].between(0, 255).all():
        raise ValueError('%s has values outside the uint8 range [0, 255]' % col)
    pair_df[col] = pair_df[col].astype('uint8')

In [None]:
# Tag every pair with the block it came from (the scalar is broadcast to all rows).
pair_df['block_path'] = block_path

In [None]:
# Per-column memory footprint in MiB (bytes / 1024 / 1024), including the
# contents of object columns, before the string columns are made categorical.
pair_df.memory_usage(deep=True)/1024/1024

In [None]:
# Store the repeated string columns as categoricals to cut memory use.
for col in ['morph_dim', 'block_path']:
    pair_df[col] = pair_df[col].astype('category')
# Give 'block_path' the categories returned by good_recs. set_categories
# returns a new Series rather than modifying in place, so the result has to be
# assigned back for the change to take effect.
recategorized = pair_df['block_path'].cat.set_categories(morphs.data.accuracies.good_recs(cluster_accuracies))
# Values missing from the new categories are turned into NaN; refuse to wipe
# the column if the current block is not among them.
if recategorized.isnull().any():
    raise ValueError('block_path %r is not among the good_recs categories' % (block_path,))
pair_df['block_path'] = recategorized

In [None]:
# Show the column dtypes after the uint8 and category conversions.
pair_df.dtypes

In [None]:
# Per-column memory footprint in MiB again, to compare with the figure taken
# before the categorical conversion.
pair_df.memory_usage(deep=True)/1024/1024