In [5]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [14]:
data_path = 'M:\\analysis\Axel_Bisi\mice_data\AB007\Recording\Ephys\AB007_g0\AB007_g0_imec0\ks25'
data_path

'M:\\analysis\\Axel_Bisi\\mice_data\\AB007\\Recording\\Ephys\\AB007_g0\\AB007_g0_imec0\\ks25'

#### spike_clusters.npy
Cluster ID assigned to each spike.

In [21]:
# Load the array stored in spike_clusters.npy and expose it as a
# one-column frame named 'cluster' for easier inspection.
spk_clusters = np.load(
    os.path.join(data_path, 'spike_clusters.npy')
)
df = pd.DataFrame(data=spk_clusters, columns=['cluster'])
df.head(5)

Unnamed: 0,cluster
0,198
1,267
2,187
3,183
4,33


In [16]:
# Shape of the array of distinct values found in the 'cluster' column.
np.unique(df['cluster'].to_numpy()).shape

(297,)

In [18]:
# Write the 'cluster' column to clus_lbl.npy inside data_path.
np.save(
    os.path.join(data_path, 'clus_lbl.npy'),
    df['cluster'].to_numpy(),
)

#### spike_times.npy
Spike time for each spike.

In [28]:
# Load the array stored in spike_times.npy and expose it as a
# one-column frame; 'ts' is the timestamps column.
spk_times = np.load(
    os.path.join(data_path, 'spike_times.npy')
)
df = pd.DataFrame(data=spk_times, columns=['ts'])
df.head(5)

Unnamed: 0,ts
0,215
1,221
2,229
3,269
4,272


In [29]:
# Write the 'ts' column to clus_time.npy inside data_path.
np.save(
    os.path.join(data_path, 'clus_time.npy'),
    df['ts'].to_numpy(),
)

#### cluster_group.tsv
Cluster label for each cluster ID (only curated and confirmed clusters!). If not all clusters have been validated, this file contains fewer clusters than the number of cluster IDs generated by KS.

In [31]:
# Read the tab-separated cluster_group.tsv table from data_path.
clus_group = pd.read_csv(
    os.path.join(data_path, 'cluster_group.tsv'),
    delimiter='\t',
)
clus_group

Unnamed: 0,cluster_id,group
0,0,mua
1,1,mua
2,2,mua
3,3,mua
4,4,mua
...,...,...
292,341,good
293,342,good
294,343,good
295,346,mua


In [32]:
# Distinct values of the 'group' column together with how often each occurs.
np.unique(clus_group['group'], return_counts=True)

(array(['good', 'mua', 'noise'], dtype=object),
 array([ 72, 216,   9], dtype=int64))

#### cluster_KSLabel.tsv
KS output label for all clusters.

In [33]:
# Read the tab-separated cluster_KSLabel.tsv table from data_path.
clus_ks = pd.read_csv(
    os.path.join(data_path, 'cluster_KSLabel.tsv'),
    sep='\t',
)
clus_ks

Unnamed: 0,cluster_id,KSLabel
0,0,mua
1,1,mua
2,2,mua
3,3,mua
4,4,mua
...,...,...
284,341,mua
285,342,mua
286,343,mua
287,346,mua


In [34]:
# Distinct values of the 'KSLabel' column together with how often each occurs.
np.unique(clus_ks['KSLabel'], return_counts=True)

(array(['good', 'mua'], dtype=object), array([ 59, 230], dtype=int64))

#### cluster_info.tsv
Info displayed in Phy. Also includes the number of spikes and the peak channel number of each cluster, which are needed for C_waves.

In [47]:
# Read the tab-separated cluster_info.tsv table from data_path.
info_file = os.path.join(data_path, 'cluster_info.tsv')
clus_info = pd.read_csv(info_file, sep='\t')
clus_info

Unnamed: 0,cluster_id,Amplitude,ContamPct,KSLabel,amp,ch,depth,fr,group,n_spikes,sh
0,0,1181.6,77.7,mua,78.235954,0,20.0,7.031047,mua,38913,0
1,1,1334.6,0.0,mua,79.419922,0,20.0,0.002891,mua,16,0
2,2,1088.7,0.0,mua,81.428429,0,20.0,0.003614,mua,20,0
3,3,737.8,143.8,mua,58.966412,25,260.0,0.218269,mua,1208,0
4,4,963.8,49.0,mua,73.670265,2,40.0,4.245044,mua,23494,0
...,...,...,...,...,...,...,...,...,...,...,...
292,341,823.4,27.0,mua,67.751808,10,120.0,0.258020,good,1428,0
293,342,823.4,27.0,mua,67.751808,10,120.0,6.273610,good,34721,0
294,343,790.9,25.7,mua,65.629524,0,20.0,5.423300,good,30015,0
295,346,3235.8,27.8,mua,105.748535,69,700.0,0.063060,mua,349,0


In [51]:
# Shape of the array of distinct cluster IDs listed in cluster_info.
np.unique(clus_info['cluster_id']).shape

(297,)

In [49]:
# Show the row(s) whose cluster_id column equals 151.
clus_info.loc[clus_info['cluster_id'] == 151]

Unnamed: 0,cluster_id,Amplitude,ContamPct,KSLabel,amp,ch,depth,fr,group,n_spikes,sh
140,151,5179.3,8.0,good,123.251617,106,1080.0,0.782552,good,4331,0


Some cluster IDs are missing, so the total number of rows N is lower than the maximum cluster ID, hence the mismatch.
The missing cluster IDs are:

In [52]:
# Cluster IDs in 0..max(cluster_id)-1 that have no row in cluster_info
# (the maximum itself is present by definition, so it is never missing).
# np.setdiff1d computes the sorted set difference in a single vectorised call;
# the previous list comprehension recomputed np.unique and scanned it on every
# iteration, and produced a float64 array when nothing was missing.
missing_cluster_ids = np.setdiff1d(
    np.arange(np.max(clus_info['cluster_id'])),
    clus_info['cluster_id'].to_numpy(),
)
missing_cluster_ids

array([  5,  13,  21,  24,  40,  42,  43,  88,  90,  96, 117, 173, 190,
       192, 197, 208, 222, 237, 238, 239, 244, 245, 251, 283, 287, 293,
       294, 296, 303, 309, 310, 312, 313, 314, 315, 316, 319, 321, 322,
       323, 324, 325, 327, 328, 330, 332, 334, 337, 338, 344, 345])

Fill in the missing IDs with zeros (NaN gives weird negative numbers when loading the table) so that the table satisfies the C_waves requirement of a cluster ID <-> row ID mapping.


In [55]:
# Use cluster_id as the row index; drop=False keeps it as a regular column too.
clus_info = clus_info.set_index('cluster_id', drop=False)

In [56]:
# Build a table with one row per integer in 0..max(cluster_id), so that the
# row position equals the cluster ID. IDs that have no row in cluster_info
# get a filler row with 0 in every column (fill_value=0).
# set_index is chained instead of applied in place, so this cell does not
# mutate `clus_info` and is safe to re-run; reindex already returns a new
# object, so the explicit copy=True was dropped.
# NOTE(review): in the filler rows the `cluster_id` *column* is also 0, so a
# filter on clus_info_all_ids['cluster_id'] == 0 matches them as well; use the
# index when looking up a cluster by ID.
clus_info_all_ids = (
    clus_info
    .set_index('cluster_id', drop=False)
    .reindex(range(np.max(clus_info['cluster_id']) + 1), fill_value=0)
)
clus_info_all_ids

Unnamed: 0_level_0,cluster_id,Amplitude,ContamPct,KSLabel,amp,ch,depth,fr,group,n_spikes,sh
cluster_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,1181.6,77.7,mua,78.235954,0,20.0,7.031047,mua,38913,0
1,1,1334.6,0.0,mua,79.419922,0,20.0,0.002891,mua,16,0
2,2,1088.7,0.0,mua,81.428429,0,20.0,0.003614,mua,20,0
3,3,737.8,143.8,mua,58.966412,25,260.0,0.218269,mua,1208,0
4,4,963.8,49.0,mua,73.670265,2,40.0,4.245044,mua,23494,0
...,...,...,...,...,...,...,...,...,...,...,...
343,343,790.9,25.7,mua,65.629524,0,20.0,5.423300,good,30015,0
344,0,0.0,0.0,0,0.000000,0,0.0,0.000000,0,0,0
345,0,0.0,0.0,0,0.000000,0,0.0,0.000000,0,0,0
346,346,3235.8,27.8,mua,105.748535,69,700.0,0.063060,mua,349,0


In [44]:
# Display the 'group' column of the filled table.
clus_info_all_ids['group']

cluster_id
0       mua
1       mua
2       mua
3       mua
4       mua
       ... 
343    good
344       0
345       0
346     mua
347    good
Name: group, Length: 348, dtype: object

In [45]:
# Show the row(s) of the filled table whose cluster_id column equals 151.
clus_info_all_ids.loc[clus_info_all_ids['cluster_id'] == 151]

Unnamed: 0_level_0,cluster_id,Amplitude,ContamPct,KSLabel,amp,ch,depth,fr,group,n_spikes,sh
cluster_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
151,151,5179.3,8.0,good,123.251617,106,1080.0,0.782552,good,4331,0


Make clus_table array for C_waves

In [46]:
# Keep only the two columns used for clus_table: spike count and channel.
clus_table = clus_info_all_ids.loc[:, ['n_spikes', 'ch']]
# Shape of the table once converted to a plain array.
np.asarray(clus_table).shape

(348, 2)

In [140]:
# Save clus_table as an int32 array.
# The path is a raw string so the Windows backslashes are literal; the previous
# non-raw literal depended on Python leaving unknown escapes untouched, which
# emits an invalid-escape warning on recent Python versions.
# NOTE(review): this folder (...\AB007_g0\ks25) is not the same as `data_path`
# (...\AB007_g0\AB007_g0_imec0\ks25), where clus_lbl.npy and clus_time.npy are
# written - confirm this is intentional.
np.save(
    r'M:\analysis\Axel_Bisi\mice_data\AB007\Recording\Ephys\AB007_g0\ks25\clus_table.npy',
    clus_table.to_numpy(dtype=np.int32),
)

In [145]:
# Reload the saved table and check one entry: row 151 should hold the values
# of cluster 151.
# The path is a raw string so the Windows backslashes are literal; the previous
# non-raw literal depended on Python leaving unknown escapes untouched, which
# emits an invalid-escape warning on recent Python versions.
# NOTE(review): this folder (...\AB007_g0\ks25) is not the same as `data_path`
# (...\AB007_g0\AB007_g0_imec0\ks25) - confirm this is intentional.
clus_tb = np.load(r'M:\analysis\Axel_Bisi\mice_data\AB007\Recording\Ephys\AB007_g0\ks25\clus_table.npy')

clus_tb[151]

array([4331,  106])