In [213]:
import random
import numpy as np
import json
from tqdm import tqdm
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import pdist, squareform
from scipy.optimize import linear_sum_assignment
from collections import Counter
import time
import os


In [214]:
filename = 'scan_stats.json'

In [148]:
def generate_synthetic_data(params):
    '''
    Synthetic Data Generation
    '''
    # Dense K: matrix of binary images of sizeNxDxM
    # Sparse K: set of (delay d, neuron a, and pg b)

    M,N,D,T,nrn_fr,pg_fr,background_noise_fr = params['M'], params['N'], params['D'], params['T'], params['nrn_fr'],params['pg_fr'],params['background_noise_fr'],    
    '''
    Synthetic Data Generation
    '''
    # Dense K: matrix of binary images of sizeNxDxM
    # Sparse K: set of (delay d, neuron a, and pg b)

    K_dense = np.random.rand(N,D,M)*1000
    nrn_frs = np.zeros((M))
    for m in range(M):
        nrn_frs[m] = np.random.poisson(nrn_fr)
        K_dense[:,:,m] = (K_dense[:,:,m] < nrn_frs[m]).astype('int')
    K_sparse = np.where(K_dense)
    K_sparse = (K_sparse[0],K_sparse[1],K_sparse[2]+1)


    # dense B: the binary image of the occurrences of the spiking motif as a ( M x T) matrix
    # spare B: set of all times t and pg's b
    B_dense = np.random.rand(M,T)*1000
    pg_frs = np.zeros((M))
    for m in range(M):
        pg_frs[m] = np.random.poisson(pg_fr)
        B_dense[m,:] = (B_dense[m,:] < pg_frs[m]).astype('int')
    B_sparse = np.where(B_dense)
    B_sparse = (B_sparse[0]+1,B_sparse[1])# This way the first motif starts at index 1 instead of index 0

    # now to make the full raster plot keeping the labels in-tact
    # dense A: the layered binary images of all neuron spikes by PG ( N x T x M
    A_dense = np.zeros((N,T+D,M+1))
    A_dense[...,0] = np.random.rand(N,T+D)*1000
    A_dense[...,0] = (A_dense[...,0] < background_noise_fr).astype('int')
    for i in range(len(B_sparse[0])):
        t = B_sparse[1][i]
        b = B_sparse[0][i]
        A_dense[:, t:t+D, b] += K_dense[...,b-1]

    A_sparse = np.where(A_dense)
    A_dense = np.sum(A_dense,axis=2)
    A_dense[A_dense>1] = 1
    
    stats = _get_stats(A_sparse, B_sparse, K_sparse)
    
    return A_dense, A_sparse, B_dense, B_sparse, K_dense, K_sparse, stats

def _get_stats(A_sparse, B_sparse, K_sparse):
    A_fr = [A_sparse[0].tolist().count(n) for n in np.unique(A_sparse[0])]
    B_fr = [B_sparse[0].tolist().count(n) for n in np.unique(B_sparse[0])]
    K_fr = [[K_sparse[0][K_sparse[2]==m].tolist().count(n) for n in np.unique(A_sparse[0])] for m in np.unique(K_sparse[2])]
    return A_fr, B_fr, K_fr

In [216]:
def scan_raster(T_labels, N_labels, window_dim = None):
    '''
    T_labels an array of spiketimes
    N_labels corresponding array of neuron labels
    window_dim is the size of the window to cluster the spikes
    '''
    if window_dim == None:
        window_dim = 100
        
    T_labels = np.round(T_labels).astype(int)
    T_labels, N_labels = np.unique(np.array([T_labels,N_labels]),axis=1) # This removes any spikes that occur at the same neuron at the same time
    N=max(N_labels)+1

    print(f'{len(T_labels)} Windows')
    windows = np.zeros((len(T_labels)),dtype='object')
    for i,window_time in enumerate(T_labels):
        condition = (T_labels > window_time-window_dim) & (T_labels < window_time + window_dim)
        window = np.array([T_labels[condition]-window_time, N_labels[condition]]).T
        window =  {tuple(row) for row in  window}
        windows[i] = window
        print(f'Windowing... {round(100*i/len(T_labels))}%',end='\r')
        
    # Set the cutoff value for clustering
    cutoff = 0
    lr = 0.01

    max_iter=50
    lr = 0.01
    iter_ = 0

    opt_cutoff = cutoff
    max_seq_rep = 0
    sim_mats = _get_sim_mats(windows, T_labels, N_labels)
    
    
    print("Clustering...",end="\r")

    while iter_ <= max_iter: # this is just a for loop...
        clusters = _cluster_windows(cutoff, N_labels, sim_mats)
        cluster_sq, _sq_counts, sublist_keys_filt = _check_seq(clusters, T_labels, N_labels)

        if len(sublist_keys_filt) != 0:
            max_ = np.max([len(k) for k in sublist_keys_filt])
            if max_seq_rep < max_:
                max_seq_rep = max_
                opt_cutoff=cutoff

        cutoff += lr
        iter_ +=1


        print(f'iter - {iter_/max_iter} | cutoff - {cutoff} | opt_cutoff - {opt_cutoff} | most_detections - {max_seq_rep}',end='\r')

    clusters = _cluster_windows(opt_cutoff, N_labels, sim_mats)
    cluster_sq, sq_counts, sublist_keys_filt = _check_seq(clusters, T_labels, N_labels)
    

    ''' to get the timings'''

    # Sort y according to x
    sorted_indices = np.argsort(T_labels)
    sorted_x = T_labels[sorted_indices]
    
    print("Re-Clustering...",end="\r")

    all_times = []
    all_labels = []
    for key in sublist_keys_filt:
        pattern_repetition_labels = np.zeros((len(cluster_sq[str(key)]),len(clusters)))
        for i,k in enumerate(cluster_sq[str(key)]):
            pattern_repetition_labels[i][clusters==k] = 1
            pattern_repetition_labels[i] *= np.cumsum(pattern_repetition_labels[i])
        pattern_repetition_labels = np.sum(pattern_repetition_labels,axis=0,dtype='int')
        all_labels.append(pattern_repetition_labels)

        sorted_y = pattern_repetition_labels[sorted_indices]
        pattern_times = np.array([sorted_x[sorted_y==i][0] for i in range(1,max(pattern_repetition_labels)+1)])
        all_times.append(pattern_times)
        
    print("Extracting templates...",end="\r")

    pattern_template = []
    patterns = []
    for i in range(len(all_times)):
        pattern = []
        pattern_template.append([])
        for time in all_times[i]:
            condition = (T_labels > time-window_dim*2) & (T_labels < time + window_dim*2)
            pattern = [tuple(k) for k in np.array([T_labels[condition]-time, N_labels[condition]]).T] # creating a list of tuples
            pattern_template[-1] += pattern # adds all points of each pattern to template_pattern
            patterns.append(pattern)
        print(f"Extracting templates... {round(100*i/len(all_times))}%",end="\r")
    
    print(f"{len(pattern_template)} patterns found...")

    for i,pattern in enumerate(pattern_template):
        counts = [pattern.count(k) for k in pattern]
        pattern_template[i] = np.array(pattern)[np.where(counts == np.max(counts))[0]]
        pattern_template[i][:,0] -= min(pattern_template[i][:,0])
        pattern_template[i] = np.unique(pattern_template[i],axis=0)
    
    if len(pattern_template) == 0:
        return pattern_template, sublist_keys_filt, None
    
    win_size = (N,1+max([max(k[:,0]) for k in pattern_template]))
    pattern_img = np.zeros((len(pattern_template),*win_size))
    for p,pattern in enumerate(pattern_template):
        for (i,j) in pattern:
            pattern_img[p,j,i] = 1

    return pattern_template, sublist_keys_filt, pattern_img

def _get_sim_mats(windows, T_labels, N_labels):
    sim_mats = np.zeros(np.max(N_labels)+1,dtype='object')
    for n in np.unique(N_labels):
        idc = np.where(N_labels==n)[0]
        windows_n = windows[idc]
        if len(windows_n) > 1:
            x = np.zeros((len(windows_n),len(windows_n)))
            for i in range(windows_n.shape[0]):
                for j in range(windows_n.shape[0]):
                    common_rows = windows_n[i].intersection(windows_n[j])
                    num_identical_rows = len(common_rows)
                    x[i,j] = len(common_rows)/min(len(windows_n[i]),len(windows_n[j]))
            np.fill_diagonal(x,0)# make sure the diagonals are zero, this is important the more spikes there are...
            sim_mats[n] = x-1 
        print(f"Generating sim matrices {round(n*100/np.max(N_labels))}%", end = "\r")
    return sim_mats

def _cluster_windows(cutoff, N_labels, sim_mats):
    clusters = np.zeros_like(N_labels)
    for n in np.unique(N_labels):
        idc = np.where(N_labels==n)[0]
        if (type(sim_mats[n]) == np.ndarray) and (not np.all(sim_mats[n] == 0)):
            l = max(clusters)+1
            clusters[idc]= l+fcluster(linkage(sim_mats[n], method='complete'), cutoff, criterion='distance')
    return clusters

def _check_seq(clusters, T_labels, N_labels):

    time_differences = []
    cluster_sq = {}
    for cluster in np.unique(clusters):
        temp = list(np.diff(np.unique(T_labels[clusters == cluster])))
        str_temp = str(temp)
        time_differences.append(temp)
        if str_temp in cluster_sq.keys():
            cluster_sq[str_temp] = cluster_sq[str_temp] + [cluster]
        else:
            cluster_sq[str_temp] = [cluster]

    # Convert the list of lists to a set of tuples to remove duplicates
    unique_sublists_set = set(tuple(sublist) for sublist in time_differences if sublist)

    # Convert the set of tuples back to a list of lists
    unique_sublists = [list(sublist) for sublist in unique_sublists_set]

    # Count the occurrences of each unique sublist in the original list
    sublist_counts = Counter(tuple(sublist) for sublist in time_differences if sublist)

    # Print the unique sublists and their respective counts
    sq_counts = np.zeros(len(sublist_counts)) 
    for i,sublist in enumerate(unique_sublists):
        count = sublist_counts[tuple(sublist)]
        sq_counts[i] = count
    #     print(f"{sublist}: {count} occurrences")
    sublist_keys_np = np.array([list(key) for key in sublist_counts.keys()],dtype='object')
    sublist_keys_filt = sublist_keys_np[np.array(list(sublist_counts.values())) >1] # only bother clustering repetitions that appear for more than one neuron
    
    return cluster_sq, sq_counts, sublist_keys_filt


In [157]:
from scipy.signal import correlate
def get_acc(ground_truths,detected_patterns):
    # Calculate cross-correlation matrix
    cross_corr_matrix = np.zeros((ground_truths.shape[2], detected_patterns.shape[2]))
    SM_acc = np.zeros((ground_truths.shape[2]))
    
    if len(detected_patterns == 0):
        return SM_acc, cross_corr_matrix
    
    for ground_truths_idx in range(ground_truths.shape[2]):
        for detected_patterns_idx in range(detected_patterns.shape[2]):
            cross_corr = np.zeros((ground_truths.shape[1]+detected_patterns.shape[1]-1))
            for n in range(ground_truths.shape[0]):
                cross_corr += correlate(ground_truths[n, :, ground_truths_idx], detected_patterns[n, :, detected_patterns_idx], mode='full')
            max_corr = np.max(cross_corr) / max(np.sum(ground_truths[...,ground_truths_idx]),np.sum(detected_patterns[...,detected_patterns_idx]))
            cross_corr_matrix[ground_truths_idx, detected_patterns_idx] = max_corr
#     print(cross_corr_matrix)
#     print( np.sum(ground_truths[...,ground_truths_idx]))
    SM_acc = np.max(cross_corr_matrix,axis=1)
    return SM_acc, cross_corr_matrix

In [250]:
 # Define the number of random samples you want to take
num_samples = 5  # Adjust this based on your computational resources

trials = 1

# List to hold the results
results = []

param_combinations = np.array(np.meshgrid(*scan_dict.values())).T.reshape(-1, len(scan_dict))
num_iterations = len(param_combinations)

# Generate random indices for sampling
random_indices = random.sample(range(num_iterations), num_samples)


# Iterate through parameter combinations
for idx in tqdm(random_indices):
    for trial in range(0,trials):
        seed=trial
        
        if os.path.isfile(filename):
            with open(filename, 'r') as results_file:
                results = json.load(results_file)
        
        if idc in df['idc'].tolist():
            while seed in df[df['idc'] == idc]['trial'].tolist():
                seed+=1
        
        np.random.seed(seed)
        params = {key: int(val) for key, val in zip(scan_dict.keys(), param_combinations[idx])}

        # Run your program here to generate performance results
        print("Params:", params)
        print("Generating raster plot...")
        start = time.time()
        _, A_sparse, _, B_sparse, K_dense, K_sparse, stats = generate_synthetic_data(params)
        pattern_template, sublist_keys_filt, pattern_img = scan_raster(A_sparse[1],A_sparse[0],window_dim=params['D'])
        if type(pattern_img) != np.ndarray:
            performance_result = (0,0)
        else:
            pattern_img = np.transpose(pattern_img,axes=[1,2,0])
            SM_acc, _ = get_acc(K_dense, pattern_img)
            performance_result = (np.sum(SM_acc>0.8)/len(SM_acc), np.mean(SM_acc))
        end = time.time()

        # Create a dictionary to store the result
        result = {
            'idc': idx,
            'seed':seed,
            'raster_fr':stats[0],
            'pg_fr':stats[1],
            'spikes_in_pg':stats[2],
            **params,  # Unpack the parameters as separate columns
            'performance':performance_result,
            'time':round(end-start)
        }

        print(performance_result)

        # Append the result to the list
        results.append(result)
    # Write the entire list of results to a JSON file
    with open(filename, 'w') as results_file:
        json.dump(results, results_file, indent=4)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

Params: {'M': 64, 'N': 30, 'D': 150, 'T': 1000, 'nrn_fr': 5, 'pg_fr': 5, 'background_noise_fr': 5}
Generating raster plot...
6012 Windows
80 patterns found...... 99%00000000000002 | opt_cutoff - 0.3900000000000002 | most_detections - 332


 20%|████████████████▌                                                                  | 1/5 [10:36<42:26, 636.65s/it]

(0.0, 0.0)
Params: {'M': 64, 'N': 30, 'D': 30, 'T': 1000, 'nrn_fr': 15, 'pg_fr': 5, 'background_noise_fr': 5}
Generating raster plot...
3910 Windows
iter - 1.02 | cutoff - 0.5100000000000002 | opt_cutoff - 0.36000000000000015 | most_detections - 11

 40%|█████████████████████████████████▏                                                 | 2/5 [11:10<14:06, 282.32s/it]

1 patterns found...s... 0%
(0.0, 0.0)
Params: {'M': 4, 'N': 5, 'D': 150, 'T': 1000, 'nrn_fr': 15, 'pg_fr': 3, 'background_noise_fr': 2}
Generating raster plot...
259 Windows
iter - 0.98 | cutoff - 0.49000000000000027 | opt_cutoff - 0.38000000000000017 | most_detections - 4

 60%|█████████████████████████████████████████████████▊                                 | 3/5 [11:11<05:07, 153.84s/it]

1 patterns found...s... 0%100000000000002 | opt_cutoff - 0.38000000000000017 | most_detections - 4
(0.0, 0.0)
Params: {'M': 16, 'N': 5, 'D': 30, 'T': 1000, 'nrn_fr': 10, 'pg_fr': 4, 'background_noise_fr': 0}
Generating raster plot...
126 Windows
iter - 0.82 | cutoff - 0.4100000000000002 | opt_cutoff - 0 | most_detections - 000

 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [11:12<01:33, 93.29s/it]

0 patterns found...s...0.5100000000000002 | opt_cutoff - 0 | most_detections - 00
(0, 0)
Params: {'M': 64, 'N': 60, 'D': 70, 'T': 1000, 'nrn_fr': 15, 'pg_fr': 3, 'background_noise_fr': 0}
Generating raster plot...
11245 Windows
62 patterns found...... 98%00000000000002 | opt_cutoff - 0.4300000000000002 | most_detections - 442


100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [24:32<00:00, 294.43s/it]

(0.0, 0.0)





In [251]:
if os.path.isfile(filename):
    with open(filename, 'r') as results_file:
        test = json.load(results_file)

In [252]:
[k['idc'] for k in test]

[4157, 4277, 2624, 348, 218]

In [253]:
import pandas as pd
df = pd.DataFrame(test)

In [254]:
df

Unnamed: 0,idc,seed,raster_fr,pg_fr,spikes_in_pg,M,N,D,T,nrn_fr,background_noise_fr,performance,time
0,4157,0,"[246, 223, 328, 157, 215, 207, 194, 228, 197, ...",5,"[[0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,...",64,30,150,1000,5,5,"[0.0, 0.0]",637
1,4277,0,"[111, 173, 170, 152, 187, 111, 111, 144, 153, ...",5,"[[0, 1, 0, 0, 1, 1, 2, 2, 0, 1, 0, 2, 0, 1, 1,...",64,30,30,1000,15,5,"[0.0, 0.0]",34
2,2624,0,"[65, 52, 38, 71, 37]",3,"[[2, 1, 1, 1, 1], [4, 4, 1, 2, 1], [5, 5, 3, 5...",4,5,150,1000,15,2,"[0.0, 0.0]",1
3,348,0,"[6, 42, 27, 11, 42]",4,"[[0, 0, 0, 1, 1], [0, 0, 0, 1, 0], [0, 0, 1, 0...",16,5,30,1000,10,0,"[0, 0]",0
4,218,0,"[236, 206, 175, 210, 288, 264, 157, 159, 157, ...",3,"[[1, 0, 3, 1, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,...",64,60,70,1000,15,0,"[0.0, 0.0]",800


In [204]:
df['idc']

0    4622
Name: idc, dtype: int64

In [211]:
df[df['idc'] == idc]['trial'].tolist()

[0]

In [199]:
df['idc'].tolist()

[4622]

In [None]:
if idc in df['idc'].tolist():
    while seed in df[df['idc'] == idc]['trial'].tolist():
        seed+=1
        

In [212]:
with open('scan_statds.json', 'r') as results_file:
    test = json.load(results_file)

FileNotFoundError: [Errno 2] No such file or directory: 'scan_statds.json'

In [144]:
def main():
        # Define the number of random samples you want to take
    num_samples = 1  # Adjust this based on your computational resources
    
    trials = 1
    
    # List to hold the results
    results = []
    
    param_combinations = np.array(np.meshgrid(*scan_dict.values())).T.reshape(-1, len(scan_dict))
    num_iterations = len(param_combinations)
    
    # Generate random indices for sampling
    random_indices = random.sample(range(num_iterations), num_samples)
    
    # Iterate through parameter combinations
    for idx in tqdm(random_indices):
        for trial in range(0,trials):
            seed=trial
            np.random.seed(seed)
            params = {key: int(val) for key, val in zip(scan_dict.keys(), param_combinations[idx])}

            # Run your program here to generate performance results
            print("Params:", params)
            print("Generating raster plot...")
            _, A_sparse, _, B_sparse, K_dense, K_sparse, stats = generate_synthetic_data(params)
            print("Clustering...")
            pattern_template, sublist_keys_filt, pattern_img = scan_raster(A_sparse[1],A_sparse[0],window_dim=params['D'])
            if type(pattern_img) != np.ndarray:
                performance_result = (0,0)
            else:
                pattern_img = np.transpose(pattern_img,axes=[1,2,0])
                SM_acc, _ = get_acc(K_dense, pattern_img)
                performance_result = (np.sum(SM_acc>0.8)/len(SM_acc), np.mean(SM_acc))

            # Create a dictionary to store the result
            result = {
                'idc': idx,
                'trial':trial,
                'data':[A_sparse,K_sparse,B_sparse],
                **params,  # Unpack the parameters as separate columns
                'performance':performance_result
            }
            
            print(performance_result)

            # Append the result to the list
            results.append(result)
        # Write the entire list of results to a JSON file
    with open('scan_stats.json', 'w') as results_file:
        json.dump(results, results_file, indent=4)
        
    return results


In [145]:
'''
Model default parameters
'''

M = 4 # Number of Spiking motifs
N = 20 # Number of input neurons
D = 71 # temporal depth of receptive field
T = 1000
nrn_fr = 15 # hz
pg_fr = 6 # hz
background_noise_fr = 10 # hz
seed=41
np.random.seed(seed)

In [146]:
default_params = {
    'M':M,
    'N':N,
    'D':D,
    'T':T,
    'nrn_fr':nrn_fr,
    'pg_fr':pg_fr,
    'background_noise_fr':background_noise_fr,
    'seed':seed
}
scan_dict = {
    'M':[1,4,16,32,64],
    'N':[5,30,60,100],
    'D':[10,30,70,150],
    'T':[1000],
    'nrn_fr':[5,10,15],
    'pg_fr':[3,4,5,8,10],
    'background_noise_fr':[0,1,2,5,10]
}

In [147]:
results = main()

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]

Params: {'M': 1, 'N': 60, 'D': 70, 'T': 1000, 'nrn_fr': 15, 'pg_fr': 5, 'background_noise_fr': 2}
Generating raster plot...





ValueError: too many values to unpack (expected 6)

In [19]:
params = {'M': 32, 'N': 5, 'D': 150, 'T': 1000, 'nrn_fr': 5, 'pg_fr': 5, 'background_noise_fr': 0}

In [21]:
_, A_sparse, _, B_sparse, K_dense, K_sparse = generate_synthetic_data(params)
N_labels, T_labels = A_sparse[0], A_sparse[1]

In [26]:
window_dim = None

In [27]:

print(f'Cleaning spikes...',end='\r')
if window_dim == None:
    window_dim = 100

T_labels = np.round(T_labels).astype(int)
T_labels, N_labels = np.unique(np.array([T_labels,N_labels]),axis=1) # This removes any spikes that occur at the same neuron at the same time
N=max(N_labels)+1

print(f'Windowing... {len(T_labels)}')
windows = np.zeros((len(T_labels)),dtype='object')
for i,window_time in enumerate(T_labels):
    condition = (T_labels > window_time-window_dim) & (T_labels < window_time + window_dim)
    window = np.array([T_labels[condition]-window_time, N_labels[condition]]).T
    window =  {tuple(row) for row in  window}
    windows[i] = window


# Set the cutoff value for clustering
cutoff = 0
lr = 0.01

max_iter=50
lr = 0.01
iter_ = 0

opt_cutoff = cutoff
max_seq_rep = 0
sim_mats = _get_sim_mats(windows, T_labels, N_labels)

while iter_ <= max_iter: # this is just a for loop...
    clusters = _cluster_windows(cutoff, N_labels, sim_mats)
    cluster_sq, _sq_counts, sublist_keys_filt = _check_seq(clusters, T_labels, N_labels)

    if len(sublist_keys_filt) != 0:
        max_ = np.max([len(k) for k in sublist_keys_filt])
        if max_seq_rep < max_:
            max_seq_rep = max_
            opt_cutoff=cutoff

    cutoff += lr
    iter_ +=1


    print(f'iter - {iter_/max_iter} | cutoff - {cutoff} | opt_cutoff - {opt_cutoff} | most_detections - {max_seq_rep}',end='\r')

clusters = _cluster_windows(opt_cutoff, N_labels, sim_mats)
cluster_sq, sq_counts, sublist_keys_filt = _check_seq(clusters, T_labels, N_labels)


''' to get the timings'''

# Sort y according to x
sorted_indices = np.argsort(T_labels)
sorted_x = T_labels[sorted_indices]

all_times = []
all_labels = []
for key in sublist_keys_filt:
    pattern_repetition_labels = np.zeros((len(cluster_sq[str(key)]),len(clusters)))
    for i,k in enumerate(cluster_sq[str(key)]):
        pattern_repetition_labels[i][clusters==k] = 1
        pattern_repetition_labels[i] *= np.cumsum(pattern_repetition_labels[i])
    pattern_repetition_labels = np.sum(pattern_repetition_labels,axis=0,dtype='int')
    all_labels.append(pattern_repetition_labels)

    sorted_y = pattern_repetition_labels[sorted_indices]
    pattern_times = np.array([sorted_x[sorted_y==i][0] for i in range(1,max(pattern_repetition_labels)+1)])
    all_times.append(pattern_times)

pattern_template = []
patterns = []
for i in range(len(all_times)):
    pattern = []
    pattern_template.append([])
    for time in all_times[i]:
        condition = (T_labels > time-window_dim*2) & (T_labels < time + window_dim*2)
        pattern = [tuple(k) for k in np.array([T_labels[condition]-time, N_labels[condition]]).T] # creating a list of tuples
        pattern_template[-1] += pattern # adds all points of each pattern to template_pattern
        patterns.append(pattern)

for i,pattern in enumerate(pattern_template):
    counts = [pattern.count(k) for k in pattern]
    pattern_template[i] = np.array(pattern)[np.where(counts == np.max(counts))[0]]
    pattern_template[i][:,0] -= min(pattern_template[i][:,0])
    pattern_template[i] = np.unique(pattern_template[i],axis=0)

if len(pattern_template) == 0:
    return pattern_template, sublist_keys_filt, None

win_size = (N,1+max([max(k[:,0]) for k in pattern_template]))
pattern_img = np.zeros((len(pattern_template),*win_size))
for p,pattern in enumerate(pattern_template):
    for (i,j) in pattern:
        pattern_img[p,j,i] = 1



Cleaning spikes...Windowing... 717


IndexError: index 4 is out of bounds for axis 0 with size 4

In [38]:
sim_mats = np.zeros(np.max(N_labels)+1,dtype='object')
for n in np.unique(N_labels):
    idc = np.where(N_labels==n)[0]
    windows_n = windows[idc]
    if len(windows_n) > 1:
        x = np.zeros((len(windows_n),len(windows_n)))
        for i in range(windows_n.shape[0]):
            for j in range(windows_n.shape[0]):
                common_rows = windows_n[i].intersection(windows_n[j])
                num_identical_rows = len(common_rows)
                x[i,j] = len(common_rows)/min(len(windows_n[i]),len(windows_n[j]))
        np.fill_diagonal(x,0)# make sure the diagonals are zero, this is important the more spikes there are...
        sim_mats[n] = x-1 

In [36]:
N_labels

array([0, 1, 3, 1, 1, 2, 1, 2, 1, 1, 3, 1, 4, 0, 2, 1, 3, 4, 3, 0, 4, 0,
       2, 2, 3, 0, 2, 2, 3, 4, 1, 3, 0, 4, 4, 2, 0, 4, 4, 3, 4, 1, 3, 1,
       4, 3, 4, 1, 3, 0, 1, 0, 2, 2, 4, 2, 4, 2, 3, 1, 3, 4, 1, 2, 2, 3,
       2, 1, 2, 4, 4, 0, 1, 3, 1, 2, 0, 4, 0, 1, 1, 0, 1, 2, 3, 1, 4, 3,
       0, 2, 4, 0, 1, 0, 4, 0, 1, 3, 2, 4, 0, 3, 1, 2, 3, 1, 0, 3, 3, 0,
       3, 4, 0, 0, 0, 2, 3, 0, 4, 0, 1, 3, 3, 3, 2, 3, 4, 0, 3, 1, 3, 3,
       0, 2, 0, 0, 2, 1, 2, 0, 2, 1, 2, 4, 1, 3, 4, 0, 3, 4, 1, 1, 1, 2,
       0, 2, 2, 0, 0, 1, 3, 0, 2, 2, 3, 3, 0, 1, 3, 3, 0, 1, 4, 3, 3, 4,
       0, 1, 3, 3, 0, 3, 0, 1, 2, 1, 2, 3, 4, 1, 0, 0, 1, 2, 4, 0, 3, 1,
       0, 3, 2, 3, 4, 3, 1, 4, 1, 2, 1, 0, 4, 3, 0, 3, 1, 2, 0, 3, 0, 2,
       2, 2, 2, 1, 3, 4, 1, 0, 4, 2, 0, 1, 4, 4, 0, 2, 1, 1, 2, 3, 1, 3,
       0, 3, 0, 0, 1, 1, 2, 2, 3, 4, 1, 3, 2, 1, 4, 0, 1, 0, 4, 2, 3, 2,
       1, 4, 3, 0, 1, 0, 0, 3, 0, 2, 1, 1, 1, 2, 0, 4, 1, 0, 2, 0, 1, 3,
       0, 1, 0, 2, 3, 1, 0, 2, 4, 0, 3, 1, 3, 0, 4,

In [24]:

def _get_sim_mats(windows, T_labels, N_labels):
    sim_mats = np.zeros(np.max(N_labels),dtype='object')
    for n in np.unique(N_labels):
        idc = np.where(N_labels==n)[0]
        windows_n = windows[idc]
        if len(windows_n) > 1:
            x = np.zeros((len(windows_n),len(windows_n)))
            for i in range(windows_n.shape[0]):
                for j in range(windows_n.shape[0]):
                    common_rows = windows_n[i].intersection(windows_n[j])
                    num_identical_rows = len(common_rows)
                    x[i,j] = len(common_rows)/min(len(windows_n[i]),len(windows_n[j]))
            np.fill_diagonal(x,0)# make sure the diagonals are zero, this is important the more spikes there are...
            sim_mats[n] = x-1 
    return sim_mats

def _cluster_windows(cutoff, N_labels, sim_mats):
    clusters = np.zeros_like(N_labels)
    for n in np.unique(N_labels):
        idc = np.where(N_labels==n)[0]
        if (type(sim_mats[n]) == np.ndarray) and (not np.all(sim_mats[n] == 0)):
            l = max(clusters)+1
            clusters[idc]= l+fcluster(linkage(sim_mats[n], method='complete'), cutoff, criterion='distance')
    return clusters

def _check_seq(clusters, T_labels, N_labels):

    time_differences = []
    cluster_sq = {}
    for cluster in np.unique(clusters):
        temp = list(np.diff(np.unique(T_labels[clusters == cluster])))
        str_temp = str(temp)
        time_differences.append(temp)
        if str_temp in cluster_sq.keys():
            cluster_sq[str_temp] = cluster_sq[str_temp] + [cluster]
        else:
            cluster_sq[str_temp] = [cluster]

    # Convert the list of lists to a set of tuples to remove duplicates
    unique_sublists_set = set(tuple(sublist) for sublist in time_differences if sublist)

    # Convert the set of tuples back to a list of lists
    unique_sublists = [list(sublist) for sublist in unique_sublists_set]

    # Count the occurrences of each unique sublist in the original list
    sublist_counts = Counter(tuple(sublist) for sublist in time_differences if sublist)

    # Print the unique sublists and their respective counts
    sq_counts = np.zeros(len(sublist_counts)) 
    for i,sublist in enumerate(unique_sublists):
        count = sublist_counts[tuple(sublist)]
        sq_counts[i] = count
    #     print(f"{sublist}: {count} occurrences")
    sublist_keys_np = np.array([list(key) for key in sublist_counts.keys()],dtype='object')
    sublist_keys_filt = sublist_keys_np[np.array(list(sublist_counts.values())) >1] # only bother clustering repetitions that appear for more than one neuron

    return cluster_sq, sq_counts, sublist_keys_filt