Step 1: Loading and concatenating time series
----------------------

The total length $N$ of the concatenated time series is 100620, the result of concatenating time series of length 53040 for the dual n-back task and 47580 for rest.

In [3]:
import numpy as np

# Read the pre-computed time series file. The file holds a pickled Python
# object (hence allow_pickle=True); .item() unwraps it from the array that
# np.load returns so it can be indexed by key below.
data = np.load(
    "./data/neuroimaging/concat_timeseries_shaefer400_pipeline-24HMP_8Phys_SpikeReg_4GS.npy",
    allow_pickle=True,
).item()

# Time series of the two conditions that get stacked along the first axis
concat_ts_dualnback = data['tasks']['dualnback']['timeseries']
concat_ts_rest = data['tasks']['rest']['timeseries']

# Row counts of each condition and the shared column count
len_dualnback, len_rest = (ts.shape[0] for ts in (concat_ts_dualnback, concat_ts_rest))
n_rois = concat_ts_dualnback.shape[1]

# Allocate the stacked matrix once; every row is overwritten below, so the
# buffer does not need to be zero-initialised. Dual n-back rows come first,
# rest rows follow.
X = np.empty((len_dualnback + len_rest, n_rois))
X[:len_dualnback] = concat_ts_dualnback
X[len_dualnback:] = concat_ts_rest

print(f"Shape of concatenated timeseries: {X.shape}")

Shape of concatenated timeseries: (100620, 400)


Step 2: Clustering the timeseries into brain states
----------------------

To discover the main brain states present in the time series, we ran $k$-means clustering for every $k$ from $k$ = 2 to $k$ = 18, with 100 random initializations for each $k$, using Euclidean distance as the measure of similarity.

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist,pdist
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import os

# Setup
k_min = 2
k_max = 18
n_jobs = 8
n_init = 100
random_state = 1234
# Number of points drawn for the silhouette estimate. It has to be much larger
# than k_max: a tiny sample can contain fewer than 2 (or too many) distinct
# labels, which makes silhouette_score raise, and gives a very noisy estimate.
silhouette_sample_size = 10000
K = range(k_min, k_max+1)

# K-means clustering
# `n_jobs` was deprecated and later removed from KMeans; pass it only when the
# installed scikit-learn still exposes it so the cell runs on old and new
# versions alike.
kmeans_kwargs = {'n_init': n_init, 'random_state': random_state}
if 'n_jobs' in KMeans().get_params():
    kmeans_kwargs['n_jobs'] = n_jobs
kmeans_model = [KMeans(n_clusters=k, **kmeans_kwargs).fit(X) for k in K]

# Getting additional K-means quality measures
total_wss = np.array([cent.inertia_ for cent in kmeans_model]) # Total within-cluster sum of squares
# Total sum of squares. sum(pdist(X)**2) / N equals the sum of squared
# deviations from the column means, so compute it that way: O(N * n_rois)
# instead of materialising all N*(N-1)/2 pairwise distances.
total_ss = np.sum((X - X.mean(axis=0))**2)
between_ss = total_ss - total_wss          # Between-cluster sum of squares

# Silhouette score
# Estimated on a random subsample; the fixed random_state makes the estimate
# reproducible and uses the same sampling seed for every k.
silhouette = np.array([silhouette_score(X, lab.labels_, metric='euclidean',
                                        sample_size=silhouette_sample_size,
                                        random_state=random_state)
                       for lab in kmeans_model])

# Saving dictionary with all metadata
# Copy the top level, the 'tasks' level and the two per-task dicts that are
# modified below, so that `data` itself is left untouched (a plain
# data.copy() is shallow and would let the writes leak into `data`).
kmeans_output = dict(data)
kmeans_output['tasks'] = dict(data['tasks'])
for task in ('dualnback', 'rest'):
    kmeans_output['tasks'][task] = dict(data['tasks'][task])

kmeans_output['kmeans'] = {'kmeans_models': kmeans_model,
                           'total_wss': total_wss,
                           'total_ss': total_ss,
                           'between_ss': between_ss,
                           'variance_explained': between_ss/total_ss*100,
                           'silhouette': silhouette,
                           'k_range': K,
                           'n_init': n_init}

kmeans_output['tasks']['dualnback']['len'] = len_dualnback
kmeans_output['tasks']['rest']['len'] = len_rest
kmeans_output['tasks']['dual_rest'] = {'timeseries': X,
                                       'denoising': np.unique([kmeans_output['tasks']['dualnback']['denoising'],
                                                               kmeans_output['tasks']['rest']['denoising']]),
                                       'len': len_dualnback + len_rest
                                      }
# The per-task time series are dropped from the saved copy; the stacked
# matrix is stored once under 'dual_rest'.
kmeans_output['tasks']['dualnback']['timeseries'] = None
kmeans_output['tasks']['rest']['timeseries'] = None

filename = 'kmeans_' + data['filename']
kmeans_output['filename'] = filename

np.save(f"./data/neuroimaging/{filename}.npy", kmeans_output)