In [2]:
from h5py import File
import numpy as np
import random
import pandas as pd
import pickle
from datetime import datetime

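Analyze the raw recordings and save features derived from them, including the variance spectrum, the principal-component (PC) matrices, cascade sizes and durations, the firing rate, and the burstiness index.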

In [3]:
## Days in vitro (DIV)
def get_DIV(start_date, date_list):
    """Return the number of days elapsed between ``start_date`` and each date.

    Parameters
    ----------
    start_date : int or str
        Reference date written as YYMMDD. The century is assumed to be 20xx.
    date_list : sequence of int or str
        Dates written as YYMMDD (same 20xx assumption).

    Returns
    -------
    numpy.ndarray
        Float array of ``len(date_list)`` entries holding the whole number of
        days from ``start_date`` to the corresponding date.
    """
    def _to_datetime(yymmdd):
        # Prefix the century so the value can be parsed as a full YYYYMMDD date.
        return datetime.strptime("20" + str(yymmdd), "%Y%m%d")

    DIV_list = np.zeros(len(date_list))
    for idx, recording_date in enumerate(date_list):
        culture_start = _to_datetime(start_date)
        elapsed = _to_datetime(recording_date) - culture_start
        DIV_list[idx] = int(elapsed.days)
    return DIV_list

## Binned data
def get_images(T, N, bin, times, channs):
    """Bin spikes into a channel-by-time count matrix.

    Parameters
    ----------
    T : float
        Recording length in seconds. Spikes with ``time >= T`` are ignored.
    N : int
        Number of rows (channels) of the output.
    bin : float
        Bin width in milliseconds.
    times : array-like
        Spike times in seconds.
    channs : array-like of int
        Row (channel) index of every spike, aligned with ``times``.

    Returns
    -------
    images : numpy.ndarray
        Float array of shape ``(N, int(T*1000/bin) + 1)``; each column is one
        binned snapshot and holds the spike count per channel.
    meanzero_images : numpy.ndarray
        ``images`` with the mean of every row subtracted.
    """
    times = np.asarray(times)
    channs = np.asarray(channs)

    images = np.zeros((N, int(T*1000/bin)+1)) # each column corresponds to an image (binned snapshot)

    # Vectorised replacement for the per-spike Python loop: keep the spikes
    # inside the recording window and truncate their time to a column index.
    in_window = times < T
    columns = (times[in_window]*1000/bin).astype(np.intp)
    rows = channs[in_window].astype(np.intp)
    # np.add.at accumulates repeated (row, column) pairs, unlike `images[rows, columns] += 1`.
    np.add.at(images, (rows, columns), 1)

    meanzero_images = images - np.mean(images, axis=1, keepdims=True)
    return images, meanzero_images


def PCA(images):
    """Decompose the channel-by-channel second-moment matrix of ``images``.

    Parameters
    ----------
    images : numpy.ndarray
        2-D array with one row per channel and one column per time bin.
        NOTE(review): the result is a covariance matrix only if every row has
        already been mean-centred by the caller.

    Returns
    -------
    U : numpy.ndarray
        Left singular vectors of ``images @ images.T / n_columns``.
    eig_val : numpy.ndarray
        Singular values of that matrix, in descending order.
    """
    n_timepoint = len(images[0,:])
    corr = images@images.T/n_timepoint
    # hermitian=True lets NumPy exploit the symmetry of `corr`.
    U, eig_val, _ = np.linalg.svd(corr, hermitian=True)
    return U, eig_val

## Principal Component Analysis for original and shuffled data
def get_U_var(images):
    """Run ``PCA`` on the data and on two shuffled surrogates.

    Surrogates
    ----------
    * shift: every row is circularly rolled by its own random offset.
    * ind:   the entries of every column are randomly permuted across rows.

    Parameters
    ----------
    images : numpy.ndarray
        2-D array (channels x time bins). It is not modified.

    Returns
    -------
    varspec, varspec_shift, varspec_ind : numpy.ndarray
        Spectra returned by ``PCA`` for the original, shifted and permuted data.
    U, U_shift, U_ind : numpy.ndarray
        Matching singular-vector matrices.
    """
    U, varspec = PCA(images)

    N_neuron = len(images[:,0])
    N_timepoint = len(images[0,:])

    rand_shift = np.random.randint(0,N_timepoint,size=N_neuron)
    images_shift = np.zeros((N_neuron,N_timepoint))
    images_ind = np.zeros((N_neuron,N_timepoint))
    for n in range(N_neuron):
        images_shift[n,:] = np.roll(images[n,:], rand_shift[n])
    for t in range(N_timepoint):
        # np.random.shuffle works in place and returns None, which would fill
        # the surrogate with NaN and scramble the caller's array.
        # np.random.permutation returns a shuffled copy instead.
        images_ind[:,t] = np.random.permutation(images[:,t])

    U_shift, varspec_shift = PCA(images_shift)
    U_ind, varspec_ind = PCA(images_ind)
    return varspec, varspec_shift, varspec_ind, U, U_shift, U_ind

## Burstiness index (BI)
def get_burstiness(T, N, times, channs):
    """Compute the burstiness index from spike counts in 1000 ms bins.

    The spikes are binned with ``get_images`` using a 1000 ms bin and summed
    over channels. With ``f`` the share of all spikes contained in the 15 %
    of bins with the highest counts, the value returned is
    ``(f - 0.15) / 0.85``.

    Parameters
    ----------
    T, N, times, channs
        Forwarded unchanged to ``get_images``.
    """
    counts_per_channel, _ = get_images(T, N, 1000, times, channs)
    # Total number of spikes in every bin, over all channels.
    spikes_per_bin = counts_per_channel.sum(axis=0)
    ranked = np.sort(spikes_per_bin)
    # Index from which the top 15 % of bins (by spike count) start.
    top_start = int(len(ranked)*0.85)
    top_share = np.sum(ranked[top_start:])/np.sum(spikes_per_bin)
    return (top_share-0.15)/0.85

def shuffle_times(times,bin):
    """Randomly permute whole time bins of a spike train.

    The time axis is cut into consecutive bins of ``bin`` milliseconds starting
    at 0. The bins are reordered at random (using the ``random`` module) and
    every spike keeps its offset inside its bin, so the spike pattern within a
    bin is preserved while the order of the bins is destroyed.

    Parameters
    ----------
    times : array-like
        Spike times in seconds. Negative times belong to no bin and are dropped.
    bin : float
        Bin width in milliseconds.

    Returns
    -------
    numpy.ndarray
        Spike times after the bin shuffle, ordered by their new bin. Spikes
        that shared a bin keep their original relative order.
    """
    times = np.asarray(times, dtype=float)
    times = times[times >= 0]
    if times.size == 0:
        return np.array([])

    width = bin/1000  # bin width in seconds

    # Assign every spike to its bin in a single pass, instead of building one
    # boolean mask over all spikes per bin. Sizing the bin count from the
    # largest index keeps the last spike even when it sits on a bin boundary.
    bin_index = np.floor(times/width).astype(np.intp)
    n_bins = int(bin_index.max()) + 1
    offsets = times - bin_index*width

    # Random shuffle: source_of[j] is the original bin moved to position j.
    source_of = random.sample(range(n_bins), n_bins)
    new_position = np.empty(n_bins, dtype=np.intp)
    new_position[source_of] = np.arange(n_bins)

    # Merge the spike times back together in the order of the shuffled bins.
    new_bin = new_position[bin_index]
    order = np.argsort(new_bin, kind="stable")
    shuffled_times = (new_bin*width + offsets)[order]

    return shuffled_times
# def shuffle_times(times,bin):
#     # ビンの境界
#     bin_edges = np.arange(0, max(times) + bin/1000, bin/1000)

#     # 各ビンにスパイクを分配
#     binned_spike_times = []
#     for i in range(len(bin_edges) - 1):
#         bin_start, bin_end = bin_edges[i], bin_edges[i + 1]
#         spikes_in_bin = times[(times >= bin_start) & (times < bin_end)]
#         binned_spike_times.append(spikes_in_bin - bin_start)

#     # ランダムシャッフル
#     np.random.seed(42)  # 再現性のためシードを設定
#     # shuffled_bins = np.random.permutation(binned_spike_times)
#     shuffled_bins = binned_spike_times

#     # シャッフル後のスパイク時刻を統合
#     shuffled_times = []
#     for i, spikes_in_bin in enumerate(shuffled_bins):
#         bin_start = bin_edges[i]
#         shuffled_times.extend(spikes_in_bin + bin_start)

#     shuffled_times = np.array(shuffled_times)

#     return shuffled_times

## Cascade size and duration
def get_avalanche(T, times):
    """Split a spike train into cascades and return their sizes and durations.

    Consecutive spikes belong to the same cascade while the gap between them
    is smaller than ``T / len(times)``. A gap at or above that threshold
    closes the current cascade and opens a new one.

    Parameters
    ----------
    T : float
        Recording length, in the same unit as ``times``.
    times : sequence of float
        Spike times in ascending order.

    Returns
    -------
    size_list : list of int
        Number of spikes in every cascade.
    duration_list : list
        Time between the first and the last spike of every cascade.
        Both lists are empty when ``times`` is empty.
    """
    n_spikes = len(times)
    if n_spikes == 0:
        # Nothing to segment; also avoids dividing by zero below.
        return [], []

    time_thr = T/n_spikes
    size_list = []
    duration_list = []
    size = 1
    time_start = times[0]
    for previous, current in zip(times[:-1], times[1:]):
        if current - previous < time_thr:
            size += 1
        else:
            size_list.append(size)
            duration_list.append(previous - time_start)
            size = 1
            time_start = current

    # Flush the cascade that is still open when the spike train ends;
    # otherwise the last cascade would be silently lost.
    size_list.append(size)
    duration_list.append(times[-1] - time_start)
    return size_list, duration_list

In [4]:
# IDs of the dishes analysed in this notebook (same keys as the tables below).
dish_list = ["19880","23312","23280","23311","23317","23302","23282"]
# Recording dates for every dish, written as YYMMDD integers.
# Each (dish, date) pair selects one raw recording file to process.
date_all = {"19880": [240228, 240229, 240301, 240302, 240303, 240304, 240305, 240306, 240307, 240308, 240309, 240310, 240311, 240313, 240314, 240315, 240316, 240317, 240318, 240319, 240320, 240321, 240322, 240324, 240325, 240326, 240327, 240328, 240329, 240330, 240331],\
            "23312": [240418, 240419, 240423, 240424, 240425, 240426, 240427, 240428, 240429, 240430, 240501, 240502, 240503, 240504, 240505, 240506, 240507, 240508, 240509, 240510, 240511, 240512, 240513],\
            "23280": [240516, 240517, 240518, 240519, 240520, 240521, 240522, 240523, 240524, 240525, 240526, 240527, 240528, 240529, 240530, 240531, 240601, 240602],\
            "23311": [240516, 240517, 240518, 240519, 240520, 240521, 240522, 240524, 240525, 240526, 240527, 240528, 240529, 240530, 240531, 240601, 240602, 240603],\
            "23317": [240605, 240606, 240607, 240608, 240609, 240611, 240612, 240613, 240614, 240615, 240616, 240617, 240618, 240619, 240620],\
            "23302": [240619, 240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701, 240702, 240704, 240705, 240706],\
            "23282": [240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701],\
            }
# Reference date (YYMMDD) of every dish; get_DIV counts days in vitro from it.
start_date_all = {"19880": 240227,\
            "23312": 240416,\
            "23280": 240514,\
            "23311": 240514,\
            "23317": 240604,\
            "23302": 240618,\
            "23282": 240618,\
            }

# date_all = {"17355": [240302, 240303, 240304, 240305, 240306, 240307, 240308, 240309, 240310, 240311, 240312],\
#              "19880": [240228, 240229, 240301, 240302, 240303, 240304, 240305, 240306, 240307, 240308, 240309, 240310, 240311, 240313, 240314, 240315, 240316, 240317, 240318, 240319, 240320, 240321, 240322, 240324, 240325, 240326, 240327, 240328, 240329, 240330, 240331],\
#              "19815": [240302, 240303, 240304, 240305, 240306, 240307, 240308, 240309, 240310, 240311, 240312],\
#              "19802": [240229, 240301, 240302, 240303, 240304, 240305, 240306, 240307, 240308, 240309, 240310, 240311, 240313, 240314, 240315, 240316, 240317, 240318, 240321, 240322, 240324, 240325, 240326, 240328, 240329, 240330, 240331],\
#              "19870": [240418, 240419, 240423, 240424, 240425, 240426, 240427, 240428, 240429, 240430, 240501],\
#              "23312": [240418, 240419, 240423, 240424, 240425, 240426, 240427, 240428, 240429, 240430, 240501, 240502, 240503, 240504, 240505, 240506, 240507, 240508, 240509, 240510, 240511, 240512, 240513],\
#              "23286": [240418, 240419, 240423, 240424, 240425, 240426, 240427, 240428, 240429, 240430, 240501, 240502, 240503, 240504, 240505, 240506, 240507, 240508, 240509, 240510, 240511, 240512, 240513],\
#              "17434": [240419, 240424, 240426, 240429],\
#              "23280": [240516, 240517, 240518, 240519, 240520, 240521, 240522, 240523, 240524, 240525, 240526, 240527, 240528, 240529, 240530, 240531, 240601, 240602],\
#              "23311": [240516, 240517, 240518, 240519, 240520, 240521, 240522, 240524, 240525, 240526, 240527, 240528, 240529, 240530, 240531, 240601, 240602, 240603],\
#              "23306": [240516, 240517, 240518, 240519, 240520, 240521, 240522, 240524, 240525, 240526, 240527, 240528, 240529, 240530, 240531, 240601, 240602, 240603],\
#              "23226": [240605, 240606, 240607, 240608, 240609, 240610, 240611, 240612, 240613, 240614, 240615, 240616, 240617, 240618, 240619, 240620],\
#              "23317": [240605, 240606, 240607, 240608, 240609, 240611, 240612, 240613, 240614, 240615, 240616, 240617, 240618, 240619, 240620],\
#              "19871": [240605, 240606, 240607, 240608, 240609, 240610, 240611, 240612, 240613],\
#              "19819": [240605, 240606, 240607, 240608, 240609, 240610, 240611, 240612, 240613],\
#              "23256": [240619, 240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701, 240702, 240703, 240704, 240705, 240706, 240707, 240708, 240709, 240710, 240711],\
#              "23302": [240619, 240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701, 240702, 240704, 240705, 240706],\
#              "23282": [240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701],\
#              "23235": [240621, 240622, 240623, 240624, 240625, 240626, 240627, 240628, 240629, 240630, 240701, 240702, 240703, 240704, 240705, 240706],\
#              "17278": [240703, 240704, 240705, 240706, 240707, 240708, 240709, 240710, 240711],\
#              "19918": [240707, 240708, 240709, 240710, 240711],\
#              }
# start_date_all = {"17355": 240227,\
#              "19880": 240227,\
#              "19815": 240227,\
#              "19802": 240227,\
#              "19870": 240416,\
#              "23312": 240416,\
#              "23286": 240416,\
#              "17434": 240416,\
#              "23280": 240514,\
#              "23311": 240514,\
#              "23306": 240514,\
#              "23226": 240604,\
#              "23317": 240604,\
#              "19871": 240604,\
#              "19819": 240604,\
#              "23256": 240618,\
#              "23302": 240618,\
#              "23282": 240618,\
#              "23235": 240618,\
#              "17278": 240702,\
#              "19918": 240702,\
#              }


# Days in vitro of every recording, keyed by dish ID.
DIV_all = dict()
for dish in date_all:
    date_list = date_all[dish]
    start_date = start_date_all[dish]
    DIV_all.update({dish: get_DIV(start_date, date_list)})

In [4]:
# Result containers: every dict maps a dish ID to a list with one entry per recording date.
xy_all = {}              # Position of electrode
firingrate_all = {}      # Average firing rate [Hz]
varspec_all = {}         # Variance spectrum
varspec_shift_all = {}   # Variance spectrum of row-shifted (shuffled) data
varspec_ind_all = {}     # Variance spectrum of column-shuffled data
U_all = {}               # PC matrix
U_shift_all = {}         # PC matrix of row-shifted (shuffled) data
U_ind_all = {}           # PC matrix of column-shuffled data
size_all = {}            # Cascade size
size_time_all = {}       # Cascade size after shuffling the time bins
duration_all = {}        # Cascade duration
duration_time_all = {}   # Cascade duration after shuffling the time bins
burstiness_all = {}      # Burstiness index

bin = 10 # [ms]

for dish, date_list in date_all.items():
    xy_dish = []
    firingrate_dish = []
    varspec_dish = []
    varspec_shift_dish = []
    varspec_ind_dish = []
    U_dish = []
    U_shift_dish = []
    U_ind_dish = []
    size_dish = []
    size_time_dish = []
    duration_dish = []
    duration_time_dish = []
    burstiness_dish = []
    
    for date in date_list:
        path = f'/mnt/cerebellum/akita/development/data/{dish}/{date}/record.raw.h5' # access raw data
        # Read everything needed inside a context manager so the HDF5 handle is
        # closed before the long computations start (it used to stay open forever).
        with File(path, "r") as record:
            T = (record['data_store/data0000/stop_time'][0] - record['data_store/data0000/start_time'][0])/1000 # recording time[s] (1800 sec.)
            # Load the spike table once; both the times and the channels come from it.
            spikes = pd.DataFrame(np.array(record['data_store/data0000/spikes']))
            position = pd.DataFrame(np.array(record['data_store/data0000/settings/mapping']))
        N = 1024 # number of electrode
        times = np.array(spikes['frameno']/20000) # framenumber converted to spiking time[s]
        times -= times[0]
        channs = np.array(spikes['channel']) # index of electrode that detected the firing

        # Channels missing from the mapping table keep the position (0, 0).
        xy = np.zeros((N,2))
        for i in range(N):
            channel_rows = position[position['channel'] == i]
            if len(channel_rows) > 0:
                xy[i,0] = float(channel_rows['x'].iloc[0])
                xy[i,1] = float(channel_rows['y'].iloc[0])
        xy_dish.append(xy)
                
        firingrate_dish.append(len(times)/(T*N))
        
        image, centered_image = get_images(T,N,bin,times,channs)
        varspec, varspec_shift, varspec_ind, U, U_shift, U_ind = get_U_var(centered_image) # PCA
        varspec_dish.append(varspec)
        varspec_shift_dish.append(varspec_shift)
        varspec_ind_dish.append(varspec_ind)
        U_dish.append(U)
        U_shift_dish.append(U_shift)
        U_ind_dish.append(U_ind)
        
        size, duration = get_avalanche(T,times)
        size_dish.append(size)
        duration_dish.append(duration)
        
        # Same cascade analysis on a surrogate spike train with shuffled time bins.
        times_shuffle = shuffle_times(times,bin)
        size_time, duration_time = get_avalanche(T,times_shuffle)
        size_time_dish.append(size_time)
        duration_time_dish.append(duration_time)
        
        burstiness_dish.append(get_burstiness(T,N,times,channs))
    
    
    xy_all[dish]              = xy_dish
    firingrate_all[dish]      = firingrate_dish
    varspec_all[dish]         = varspec_dish
    varspec_shift_all[dish]   = varspec_shift_dish
    varspec_ind_all[dish]     = varspec_ind_dish
    U_all[dish]               = U_dish
    U_shift_all[dish]         = U_shift_dish
    U_ind_all[dish]           = U_ind_dish
    size_all[dish]            = size_dish
    size_time_all[dish]       = size_time_dish
    duration_all[dish]        = duration_dish
    duration_time_all[dish]   = duration_time_dish
    burstiness_all[dish]      = burstiness_dish

In [5]:
path = '/root/code/paper_public/data/10ms'

# Every result dictionary is pickled to "<path>/<name>.pkl", in this order.
results_to_save = {
    "xy_all": xy_all,
    "firingrate_all": firingrate_all,
    "varspec_all": varspec_all,
    "varspec_shift_all": varspec_shift_all,
    "varspec_ind_all": varspec_ind_all,
    "U_all": U_all,
    "U_shift_all": U_shift_all,
    "U_ind_all": U_ind_all,
    "size_all": size_all,
    "size_time_all": size_time_all,
    "duration_all": duration_all,
    "duration_time_all": duration_time_all,
    "burstiness_all": burstiness_all,
}
for result_name, result in results_to_save.items():
    with open(f"{path}/{result_name}.pkl","wb") as f:
        pickle.dump(result, f)

In [5]:
firingrate_all = {}              # Average firing rate [Hz]: dish ID -> one value per recording date
for dish, date_list in date_all.items():
    firingrate_dish = []
    for date in date_list:
        path = f'/mnt/cerebellum/akita/development/data/{dish}/{date}/record.raw.h5' # access raw data
        # The context manager closes the HDF5 handle once the data has been read.
        with File(path, "r") as record:
            T = (record['data_store/data0000/stop_time'][0] - record['data_store/data0000/start_time'][0])/1000 # recording time[s]
            times = np.array(pd.DataFrame(np.array(record['data_store/data0000/spikes']))['frameno']/20000) # framenumber converted to spiking time[s]
        N = 1024 # number of electrode
        times -= times[0]
        
        # Spikes per second and per electrode.
        firingrate_dish.append(len(times)/(T*N))
        
    firingrate_all[dish]              = firingrate_dish

In [5]:
# with open("/root/code/paper_public/data/firingrate_all.pkl","wb") as f:
#     pickle.dump(firingrate_all, f)
path = '/root/code/paper_public/data/10ms'

# Store the recording dates and the matching days in vitro next to the results.
for meta_name, meta in (("date_all", date_all), ("DIV_all", DIV_all)):
    with open(f"{path}/{meta_name}.pkl","wb") as f:
        pickle.dump(meta, f)