In [1]:
import numpy as np
import scipy.io as sio
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Animals included in the analysis (12 in total), identified by their numeric id.
mice = [
    f'ym{mouse_id}'
    for mouse_id in (212, 213, 214, 215, 218, 219, 220, 222, 223, 224, 226, 227)
]

# Recording sessions: '5FC' is treated as the recent test, '7FC' as the remote test.
sessions = ['5FC', '7FC']

# Folder that holds the per-mouse '<mouse>_<session>_LFP.mat' and
# '<mouse>_<session>_Freeze_Ts.csv' files.
# Alternative location kept for reference:
#   r'D:/Yuichi/MemoryAge_WinLen_5_Step_0.5/Data/DATA_win_len_5__step_0.5/'
data_dir = r'D:/Yuichi/MemoryAge_WinLen_5_Step_0.5/Data/'

# LFP sampling rate in Hz; the window and stride below are expressed in samples.
sample_rate = 1600
window_len = int(5.0 * sample_rate)   # 5.0 s sliding window
step = int(0.5 * sample_rate)         # 0.5 s stride between consecutive windows

# Normalise each session's LFP with its stored freezing mean/std before cutting.
Z_SCORE = True

# Print the per-mouse segment counts while processing.
PRINT = True

# 1. Data segmentation
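For each mouse and each session, cut the LFP recorded during every freezing period (from `xB` to `xE`) into segments using a sliding window of length `window_len` and stride `step`, and save all resulting segments.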

In [3]:
# Per-mouse lists of LFP segments, one dictionary per session type.
segments_all_recent, segments_all_remote = dict(), dict()

# Running totals of segments over all mice, one counter per session type.
tot_num_seg_recent = tot_num_seg_remote = 0

def _map_timestamps_to_indices(ts, event_ts):
    """Map event timestamps to indices of the LFP timestamp array.

    Every event is assigned the index of the first LFP sample whose timestamp is
    greater than or equal to the event timestamp. Events are handled in the given
    order and each one is forced onto a strictly later sample than the previously
    mapped event, so the returned indices are strictly increasing.

    Parameters
    ----------
    ts : numpy.ndarray
        1-D array of LFP timestamps. Assumed to be sorted in ascending order
        (np.searchsorted requires it) -- TODO confirm for every recording.
    event_ts : numpy.ndarray
        1-D array of event timestamps in the same unit as `ts`.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per event. NaN marks an event for which no
        sample qualifies, e.g. an event later than the last LFP timestamp.
    """
    n_samples = ts.shape[0]
    # index of the first sample with ts >= event (equals n_samples when there is none)
    first_at_or_after = np.searchsorted(ts, event_ts, side='left')

    mapped = np.full(len(first_at_or_after), np.nan)
    next_free_idx = 0
    # this loop runs over the (few) events only, not over the LFP samples
    for event_i, candidate in enumerate(first_at_or_after):
        sample_idx = max(int(candidate), next_free_idx)
        if sample_idx < n_samples:
            mapped[event_i] = sample_idx
            next_free_idx = sample_idx + 1
    return mapped


# The table of freezing mean/std does not depend on the mouse or the session,
# so it is loaded once here instead of once per loop iteration.
if Z_SCORE:
    freeze_lfp_mean_std = pd.read_csv(r'./check_freeze_lfp_mean_std_figs/freeze_lfp_mean_std.csv')
    freeze_lfp_mean_std = freeze_lfp_mean_std.set_index(['Mouse-Session'])

for mouse in tqdm(mice):
    for session in sessions:
        # ---------------------------------------------------
        # load LFP and the corresponding TS
        lfp_name = data_dir + '/' + mouse + '_' + session + '_' + 'LFP.mat'
        lfp_and_ts = sio.loadmat(lfp_name)
        lfp = lfp_and_ts['LFP_3_regions'].transpose()   # e.g., shape=(3, 987136)
        ts = lfp_and_ts['LFP_ts_usec'].squeeze()        # e.g., shape=(987136)
        del lfp_and_ts

        # load (xB, xE): begin / end timestamp of every freezing period
        fre_ts_name = data_dir + '/' + mouse + '_' + session + '_' + 'Freeze_Ts.csv'
        fre_B_E = pd.read_csv(fre_ts_name, header=None)
        fre_B_E = fre_B_E.rename(columns={0:'xB',1:'xE'})

        # warn when the first freezing period starts before the first LFP timestamp
        if fre_B_E.iloc[0,0] < ts[0]:
            print(f'Attention: Mouse{mouse}, Session{session}.')
        # warn when the last freezing period ends after the last LFP timestamp
        if fre_B_E.iloc[-1,1] > ts[-1]:
            print(f'Attention: Mouse{mouse}, Session{session}.')

        # Now, we have
        # - lfp           in millivolts
        # - ts            in microseconds; the lfp is a downsampled signal with sample rate=1600Hz,
        #                 so two adjacent ts differ by 625us, e.g., np.diff(ts)
        # - fre_B_E       in microseconds

        # ---------------------------------------------------
        #   Z-SCORE normalization
        # ---------------------------------------------------
        if Z_SCORE:
            # the stored mean and std of the current session's freezing lfp
            lfp_mean = freeze_lfp_mean_std.loc[mouse + '_' + session, 'Mean']
            lfp_std = freeze_lfp_mean_std.loc[mouse + '_' + session, 'Std']

            lfp = (lfp - lfp_mean)/lfp_std

            # Now, we have
            # - lfp        normalized with the freezing-period statistics
            #              (intended: the freezing samples alone have mean=0, std=1)
            # TODO: How to deal with the outliers?

        # ---------------------------------------------------
        # Convert xB / xE from machine time to the corresponding idx on the lfp array
        # e.g., xB machine time -> lfp machine time -> lfp array idx
        # Events are visited in the order xB0, xE0, xB1, xE1, ...
        event_idx = _map_timestamps_to_indices(ts, fre_B_E.iloc[:, :2].to_numpy().ravel())
        fre_B_E_idx = pd.DataFrame(event_idx.reshape(-1, 2), columns=['xB', 'xE'])

        # A period whose begin or end could not be placed on the LFP time axis
        # (it lies after the last LFP timestamp) cannot be cut; drop it instead of
        # letting the integer conversion below fail on NaN.
        unmatched = fre_B_E_idx.isna().any(axis=1)
        if unmatched.any():
            print(f'Attention: Mouse{mouse}, Session{session}. '
                  f'Dropping {int(unmatched.sum())} freezing period(s) outside the LFP timestamps.')
            fre_B_E_idx = fre_B_E_idx.loc[~unmatched]

        # make sure the indices are int type
        fre_B_E_idx = fre_B_E_idx.astype(int)
        # xE - xB to get the length of each period (in samples)
        fre_periods = fre_B_E_idx['xE'] - fre_B_E_idx['xB']
        # remove all the periods shorter than 'window_len' defined above
        # NOTE(review): the segment loop below treats xE as inclusive (xE - xB + 1 samples),
        # so a period with exactly window_len samples is removed here although it could
        # hold one window -- confirm which convention is intended.
        fre_B_E_idx = fre_B_E_idx.loc[fre_periods >= window_len, :].reset_index(drop=True)

        del event_idx, unmatched, fre_periods
        # Now, we have
        # - fre_B_E_idx

        # ---------------------------------------------------
        # now we cut the lfp into 'window_len' segments inside every freezing period

        segment_all = []

        # for each Begin End pair
        for B_tmp, E_tmp in zip(fre_B_E_idx['xB'].tolist(), fre_B_E_idx['xE'].tolist()):
            # The last admissible start is E_tmp - window_len + 1, because the samples
            # from there up to E_tmp (inclusive) are exactly window_len points;
            # range() excludes its stop value, hence the '+ 2'.
            for segment_start in range(B_tmp, E_tmp - window_len + 2, step):
                # cut lfp
                segment = lfp[:, segment_start: segment_start + window_len]
                segment_all.append(segment)

        if session == '5FC':  # recent
            segments_all_recent[mouse] = segment_all
            tot_num_seg_recent += len(segment_all)
            if PRINT:
                print(f'Mouse:{mouse} Session:{session}| There are {len(segment_all)} segments.')
                recent_seg_num_this_mouse = len(segment_all)
        else:                 # remote
            segments_all_remote[mouse] = segment_all
            tot_num_seg_remote += len(segment_all)
            if PRINT:
                print(f'            Session:{session}| There are {len(segment_all)} segments.')
                remote_seg_num_this_mouse = len(segment_all)
                print(f'            Total:{recent_seg_num_this_mouse+remote_seg_num_this_mouse}')

# Report the grand totals accumulated over all mice.
print(f'There are {tot_num_seg_recent} segments in recent.')
print(f'There are {tot_num_seg_remote} segments in remote.')

# Write both dictionaries (mouse name -> list of segments) to MATLAB files
# in the current working directory.
for mat_name, mouse_segments in (
    ('segments_all_recent.mat', segments_all_recent),
    ('segments_all_remote.mat', segments_all_remote),
):
    sio.savemat(mat_name, mouse_segments)

  0%|          | 0/12 [00:00<?, ?it/s]

Mouse:ym212 Session:5FC| There are 109 segments.
            Session:7FC| There are 33 segments.
            Total:142
Mouse:ym213 Session:5FC| There are 7 segments.
            Session:7FC| There are 9 segments.
            Total:16
Mouse:ym214 Session:5FC| There are 3 segments.
            Session:7FC| There are 5 segments.
            Total:8
Mouse:ym215 Session:5FC| There are 237 segments.
            Session:7FC| There are 32 segments.
            Total:269
Mouse:ym218 Session:5FC| There are 215 segments.
            Session:7FC| There are 99 segments.
            Total:314
Mouse:ym219 Session:5FC| There are 15 segments.
            Session:7FC| There are 9 segments.
            Total:24
Mouse:ym220 Session:5FC| There are 173 segments.
            Session:7FC| There are 44 segments.
            Total:217
Mouse:ym222 Session:5FC| There are 62 segments.
            Session:7FC| There are 275 segments.
            Total:337
Mouse:ym223 Session:5FC| There are 125 segments.
           