In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
from tqdm.notebook import tqdm

In [2]:
# Animals and recording sessions covered by this notebook.
mice = [
    'ym212', 'ym213', 'ym214', 'ym215', 'ym218', 'ym219',
    'ym220', 'ym222', 'ym223', 'ym224', 'ym226', 'ym227',
]
sessions = ['5FC', '7FC']
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
data_dir = r'/Users/david/Projects/Yuichi/MemoryAge_WinLen_5_Step_0.5/Data'

# Sampling rate of the (downsampled) LFP in Hz. Both window sizes below are
# expressed in samples, so they are derived from this single constant instead
# of repeating the literal 1600.
sampling_rate_hz = 1600

window_len = int(sampling_rate_hz * 5)    # 5 s   -> 8000 samples
step = int(sampling_rate_hz * 0.5)        # 0.5 s ->  800 samples

In [3]:
# For every mouse/session: collect the LFP samples that fall inside non-freezing
# periods of at least `window_len` samples, compute their mean and std (pooled
# over all channels), and save a histogram. The per-session statistics end up
# in `mean_std_all_df` and in a CSV file.

# savefig / to_csv do not create missing folders, so make sure they exist.
before_z_dir = 'check_non_freeze_lfp_mean_std_figs/before_z_score'
os.makedirs(before_z_dir, exist_ok=True)

# One [name, mean, std] row per session; turned into a DataFrame once at the end
# instead of growing a DataFrame row by row.
mean_std_records = []

for mouse in tqdm(mice):
    for session in sessions:
        # ---------------------------------------------------
        # load LFP and the corresponding TS
        lfp_name = data_dir + '/' + mouse + '_' + session + '_' + 'LFP.mat'
        lfp_and_ts = sio.loadmat(lfp_name)
        lfp = lfp_and_ts['LFP_3_regions'].transpose()   # e.g., shape=(3, 987136)
        ts = lfp_and_ts['LFP_ts_usec'].squeeze()        # e.g., shape=(987136)
        del lfp_and_ts

        # load (xB, xE): begin / end timestamp of every non-freezing period
        non_fre_ts_name = data_dir + '/' + mouse + '_' + session + '_' + 'NonFreeze_Ts.csv'
        non_fre_B_E = pd.read_csv(non_fre_ts_name, header=None)
        non_fre_B_E = non_fre_B_E.rename(columns={0:'xB',1:'xE'})

        # warn if the first non-freeze period starts before the LFP recording starts
        if non_fre_B_E.iloc[0,0] < ts[0]:
            print(f'Attention: Mouse{mouse}, Session{session}.')
        # warn if the last non-freeze period ends after the LFP recording ends
        if non_fre_B_E.iloc[-1,1] > ts[-1]:
            print(f'Attention: Mouse{mouse}, Session{session}.')

        # Now, we have
        # - lfp           in millivolts
        # - ts            in microseconds; the lfp is a downsampled signal with a
        #                 sample rate of 1600 Hz, so adjacent ts differ by 625 us
        #                 (check with np.diff(ts))
        # - non_fre_B_E   in microseconds

        # ---------------------------------------------------
        # Convert xB / xE from machine time to an index into the lfp array:
        # each timestamp maps to the first LFP sample at or after it.
        # np.searchsorted does this in one vectorized call; it requires `ts`
        # to be sorted in ascending order (recording timestamps are).
        behav_ts = non_fre_B_E.iloc[:, :2].to_numpy()
        ts_idx = np.searchsorted(ts, behav_ts, side='left')
        # A timestamp later than ts[-1] yields len(ts); clip it to the last valid
        # sample so such a period simply ends with the recording (previously this
        # case left a NaN behind and crashed in the int conversion).
        ts_idx = np.minimum(ts_idx, ts.shape[0] - 1)
        non_fre_B_E_idx = pd.DataFrame(ts_idx, columns=['xB', 'xE']).astype(int)

        # xE - xB gives the length (in samples) of each period
        non_fre_periods = non_fre_B_E_idx['xE'] - non_fre_B_E_idx['xB']
        # drop all periods shorter than 'window_len' defined above
        periods_keep = window_len <= non_fre_periods
        non_fre_B_E_idx = non_fre_B_E_idx.loc[periods_keep, :].reset_index(drop=True)

        del behav_ts, ts_idx, non_fre_periods, periods_keep
        # Now, we have
        # - non_fre_B_E_idx

        # ---------------------------------------------------
        # gather all the non-freezing lfp data of this mouse/session for the stats

        # one (channels, samples) slice per kept period; xE is inclusive
        non_fre_segments = [
            lfp[:, xB_tmp: xE_tmp + 1]
            for xB_tmp, xE_tmp in non_fre_B_E_idx.itertuples(index=False, name=None)
        ]
        if not non_fre_segments:
            # np.concatenate would fail with an unhelpful message; say which session it is
            raise ValueError(
                f'No non-freezing period of at least {window_len} samples '
                f'for {mouse}_{session}.'
            )

        # flatten the non-freezing lfp data to a 1-d array for easy processing
        non_fre_segments = np.concatenate(non_fre_segments, axis=1).reshape(-1)
        # mean and std of the non-freezing lfp
        lfp_mean = np.mean(non_fre_segments)
        lfp_std = np.std(non_fre_segments)
        # save
        mean_std_records.append([mouse + '_' + session, lfp_mean, lfp_std])

        # plot hist of exactly the samples the statistics were computed from
        fig, axs = plt.subplots(1,1,figsize=(4,4), tight_layout=True)
        axs.hist(non_fre_segments, bins = 100)
        axs.axvline(x=0, color='r')
        #axs.set_xlim(-0.75, 0.75)
        fig.suptitle(mouse+'_'+session)
        fig.savefig(before_z_dir + '/' + mouse + '_' + session + '.png')
        plt.close(fig)

mean_std_all_df = pd.DataFrame(mean_std_records, columns=['Mouse-Session','Mean','Std'])
mean_std_all_df.to_csv('check_non_freeze_lfp_mean_std_figs/non_freeze_lfp_mean_std.csv')

A Jupyter Widget

## Z-score normalize each session's LFP using its non-freezing mean and std

In [4]:
# Index the per-session statistics by 'Mouse-Session' so a session's mean/std
# can be looked up by label. The guard makes the cell safe to re-run: after the
# first run 'Mouse-Session' is the index rather than a column, and calling
# set_index again would raise a KeyError.
if 'Mouse-Session' in mean_std_all_df.columns:
    mean_std_all_df = mean_std_all_df.set_index(['Mouse-Session'])

In [5]:
# For every mouse/session: z-score the LFP with that session's non-freezing
# mean/std (looked up in `mean_std_all_df`, indexed by 'Mouse-Session'), then
# recompute mean, std and histogram of the non-freezing samples as a check.
# The results end up in `mean_std_all_df_after_z` and in a CSV file.

# savefig / to_csv do not create missing folders, so make sure they exist.
after_z_dir = 'check_non_freeze_lfp_mean_std_figs/after_z_score'
os.makedirs(after_z_dir, exist_ok=True)

# One [name, mean, std] row per session; turned into a DataFrame once at the end
# instead of growing a DataFrame row by row.
mean_std_records_after_z = []

for mouse in tqdm(mice):
    for session in sessions:
        # ---------------------------------------------------
        # load LFP and the corresponding TS
        lfp_name = data_dir + '/' + mouse + '_' + session + '_' + 'LFP.mat'
        lfp_and_ts = sio.loadmat(lfp_name)
        lfp = lfp_and_ts['LFP_3_regions'].transpose()   # e.g., shape=(3, 987136)
        ts = lfp_and_ts['LFP_ts_usec'].squeeze()        # e.g., shape=(987136)
        del lfp_and_ts

        # load (xB, xE): begin / end timestamp of every non-freezing period
        non_fre_ts_name = data_dir + '/' + mouse + '_' + session + '_' + 'NonFreeze_Ts.csv'
        non_fre_B_E = pd.read_csv(non_fre_ts_name, header=None)
        non_fre_B_E = non_fre_B_E.rename(columns={0:'xB',1:'xE'})

        # Now, we have
        # - lfp           in millivolts
        # - ts            in microseconds; the lfp is a downsampled signal with a
        #                 sample rate of 1600 Hz, so adjacent ts differ by 625 us
        #                 (check with np.diff(ts))
        # - non_fre_B_E   in microseconds

        # ---------------------------------------------------
        # Convert xB / xE from machine time to an index into the lfp array:
        # each timestamp maps to the first LFP sample at or after it.
        # np.searchsorted does this in one vectorized call; it requires `ts`
        # to be sorted in ascending order (recording timestamps are).
        behav_ts = non_fre_B_E.iloc[:, :2].to_numpy()
        ts_idx = np.searchsorted(ts, behav_ts, side='left')
        # A timestamp later than ts[-1] yields len(ts); clip it to the last valid
        # sample so such a period simply ends with the recording (previously this
        # case left a NaN behind and crashed in the int conversion).
        ts_idx = np.minimum(ts_idx, ts.shape[0] - 1)
        non_fre_B_E_idx = pd.DataFrame(ts_idx, columns=['xB', 'xE']).astype(int)

        # xE - xB gives the length (in samples) of each period
        non_fre_periods = non_fre_B_E_idx['xE'] - non_fre_B_E_idx['xB']
        # drop all periods shorter than 'window_len' defined above
        periods_keep = window_len <= non_fre_periods
        non_fre_B_E_idx = non_fre_B_E_idx.loc[periods_keep, :].reset_index(drop=True)

        del behav_ts, ts_idx, non_fre_periods, periods_keep
        # Now, we have
        # - non_fre_B_E_idx

        # ---------------------------------------------------
        #              Z-score normalization
        # ---------------------------------------------------

        mean_tmp = mean_std_all_df.loc[mouse + '_' + session, 'Mean']
        std_tmp = mean_std_all_df.loc[mouse + '_' + session, 'Std']

        lfp = (lfp - mean_tmp)/std_tmp
        # The freezing part of the lfp array is normalized as well; that does not
        # matter because only the non-freezing part is used below.

        # ---------------------------------------------------
        # gather all the non-freezing lfp data of this mouse/session for the stats

        # one (channels, samples) slice per kept period; xE is inclusive
        non_fre_segments = [
            lfp[:, xB_tmp: xE_tmp + 1]
            for xB_tmp, xE_tmp in non_fre_B_E_idx.itertuples(index=False, name=None)
        ]
        if not non_fre_segments:
            # np.concatenate would fail with an unhelpful message; say which session it is
            raise ValueError(
                f'No non-freezing period of at least {window_len} samples '
                f'for {mouse}_{session}.'
            )

        # flatten the non-freezing lfp data to a 1-d array for easy processing
        non_fre_segments = np.concatenate(non_fre_segments, axis=1).reshape(-1)
        # mean and std of the (now normalized) non-freezing lfp
        lfp_mean = np.mean(non_fre_segments)
        lfp_std = np.std(non_fre_segments)
        # save
        mean_std_records_after_z.append([mouse + '_' + session, lfp_mean, lfp_std])

        # plot hist of exactly the samples the statistics were computed from
        fig, axs = plt.subplots(1,1,figsize=(4,4), tight_layout=True)
        axs.hist(non_fre_segments, bins = 100)
        axs.axvline(x=0, color='r')
        #axs.set_xlim(-2, 2)
        fig.suptitle(mouse+'_'+session)
        fig.savefig(after_z_dir + '/' + mouse + '_' + session + '.png')
        plt.close(fig)

mean_std_all_df_after_z = pd.DataFrame(
    mean_std_records_after_z, columns=['Mouse-Session','Mean','Std']
)
mean_std_all_df_after_z.to_csv('check_non_freeze_lfp_mean_std_figs/non_freeze_lfp_mean_std_after_z.csv')

A Jupyter Widget

In [6]:
# Display the per-session statistics recomputed after z-scoring. As a sanity
# check, every row should show a Mean of ~0 (floating-point noise) and a Std of 1.
mean_std_all_df_after_z

Unnamed: 0,Mouse-Session,Mean,Std
0,ym212_5FC,-2.099906e-18,1.0
1,ym212_7FC,4.9492350000000005e-18,1.0
2,ym213_5FC,-7.912708e-18,1.0
3,ym213_7FC,-1.501634e-17,1.0
4,ym214_5FC,2.584018e-18,1.0
5,ym214_7FC,-2.5025749999999998e-19,1.0
6,ym215_5FC,8.999008e-18,1.0
7,ym215_7FC,7.677158e-18,1.0
8,ym218_5FC,-1.7697860000000003e-17,1.0
9,ym218_7FC,6.398548e-18,1.0


In [7]:
# Useful Pylance settings (reference):
# https://qiita.com/honda28/items/5913720d17cad424c4c6