# Get Sachi's PSTH into correct shape and save in inventory

In [2]:
import os
import scipy.io as sio
import numpy as np
import shutil 
from natsort import natsorted

# Image sets to process. Each name is substituted into the project data paths
# and the inventory paths (exp_<image_set>) used by the cells below.
image_sets = ['hvm', 'bold5000', 'nat300'] # NOTE(review): a trailing "'hvm'" was left here -- presumably a leftover single-set override; confirm

In [2]:
#--------------------------------------------------------------------------------------------
# Iterate through every image set and bring each channel's PSTH into the shape
# (n_images, max_repetitions, n_bins), then save one .mat file per channel.
#--------------------------------------------------------------------------------------------
for image_set in image_sets:
    # Define paths for Intan and MWorks processed data
    intan_proc_path = f'/braintree/data2/active/users/sachis/projects/{image_set}/monkeys/solo/intanproc'
    mworks_proc_path = f'/braintree/data2/active/users/sachis/projects/{image_set}/monkeys/solo/mworksproc'

    # List and sort Intan & MWorks recordings, ignoring hidden files
    intan_recordings = sorted(item for item in os.listdir(intan_proc_path) if not item.startswith('.'))
    mworks_recordings = sorted(item for item in os.listdir(mworks_proc_path) if not item.startswith('.'))

    # Date and time are the 3rd and 4th '_'-separated fields of each recording name
    intan_dates = [recording.split('_')[2] for recording in intan_recordings]
    intan_times = [recording.split('_')[3] for recording in intan_recordings]
    mworks_dates = [recording.split('_')[2] for recording in mworks_recordings]
    mworks_times = [recording.split('_')[3] for recording in mworks_recordings]

    # Process each Intan recording
    for intan_rec, intan_date, intan_time in zip(intan_recordings, intan_dates, intan_times):

        session_name = f'exp_{image_set}.sub_solo.20{intan_date}_{intan_time}.proc'
        inventory_save_path = f'/braintree/home/aliya277/inventory_new/exp_{image_set}/exp_{image_set}.sub_solo/exp_{image_set}.sub_solo.20{intan_date}/{session_name}'
        if os.path.isdir(inventory_save_path):
            print(f'path already exists for {session_name}')
            continue

        # Find the matching MWorks recording based on the date. Without any
        # recording for that date the session cannot be reshaped, so skip it
        # instead of aborting the whole run with a ValueError.
        if intan_date not in mworks_dates:
            print(f'no MWorks recording found for {intan_rec} (date {intan_date}), skipping')
            continue
        index = mworks_dates.index(intan_date)

        # If the first MWorks recording of that day started at least one hour
        # (by hour field) before the Intan recording, and the next MWorks
        # recording fits the Intan start hour, use the next one. The next
        # recording must exist and must belong to the same date.
        intan_hour = int(intan_time[:2])
        next_index = index + 1
        if (int(mworks_times[index][:2]) <= intan_hour - 1
                and next_index < len(mworks_recordings)
                and mworks_dates[next_index] == intan_date
                and int(mworks_times[next_index][:2]) <= intan_hour + 1):
            index = next_index
        print(index)

        mworks_rec = mworks_recordings[index]

        # Load MWorks data file
        mwk_file = sio.loadmat(os.path.join(mworks_proc_path, mworks_rec))
        # Extract image order and unique image IDs (np.unique returns them sorted)
        image_order = mwk_file['image_order'][0]
        image_ids = np.unique(image_order)
        # For every image ID, the positions in image_order at which it was shown
        image_id_by_index = [np.where(image_order == element)[0] for element in image_ids]

        # Find the maximum repetition count for any image
        max_rep = max(len(arr) for arr in image_id_by_index)

        # List and sort PSTH files for the current Intan recording
        psth_dir = os.path.join(intan_proc_path, intan_rec, 'psth')
        psth_files = sorted(item for item in os.listdir(psth_dir) if not item.startswith('.'))
        if not psth_files:
            # Creating an empty output folder would mark the session as done.
            print(f'no PSTH files found for {intan_rec}, skipping')
            continue

        # Output folder for the per-channel files. The parent folders do not
        # exist yet (the session was skipped above otherwise), so they have to
        # be created as well; a stale folder is replaced.
        savepath = os.path.join(inventory_save_path, 'psth', 'channel_files')
        if os.path.isdir(savepath):
            shutil.rmtree(savepath)
        os.makedirs(savepath, exist_ok=True)

        # Populate and save the final PSTH array, one channel file at a time
        for i_channel, psth_file in enumerate(psth_files):
            psth_ = sio.loadmat(os.path.join(psth_dir, psth_file))
            psth = psth_['psth']
            meta = psth_['meta']

            # The last bin of every trial is dropped; missing repetitions stay NaN
            final_shape = (image_ids.shape[0], max_rep, psth.shape[1] - 1)
            final_psth = np.full(final_shape, np.nan)

            for image_id, image_id_indices in zip(image_ids, image_id_by_index):
                # Assign PSTH data to the final array (image_id starts at 1).
                # NOTE(review): assumes image IDs are contiguous 1..N -- confirm.
                final_psth[image_id - 1, :len(image_id_indices), :] = psth[image_id_indices, :-1]

            # Print the shape of the final PSTH array for verification
            print(f'PSTH for channel {i_channel} for {intan_rec} of shape {final_psth.shape}')

            # Prepare data for saving (float16 to keep the files small)
            final_psth = final_psth.astype(np.float16)
            data = {'psth': final_psth, 'meta': meta}

            # Save the processed data in MATLAB format; a failing channel is
            # reported but does not stop the remaining channels.
            out_file = os.path.join(savepath, f'{intan_rec}_channel_{i_channel}_psth.mat')
            try:
                sio.savemat(out_file, data)
            except Exception as e:
                print(f'could not save {out_file}: {e}')


path already exists for exp_hvm.sub_solo.20181102_114336.proc
path already exists for exp_hvm.sub_solo.20181105_112735.proc
path already exists for exp_hvm.sub_solo.20181106_101732.proc
path already exists for exp_hvm.sub_solo.20181107_094221.proc
path already exists for exp_hvm.sub_solo.20181109_100301.proc
path already exists for exp_hvm.sub_solo.20181112_105935.proc
path already exists for exp_hvm.sub_solo.20181113_110917.proc
path already exists for exp_hvm.sub_solo.20181114_101205.proc
path already exists for exp_hvm.sub_solo.20181116_100519.proc
path already exists for exp_hvm.sub_solo.20181119_095618.proc
path already exists for exp_hvm.sub_solo.20181120_100252.proc
path already exists for exp_hvm.sub_solo.20181121_111735.proc
path already exists for exp_hvm.sub_solo.20181124_111612.proc
path already exists for exp_hvm.sub_solo.20181125_110844.proc
path already exists for exp_hvm.sub_solo.20181126_101025.proc
path already exists for exp_bold5000.sub_solo.20190220_143521.proc
pat


For Solo, the normalizer images are stored at the end of each session's data: the first X images are the experiment images, followed by one grey image and then the 25 normalizer images (i.e., the last 26 images belong to the normalizer set).
Only starting with Oleo were the normalizers run separately (at the beginning of each session, min. 10 repetitions). Each session has psth_meta = -100, 380, 10.

In [None]:
#--------------------------------------------------------------------------------------------
# Iterate through every image set and save normalizer and experiment PSTHs in .npy files
# (because .mat files are too big)
#--------------------------------------------------------------------------------------------

# Number of trailing images per session that form the normalizer set
# (one grey image followed by 25 normalizer images).
n_norm_images = 26
# Set to True to leave sessions alone whose experiment .npy file already exists.
skip_existing = False

savepath_template = '/braintree/home/aliya277/inventory_new/exp_{}/exp_{}.sub_solo/exp_{}.sub_solo.20{}/exp_{}.sub_solo.20{}_{}.proc/psth/channel_files'
# NOTE(review): normalizer output always goes below norm_HVM, whatever the image set -- confirm this is intended.
normpath_template = '/braintree/home/aliya277/inventory_new/norm_HVM/norm_HVM.sub_solo/norm_HVM.sub_solo.20{}/norm_HVM.sub_solo.20{}_{}.proc/psth'
final_psth_template = '{}_{}.sub_solo.20{}_{}_psth.npy'

# Loop through each image set
for image_set in image_sets:
    # Sessions are derived from the Intan recording names
    intan_proc_path = f'/braintree/data2/active/users/sachis/projects/{image_set}/monkeys/solo/intanproc'
    intan_recordings = sorted(item for item in os.listdir(intan_proc_path) if not item.startswith('.'))

    # Date and time are the 3rd and 4th '_'-separated fields of each recording name
    intan_dates = [recording.split('_')[2] for recording in intan_recordings]
    intan_times = [recording.split('_')[3] for recording in intan_recordings]

    for intan_date, intan_time in zip(intan_dates, intan_times):
        path = savepath_template.format(image_set, image_set, image_set, intan_date, image_set, intan_date, intan_time)

        # The channel files are looked up per session, so a session without
        # channel files cannot shift the file lists of the sessions after it.
        if not os.path.isdir(path):
            print(f'no channel files folder at {path}, skipping')
            continue
        channel_files = natsorted(item for item in os.listdir(path) if not item.startswith('.'))
        if not channel_files:
            print(f'no channel files in {path}, skipping')
            continue

        exp_file = os.path.join(os.path.dirname(path), final_psth_template.format('exp', image_set, intan_date, intan_time))
        if skip_existing and os.path.isfile(exp_file):
            continue

        print(f'Session {os.path.basename(os.path.dirname(os.path.dirname(path)))}')

        # Determine the dimensions from the first channel file.
        # 'psth' is 3D: (num_stim + n_norm_images, num_reps, num_bins)
        first_file = sio.loadmat(os.path.join(path, channel_files[0]))
        first_psth = first_file['psth']
        meta = first_file['meta']
        num_channels = len(channel_files)  # Number of channel files per session
        num_stim = first_psth.shape[0] - n_norm_images
        num_reps = first_psth.shape[1]
        num_bins = first_psth.shape[2]

        # Preallocate NaN-filled arrays with the channel as last axis
        experiment_psth = np.full((num_stim, num_reps, num_bins, num_channels), np.nan, dtype=np.float16)
        normalizer_psth = np.full((n_norm_images, num_reps, num_bins, num_channels), np.nan, dtype=np.float16)

        for j, channelfile in enumerate(channel_files):
            # The first file is already in memory from the shape lookup
            psth_data = first_psth if j == 0 else sio.loadmat(os.path.join(path, channelfile))['psth']
            # Split psth data
            experiment_psth[:, :, :, j] = psth_data[:-n_norm_images]  # Experiment PSTH
            # For the normalizer, move the first item to the end (i.e. move grey image to end to match standard format)
            normalizer_psth[:, :, :, j] = np.roll(psth_data[-n_norm_images:], shift=-1, axis=0)

        print('Experiment PSTH shape:', experiment_psth.shape)
        print('Normalizer PSTH shape:', normalizer_psth.shape)

        # np.save pickles these dicts; read them back with
        # np.load(file, allow_pickle=True).item()
        exp_data = {'psth': experiment_psth, 'meta': meta}
        norm_data = {'psth': normalizer_psth, 'meta': meta}

        print('... saving exp data')
        try:
            np.save(exp_file, exp_data)
        except Exception as e:
            print(e)

        print('... saving norm data')
        norm_dir = normpath_template.format(intan_date, intan_date, intan_time)
        norm_file = os.path.join(norm_dir, final_psth_template.format('norm', image_set, intan_date, intan_time))
        try:
            # The normalizer folder may not exist yet; np.save does not create it
            os.makedirs(norm_dir, exist_ok=True)
            np.save(norm_file, norm_data)
        except Exception as e:
            print(e)


The remaining processing of this data is done by the standard pipeline notebooks, starting with 1_create_proc_nwb.ipynb.