In [2]:
from open_ephys.analysis import Session
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import re
import pandas as pd
import glob

def AlignToTriggersAndFIndEphysTimestamps(Port_intimes,trial_id,first_poke_times,trial_start,TrialStart_EphysTime,FirstPoke_EphysTime):
    """Re-express event times relative to the ephys time of their trial start.

    For every event, the offset between the event time and the start time of
    its trial is added to the ephys timestamp recorded for that trial start.

    Parameters
    ----------
    Port_intimes : sequence of float
        Event times, indexed in step with ``trial_id``. NaN entries are
        passed through as NaN.
    trial_id : sequence of int
        Trial number for each event. It is used as ``trial - 1`` to index the
        per-trial sequences, i.e. it is treated as 1-based.
    first_poke_times : unused
        Kept only so existing callers do not break.
    trial_start : sequence of float
        Start time of each trial, indexed by ``trial - 1``.
    TrialStart_EphysTime : sequence of float
        Ephys timestamp of each trial start, indexed by ``trial - 1``.
    FirstPoke_EphysTime : unused
        Kept only so existing callers do not break.

    Returns
    -------
    list
        One value per entry of ``trial_id``: the aligned timestamp, or NaN
        where the corresponding event time was NaN.
    """
    new_TS = []
    for index, trial in enumerate(trial_id):
        current_poke_event_time = Port_intimes[index]

        if np.isnan(current_poke_event_time):
            new_TS.append(np.nan)
            continue

        # Offset of the event from the start of its own trial. The earlier
        # version also looked up the first-poke time and the next trial start
        # here without using them; the next-trial lookup went through
        # np.unique(trial_start) and could raise IndexError when two trials
        # shared a start time, so those lookups have been dropped.
        current_dist = current_poke_event_time - trial_start[trial - 1]

        new_TS.append(TrialStart_EphysTime[trial - 1] + current_dist)

    return new_TS


def align_open_ephys_processors(main_processor_tuple, aux_processor_tuples, session_path=None, synch_channel=1):
    """Open a session and register sync lines for the main and aux processors.

    Parameters
    ----------
    main_processor_tuple : indexable
        Its first two items are passed to ``add_sync_line`` for the main
        processor (``main=True``).
    aux_processor_tuples : iterable of indexables
        Same layout as ``main_processor_tuple``; each one is registered with
        ``main=False``.
    session_path : path-like, optional
        Converted with ``str()`` and handed to ``Session``.
    synch_channel : int, optional
        First argument of every ``add_sync_line`` call. Defaults to 1.

    Returns
    -------
    The single recording of the single record node in the session.

    Raises
    ------
    ValueError
        If the session does not contain exactly one record node, or that
        node does not contain exactly one recording.
    """
    session_data = Session(str(session_path))

    record_nodes = session_data.recordnodes
    if len(record_nodes) != 1:
        raise ValueError("should be exactly one record node.")
    if len(record_nodes[0].recordings) != 1:
        raise ValueError("Should be exactly one recording.")

    # Exactly one node and one recording are guaranteed by the checks above,
    # so both can be addressed directly.
    node_index = 0
    recording = record_nodes[node_index].recordings[0]

    # Main processor first, then every auxiliary processor.
    recording.add_sync_line(
        synch_channel,
        main_processor_tuple[0],
        main_processor_tuple[1],
        main=True,
    )
    for aux in aux_processor_tuples:
        recording.add_sync_line(
            synch_channel,
            aux[0],
            aux[1],
            main=False,
        )

    print('this should be zero:')
    print(node_index)

    return recording

def find_files(filename, search_path):
    """Collect the path of every file named ``filename`` below ``search_path``.

    The tree is walked top-down with ``os.walk``; each directory that
    directly contains ``filename`` contributes one joined path.

    Returns
    -------
    list of str
        Matching paths in walk order; empty if nothing matched.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, names in os.walk(search_path)
        if filename in names
    ]

def sequence_contains_sequence(haystack_seq, needle_seq, string):
    """Find every position at which ``needle_seq`` occurs in ``haystack_seq``.

    A slice of ``haystack_seq`` with the needle's length is compared to
    ``needle_seq`` at each possible offset. For every match,
    ``string + ' barcode found'`` is printed.

    Returns
    -------
    list of int
        Start offsets of all matches, in ascending order.
    """
    needle_len = len(needle_seq)
    last_offset = len(haystack_seq) - needle_len

    hits = []
    for offset in range(last_offset + 1):
        window = haystack_seq[offset:offset + needle_len]
        if needle_seq == window:
            hits.append(offset)
            print(string + ' barcode found')
    return hits

def find_folder_path(parent_folder, target_folder):
    """Return the path of the first folder named ``target_folder``.

    ``parent_folder`` is walked with ``os.walk``; the first directory that
    directly contains ``target_folder`` determines the result.

    Returns
    -------
    str or None
        The joined path, or ``None`` (after printing ``'not found'``) when
        no such folder exists.
    """
    for current_dir, subfolders, _files in os.walk(parent_folder):
        if target_folder not in subfolders:
            continue
        return os.path.join(current_dir, target_folder)

    # Walk finished without a match.
    print('not found')
    return None

def list_all_datapaths(data_path):
    """List second-level entries whose name contains ``'record'``.

    Only first-level entries of ``data_path`` with ``'EJT'`` somewhere in
    their first five characters are searched. Paths are built by plain
    string concatenation, so ``data_path`` must already end with a path
    separator.

    Returns
    -------
    list of str
        Strings of the form ``data_path + entry + '//' + sub_entry``.
    """
    found = []
    for animal_entry in os.listdir(data_path):
        if 'EJT' not in animal_entry[0:5]:
            continue
        animal_dir = data_path + animal_entry
        found.extend(
            animal_dir + '//' + session_entry
            for session_entry in os.listdir(animal_dir)
            if 'record' in session_entry
        )
    return found

# Top-level script: for each data path, locate the matching Open Ephys
# recording and histology track file, work out the processor ids, then (after
# the `break` below, i.e. currently never) load the recording and save LFP
# timestamps and a handful of channels as .npy files.

# define paths:
# data_path is handed to list_all_datapaths(); dat_path is assigned here but
# not referenced again in this block.

data_path = r'/ceph/sjones/projects/sequence_squad/organised_data/animals//'
dat_path = r"/ceph/sjones/projects/sequence_squad/organised_data/ppseq_data/output_data/striatum/New_Post_sleep//"

# current_mouse  = '178_2_1'

# list out all possible datapaths
data_paths = list_all_datapaths(data_path)

for path in data_paths:

    # print(path)

    # Build the mouse identifier "<a>_<b>_<c>" from the last two '//'-separated
    # pieces of the path:
    #   a = first '_' token of the second-to-last piece, minus its first 3 chars
    #   b = last character of the second-to-last piece
    #   c = last character of the first '_' token of the last piece
    # NOTE(review): presumably a = animal number after an 'EJT' prefix,
    # b = implant number, c = recording number — confirm against folder names.
    a = path.split('//')[-2].split('_')[0][3::]
    b = path.split('//')[-2][-1]
    c = path.split('//')[-1].split('_')[0][-1]
    current_mouse = '_'.join([a,b,c])

    # Re-scan every data path for the one producing the same identifier; the
    # last match wins. The middle field here comes from the second '_' token
    # rather than the whole folder name as for `b` above.
    for path_ in data_paths:

        mir = path_.split('//')[-2].split('_')[0][3::] + '_' +  path_.split('//')[-2].split('_')[1][-1] + '_' + path_.split('//')[-1].split('_')[0][-1]
        if mir == current_mouse:
            print(mir)
            # text after the last '_' of the final path piece
            recording_date = path_.split('//')[-1].split('_')[-1]
            org_data_path = path_

    # NOTE(review): if no path_ matched on this iteration, org_data_path and
    # recording_date keep the previous iteration's values (or are undefined
    # on the first iteration).
    print(org_data_path)


    ### find OE processor path and OE_raw_path
    OE_processor_path_base = r"/ceph/sjones/projects/sequence_squad/data/raw_neuropixel/OE_DATA//"

    # First two '-' fields of recording_date concatenated, followed by the
    # last two characters of its final '-' field.
    compress_date = ''.join(recording_date.split('-')[0:2]) + recording_date.split('-')[-1][-2::]

    # Folder name is 'EJT<a>', with '_implant2' appended when the middle
    # identifier field is '2'.
    mouse_name = 'EJT' + current_mouse.split('_')[0]
    if current_mouse.split('_')[1] == '2':
        mouse_name = mouse_name + '_implant2'

    current_path = os.path.join(OE_processor_path_base + mouse_name, compress_date) + '//'

    # First folder called 'continuous' anywhere below current_path; raises
    # IndexError if the glob finds nothing.
    OE_processor_path = glob.glob(os.path.join(current_path, '**', 'continuous'), recursive=True)[0]
    # Keeps at most the first 10 '//'-separated pieces of the processor path.
    # NOTE(review): whether this trims the path back to the session folder
    # depends on how many '//' separators the glob result actually contains
    # — confirm on the target platform.
    OE_raw_path = '//'.join(OE_processor_path.split('//')[0:10]) + '//'

    print(OE_processor_path)
    print(os.listdir(OE_processor_path))

    ### histology path
    hist_path_base = r"/ceph/sjones/projects/sequence_squad/data/histology/Neuropixel_tracks//"

    # Pick the histology folder whose name contains the part of mouse_name
    # before the first '_'; the last matching entry wins. mouse_name_ is
    # undefined (or stale from an earlier iteration) if nothing matches.
    for item in os.listdir(hist_path_base):
        if mouse_name.split('_')[0] in item:
            mouse_name_ = item

    hist_path = os.path.join((hist_path_base + mouse_name_),"brainreg//")
    # First folder called 'tracks' below the brainreg folder.
    hist_path = glob.glob(os.path.join(hist_path, '**','tracks'),recursive = True)[0]

    # Every entry of the tracks folder with 'csv' in its name.
    implant_files = []
    for item in os.listdir(hist_path):
        if 'csv' in item:
            implant_files+=[item]

    # With several candidates, choose the one whose name contains the middle
    # field of current_mouse (last match wins); otherwise take the only one.
    # NOTE(review): implant_files[0] raises IndexError when no csv exists, and
    # implant_file is left unset/stale if several exist but none matches.
    if len(implant_files) > 1:
        for file in implant_files:
            if current_mouse.split('_')[-2] in file:
                implant_file = file
    else:
        implant_file = implant_files[0]

    probe_track_file = os.path.join(hist_path,implant_file)
    print(probe_track_file)

    ######################### get processors
    #sample rate:
    # Fs is not referenced again in this block; the timestamp conversion
    # further down divides by the literal 2500 instead.
    Fs = 2500


    implant_df = pd.read_csv(probe_track_file)

    # `re` is also imported at the top of the file; this re-import is redundant.
    import re
    # The first three entries returned by os.listdir become the main processor
    # and the two aux processors, in that order. For each entry the first run
    # of digits in the name is the id and the text after the last '.' is the
    # suffix.
    # NOTE(review): os.listdir order is arbitrary, so which entry is treated
    # as "main" is not guaranteed — confirm this is acceptable.
    count = 0
    for processor in os.listdir(OE_processor_path):
        if count == 0:
            main1 = int(re.findall(r'\d+', processor)[0])
            main1_2 = processor.split('.')[-1]
        elif count == 1:
            main2 = int(re.findall(r'\d+', processor)[0])
            main2_2 = processor.split('.')[-1]
        elif count == 2:
            main3 = int(re.findall(r'\d+', processor)[0])
            main3_2 = processor.split('.')[-1]
        count +=1

    main_processor_tuple=(main1, main1_2)

    aux_processor_tuples=((main2,main2_2),(main3,main3_2))

    # Leaves the `for path in data_paths` loop during its first iteration.
    # Everything below this line inside the loop body is therefore never
    # executed as the script stands.
    break
    ############ DONT RUN MORE THAN ONCE! ITS SO SLOW! ######################

    ### LOAD in data: this could take a few minutes
    # Third positional argument is session_path.
    recording = align_open_ephys_processors(main_processor_tuple,aux_processor_tuples,OE_raw_path)
    recording.compute_global_timestamps()

    ############ DONT RUN MORE THAN ONCE! ITS SO SLOW! ######################




    ##### extract channels

    data = recording.continuous[1].samples

    # Timestamp source depends on the numeric first field of current_mouse:
    # above 250 they come from the recording object, otherwise from the
    # 'synchronized_timestamps.npy' file in the first aux processor's folder.
    if int(current_mouse.split('_')[0]) > 250:
        timestamps = recording.continuous[1].timestamps
    else:
        timestamps = np.load(OE_processor_path + '//' + 'Neuropix-PXI-' + str(aux_processor_tuples[0][0]) + '.' + str(aux_processor_tuples[0][1]) + '//' + 'synchronized_timestamps.npy')

    ## chosen channels (the list below holds 7 values)
    channels = [50,100,150,200,250,300,350]

    ### add in region info based on depth:
    # Median row index of the rows labelled 'ccb'; if that computation raises
    # anything at all, the same is attempted with 'ccg'.
    # NOTE(review): the bare `except` catches every exception type, not just
    # the "no 'ccb' rows" case — presumably only that case is intended.
    try:
        callosum_middle_index = int(np.median(np.where(implant_df['Region acronym'].values == 'ccb')))
    except:
        callosum_middle_index = int(np.median(np.where(implant_df['Region acronym'].values == 'ccg')))

    # Fraction of the track rows that lie before the callosum midpoint.
    proportion_in_motor_cortex = (callosum_middle_index/len(implant_df))
    # there should be 400 channels per 4000um
    # tot_channels = 384
    # bank_spacing = 20 # 20um
    # channels_per_bank = 2
    first_cortex_channel = int(proportion_in_motor_cortex * 400)

    # save out data:
    # Divide by 2500 (same value as Fs above), then build a copy shifted so
    # the first timestamp is zero.
    timestamps_s = timestamps/2500
    timestamps_s_offset_adjusted = timestamps_s - timestamps_s[0]

    # Region label per chosen channel. A channel exactly equal to
    # first_cortex_channel gets no label. channel_regions is not used again
    # in this block, and its cortex label ('m_crtex') is spelled differently
    # from the one used in the file names below ('m-crtx').
    channel_regions = []
    for channel in channels:
        if channel > first_cortex_channel:
            channel_regions.append('m_crtex')
        elif channel < first_cortex_channel:
            channel_regions.append('striatum')

    # save timestamp data
    save_file_path = org_data_path + r"/ephys/LFP/"
    if not os.path.isdir(save_file_path):
        os.makedirs(save_file_path)

    np.save(save_file_path+ 'LFP_timestamps.npy',timestamps_s)
    np.save(save_file_path+ 'aligned_LFP_timestamps.npy',timestamps_s_offset_adjusted)

    ## free up memory
    del timestamps_s
    del timestamps_s_offset_adjusted



    #### add in region info and extract data into useable format

    ### add in region info based on depth:
    for chosen_channel in channels:
        # Copy this channel's column out of `data` chunk_size rows at a time
        # into a plain Python list.
        data_channel = []
        chunk_size = 2000 # adjust this value to balance speed and memory usage
        for i in tqdm(range(0, len(data), chunk_size)):
            chunk = [data[j][chosen_channel] for j in range(i, min(i+chunk_size, len(data)))]
            data_channel += chunk

        # NOTE(review): neither branch runs when chosen_channel equals
        # first_cortex_channel, so data_region would keep its previous value
        # (or be undefined on the first channel).
        if chosen_channel > first_cortex_channel:
            data_region = 'm-crtx'
        elif chosen_channel < first_cortex_channel:
            data_region = 'striatum'

        save_path = save_file_path + 'channel-' + str(chosen_channel) + '_REGION-' + data_region + "_LFP_data.npy"

        np.save(save_path,data_channel)
        print('data saved for channel ' + str(chosen_channel))
        # clean up for memory
        del data_channel

    print('done')



        
        
        
        
        


FileNotFoundError: [WinError 3] The system cannot find the path specified: '/ceph/sjones/projects/sequence_squad/organised_data/animals//'

In [None]:
# Example Open Ephys 'continuous' folder on the Windows share.
# Raw string: the backslashes are path separators. Without the r-prefix,
# "\r" is read as a carriage return and "\180" / "\2023" as octal escapes,
# which silently corrupts the path.
oe = r"Z:\projects\sequence_squad\data\raw_neuropixel\OE_DATA\EJT262\180523\2023-05-18_11-26-01\Record Node 113\experiment1\recording2\continuous"

In [None]:
# Load the session found at OE_raw_path and register the sync lines for the
# main and aux processors; keeps the resulting recording object.
recording = align_open_ephys_processors(
    main_processor_tuple,
    aux_processor_tuples,
    session_path=OE_raw_path,
)