Test code for the code that loops through the probelfp.

In [2]:
# Allen Visual Behaviour SWR Collection Script
# Called by a bash script
# Produces SWR events at a 2.5 zscore threshold
# Also produces time series listing gamma band events (minimum 0.015 s long) and motion artifacts on a channel outside brain
# TO DO:  Interpolate between all the channels somehow and merge the LFP signals from CA1 into a big 6 by samples array to run the detector on.

# Technical details of this dataset are located here: https://brainmapportal-live-4cc80a57cd6e400d854-f7fdcae.divio-media.net/filer_public/f7/06/f706855a-a3a1-4a3a-a6b0-3502ad64680f/visualbehaviorneuropixels_technicalwhitepaper.pdf
# IF that link does not work this url can also work: https://portal.brain-map.org/explore/circuits/visual-behavior-neuropixels 

"""
Errors: 
in session 746083955
 line 337, in <module>
    channel_outside_hp = np.random.choice(channel_outside_hp)
  File "numpy/random/mtrand.pyx", line 950, in numpy.random.mtrand.RandomState.choice
ValueError: 'a' cannot be empty unless no samples are taken

"""



"""
    This code calls the Neuropixels LFP data from the visual behaviour
    datasets and loops through it storing channels used as well as
    the.
    
    The methods are based on a combination of methods taken from De Filliopo et al., (2022), Nitzan et al., (2020) which were both used on the ABA and IBL datasets for 
    detecting ripples while the mouse was still. Methods for improving detectors so that putative ripples occurring during movement could be detected are based on 
    criteria from Frank and Chen (2008) as well as
"""



# User configuration - change these as needed:
sdk_cache_dir='/space/scratch/allen_visbehave_data'# path to the AllenSDK cache directory (where the manifest and the downloaded LFP files live)
output_dir = '/space/scratch'
swr_output_dir = 'allen_visbehave_swr_data' # name of the results directory; it is joined onto output_dir further down
# Session ids to process. An empty list means "process every session in the cache table".
select_these_sessions = []
# example input (this assignment overrides the empty list above)
select_these_sessions = [715093703, 719161530, 721123822]
#select_these_sessions = [715093703]
#select_these_sessions = [746083955] # the first screwed up when selecting a probe.
#dont_wipe_these_sessions = [715093703, 719161530]
# Sessions whose cached '*lfp*' files are NOT deleted after processing.
dont_wipe_these_sessions = []

# libraries
import os
import subprocess 
import numpy as np
import pandas as pd
from scipy import io, signal
#from fitter import Fitter, get_common_distributions, get_distributions
import scipy.ndimage
from scipy.ndimage import gaussian_filter
from scipy.ndimage import gaussian_filter1d
import matplotlib.pyplot as plt
# for ripple detection
import ripple_detection
import ripple_detection.simulate as ripsim # for making our time vectors
from scipy import signal
# %%
#import KernelRegDraft as kreg # custom module, not needed
#import 'Stienmetz2019Reanalyzed/KernelRegDraft.py' as kreg
import piso #can be difficult to install, https://piso.readthedocs.io/en/latest/
from scipy.ndimage import gaussian_filter
from scipy.ndimage import gaussian_filter1d
from scipy import stats
from tqdm import tqdm
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache


# functions

# subprocess is a default module
def call_bash_function(bash_command = ""):
    """Run a shell command string and print its outcome.

    Parameters
    ----------
    bash_command : str
        Command line handed to the shell, e.g.
        "source /path/to/your/bash_script.sh && your_bash_function".

    Returns
    -------
    None
        On a zero return code the captured stdout is printed, otherwise the
        captured stderr is printed.

    Notes
    -----
    The command is executed with ``shell=True`` so that shell syntax
    (``&&``, ``source``, globbing) keeps working. Only pass trusted,
    internally built strings.
    """
    # stderr must be piped as well: without it communicate() returns None for
    # the error stream and the failure branch crashes on None.decode().
    process = subprocess.Popen(bash_command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()

    if process.returncode == 0:
        print("Bash function executed successfully.")
        print("Output:", output.decode('utf-8', errors='replace'))
    else:
        print("Error:", error.decode('utf-8', errors='replace'))

# Assuming you have your signal_array, b, and a defined as before
def finitimpresp_filter_for_LFP(LFP_array, samplingfreq, lowcut = 1, highcut = 250,
                    filter_order = 101):
    """Band-pass filter an LFP array with a windowed FIR filter.

    Parameters
    ----------
    LFP_array : array-like
        Signal to filter. Filtering runs along axis 0, so a 2-D input is
        filtered column by column.
    samplingfreq : float
        Sampling frequency of ``LFP_array`` in Hz.
    lowcut, highcut : float
        Pass-band edges in Hz. ``highcut`` must be below ``samplingfreq / 2``.
    filter_order : int
        Number of FIR taps passed to ``scipy.signal.firwin``.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as the input.

    Notes
    -----
    The filter is applied causally with ``scipy.signal.lfilter``, so the
    output is delayed relative to the input (it is not zero-phase).
    """
    # Because fs is given, firwin expects the cutoffs in Hz. The previous code
    # divided them by the Nyquist frequency as well, which silently moved the
    # pass band to a fraction of a Hz.
    fir_coeff = signal.firwin(filter_order, [lowcut, highcut],
                              pass_zero=False, fs=samplingfreq)

    # Apply the FIR filter along the time axis (axis 0)
    filtered_signal = signal.lfilter(fir_coeff, 1.0, LFP_array, axis=0)
    return filtered_signal


def _true_run_bounds(mask):
    """Return (starts, ends) index arrays, ends inclusive, of every run of True in a 1-D mask."""
    mask = np.asarray(mask, dtype=bool)
    # Padding with False on both sides makes runs touching either edge of the
    # array produce a proper rising and falling edge (no wrap-around).
    padded = np.concatenate(([False], mask, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1) - 1
    return starts, ends


def _expand_to_envelope(z_scores, detected, envelope_thresh):
    """Grow each detected sample outwards over neighbouring samples above envelope_thresh."""
    # A sample can belong to an event if it is above the envelope threshold or
    # is itself a detection; only runs containing at least one detection are kept.
    candidate = (z_scores > envelope_thresh) | detected
    starts, ends = _true_run_bounds(candidate)
    hits = np.concatenate(([0], np.cumsum(detected)))
    keep = (hits[ends + 1] - hits[starts]) > 0
    # Paint the kept runs back into a boolean mask via a difference array.
    marks = np.zeros(candidate.size + 1, dtype=int)
    marks[starts[keep]] += 1
    marks[ends[keep] + 1] -= 1
    return np.cumsum(marks[:-1]) > 0


def event_boundary_detector(time, five_to_fourty_band_power_df, envelope=True, minimum_duration = 0.02, maximum_duration = 0.4,
                       threshold_sd=2.5, envelope_threshold_sd=1):
    """
    Power threshold event detector, optionally widened by a lower envelope threshold.

    The input signal is z-scored; samples at or above ``threshold_sd`` are
    detections. With ``envelope=True`` every detection is extended over the
    contiguous neighbouring samples whose z-score is above
    ``envelope_threshold_sd``. Contiguous runs become events, and only events
    whose duration lies strictly between ``minimum_duration`` and
    ``maximum_duration`` are returned.

    Originally for detecting sharp waves in the stratum radiatum, following
    Fernández-Ruiz, A., Oliva, A., Fermino de Oliveira, E., Rocha-Almeida, F., Tingley, D.,
    & Buzsáki, G. (2019). Long-duration hippocampal sharp wave ripples improve memory. Science, 364(6445), 1082-1086:

    Sharp waves were detected separately using LFP from a CA1 str. radiatum channel, filtered with band-pass filter boundaries
    (5-40 Hz). LFP events of a minimum duration of 20 ms and maximum 400 ms exceeding 2.5 SD of the
    background signal were included as candidate SPWs. Only if a SPW was simultaneously detected with
    a ripple, a CA1 SPW-R event was retained for further analysis. SPW-R bursts were classified when more
    than one event was detected in a 400 ms time window.

    Parameters
    ----------
    time : array-like
        Time stamp of every sample, same length as the signal.
    five_to_fourty_band_power_df : array-like
        1-D signal (e.g. band power) to threshold.
    envelope : bool
        Whether to extend detections down to ``envelope_threshold_sd``.
    minimum_duration, maximum_duration : float
        Exclusive duration bounds, in the units of ``time``.
    threshold_sd : float
        Detection threshold in z-score units.
    envelope_threshold_sd : float
        Envelope threshold in z-score units.

    Returns
    -------
    pandas.DataFrame
        Columns ``start_time``, ``end_time`` and ``duration``, one row per event.
    """
    time = np.asarray(time)
    z_scores = stats.zscore(np.asarray(five_to_fourty_band_power_df))
    past_thresh = z_scores >= threshold_sd

    if envelope:
        past_thresh = _expand_to_envelope(z_scores, past_thresh, envelope_threshold_sd)

    # First and last sample index of every contiguous above-threshold run.
    # (np.roll was used before; it wraps around, so a run touching both the
    # first and the last sample lost its start and its end and got mis-paired.)
    starts, ends = _true_run_bounds(past_thresh)
    start_time = time[starts]
    end_time = time[ends]

    sharp_wave_events_df = pd.DataFrame({
        'start_time': start_time,
        'end_time': end_time,
        'duration': end_time - start_time,
    })

    # filter for the duration range we want
    in_duration_range = (sharp_wave_events_df.duration>minimum_duration)&(sharp_wave_events_df.duration<maximum_duration)
    sharp_wave_events_df = sharp_wave_events_df[in_duration_range]

    return sharp_wave_events_df

def event_boundary_times(time, past_thresh):
    """
    Convert a boolean vector into a table of event start/end times.

    Parameters
    ----------
    time : array-like
        Time stamp of every sample, same length as ``past_thresh``.
    past_thresh : array-like of bool
        True wherever the sample belongs to an event.

    Returns
    -------
    pandas.DataFrame
        One row per run of consecutive True values, with columns
        ``start_time``, ``end_time`` (time of the last True sample) and
        ``duration`` (``end_time - start_time``).
    """
    time = np.asarray(time)
    mask = np.asarray(past_thresh, dtype=bool)

    # Pad with False so runs touching the first or last sample still yield a
    # rising and a falling edge (np.roll would wrap around and mis-pair them).
    padded = np.concatenate(([False], mask, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1) - 1

    start_time = time[starts]
    end_time = time[ends]

    # The dictionary is built locally; the previous version wrote into an
    # undefined name `row_of_info` and raised NameError on every call.
    events_df = pd.DataFrame({
        'start_time': start_time,
        'end_time': end_time,
        'duration': end_time - start_time,
    })

    return events_df

def peaks_in_events(events, time_values, signal_values):
    """
    Locate the maximum of the signal inside every event window.

    Parameters
    ----------
    events : table with 'start_time' and 'end_time' columns
        One row per event; both bounds are inclusive.
    time_values : array
        Time stamp of every sample in ``signal_values``.
    signal_values : array
        Signal to search for peaks.

    Returns
    -------
    tuple of three numpy arrays, one entry per event:
        the peak value of the signal, the value of the z-scored signal at
        that same sample, and the time of that sample.
    """
    zscored_signal = stats.zscore(signal_values)

    # one (raw peak, z-scored value at the peak, time of the peak) tuple per event
    peaks = []
    for event_start, event_end in zip(events['start_time'], events['end_time']):
        in_event = (time_values >= event_start) & (time_values <= event_end)
        event_signal = signal_values[in_event]
        peak_pos = np.argmax(event_signal)
        peaks.append((
            event_signal[peak_pos],
            zscored_signal[in_event][peak_pos],
            time_values[in_event][peak_pos],
        ))

    peak_values = np.array([peak[0] for peak in peaks])
    peak_zscores = np.array([peak[1] for peak in peaks])
    peak_times = np.array([peak[2] for peak in peaks])
    return peak_values, peak_zscores, peak_times

# Setting up the ABI Cache
# Build the AllenSDK cache object from the manifest inside the cache directory
manifest_path = os.path.join(sdk_cache_dir, "manifest.json")
cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

# Table of all sessions; when specific session ids were requested, keep only
# the requested ids that actually appear in the table.
sessions = cache.get_session_table()
if len(select_these_sessions) != 0:
    wanted_session_ids = sessions.index.intersection(select_these_sessions)
    sessions = sessions.loc[wanted_session_ids]

    
 
# Looping through the sessions specified
"""
    from tqdm import tqdm
    import time

    # Replace this with your actual loop or task
    for i in tqdm(range(10), desc="Processing", unit="iteration"):
        # Simulate some work
        time.sleep(0.1)

    print("Task completed!")
"""

# include a data frame that lists the sessions used, channels taken from each session and for what (movement vs ripples),
# and if there is behavioural data or lfp data

# Create main results folder. The full path (output_dir/swr_output_dir) has to be
# created here; creating the bare folder name would put an unused directory into
# the current working directory instead.
swr_output_dir_path = os.path.join(output_dir, swr_output_dir)
os.makedirs(swr_output_dir_path, exist_ok=True)
# ids of sessions that were skipped because they contain no CA1 channel
sessions_without_ca1 = np.array([])

# Main processing loop. For every selected session: on each probe that has CA1
# channels and LFP data, pick the CA1 channel whose 120-250 Hz filtered signal has
# the largest peak value, detect putative ripples and gamma events on it, run the
# ripple detector on up to two control channels outside the hippocampal formation
# (movement artifact reference), and write every event table to CSV.
for seshnum in tqdm(range(0, sessions.shape[0]), desc="Processing", unit="iteration"):
    session_id = sessions.index.values[seshnum]
    session = cache.get_session_data(session_id)
    print("Session id " + str(session_id))
    
    # check if this session even has CA1 channels in it, if not skip this iteration and add the name to the list
    sesh_has_ca1 = np.isin('CA1', list(session.channels.ecephys_structure_acronym.unique()))
    if not sesh_has_ca1:
        print("Session id " + str(session_id) + "Does not have CA1")
        sessions_without_ca1 = np.append(sessions_without_ca1, session_id)
        continue

    # Create subfolder for session, will contain all csvs for events detected and .npy of ca1 channels and control channels 
    session_subfolder = "swrs_session_" + str(session_id)
    session_subfolder = os.path.join(swr_output_dir_path, session_subfolder)
    os.makedirs(session_subfolder, exist_ok=True) 
    
    # get probes with CA1 recordings out of recording
    probe_id_list = list(session.channels.probe_id.unique())
    probes_of_interest = []

    # find probes which contain channels from CA1 and for which LFP data exists
    # NOTE(review): assumes session.probes.has_lfp_data holds booleans - confirm
    for probe_id in probe_id_list:
        has_ca1_and_exists = np.isin('CA1', list(session.channels[session.channels.probe_id == probe_id].ecephys_structure_acronym.unique()))
        has_ca1_and_exists = has_ca1_and_exists & session.probes.has_lfp_data[probe_id]
        if has_ca1_and_exists:
            probes_of_interest.append(probe_id)
    # create an array to be filled with channel ids from ca1
    ca1_chans_arr = np.array([], dtype=int)
    used_channels_xarray_dict = {} # a dict to put the lfp xarray objects into (currently unused)
    
    # create an array to be filled with the control channels outside the hippocampal formation
    outof_hp_chans_arr = np.array([], dtype=int)

    
    # get lfp for each probe
    for probe_id in probes_of_interest:    
        # pull or load the lfp for this probe
        print("Probe id " + str(probe_id))
        lfp = session.get_lfp(probe_id)
        sampling_rate_this_probe = session.probes.lfp_sampling_rate[probe_id]

        print("Selecting CA1 channel...")
        # fetching channels in ca1 on this probe for this recording
        ca1_chans =session.channels.probe_channel_number[(session.channels.probe_id==probe_id)&(session.channels.ecephys_structure_acronym=='CA1')]
        ca1_idx = np.isin(lfp.channel.values, ca1_chans.index.values)
        ca1_idx = lfp.channel.values[ca1_idx]
        
        # select ca1 channels 
        lfp_ca1  = lfp.sel(channel=ca1_idx)
        lfp_ca1  = lfp_ca1.to_pandas()
        
        # get the timestamps for this lfp recording
        lfp_time_index = lfp_ca1.index.values 

        # identify channel on probe with the largest peak value in the ripple band
        lfp_ca1_ripppleband = finitimpresp_filter_for_LFP(lfp_ca1, samplingfreq = sampling_rate_this_probe,  lowcut = 120, highcut = 250)
        highest_rip_power = lfp_ca1_ripppleband.max(axis=0)
        
        # store channel identity in ca1_chans_arr and pull it for analysis of that channel
        this_chan_id = int(lfp_ca1.columns[highest_rip_power.argmax()])
        
        # ideally we would store the channels for later use, but each lfp has its own time and sampling rate that it goes through
        #used_channels_xarray_dict[this_chan_id] = lfp.channel.values[this_chan_id]
        ca1_chans_arr = np.append(ca1_chans_arr, this_chan_id)
        peakrippleband = lfp_ca1_ripppleband[:,highest_rip_power.argmax()]
        # make fake speed variable, we can use this for now and fix it later              
        dummy_speed = np.zeros_like(peakrippleband)
        print("Detecting Putative Ripples")
        # we add a dimension to peakrippleband because the ripple detector needs it
        Karlsson_ripple_times = ripple_detection.Karlsson_ripple_detector(
            time = lfp_time_index, 
            zscore_threshold=2.5,
            filtered_lfps = peakrippleband[:,None], 
            speed = dummy_speed, 
            sampling_frequency = sampling_rate_this_probe
        )
        # there is no need for this criteria (Karlsson_ripple_times.duration>0.015)&(Karlsson_ripple_times.duration<0.25)
        # because they are already filtered for minimum duration
        # but we need to do it for maximum duration
        # (.copy() so the new columns below are added to an independent frame, not a slice)
        Karlsson_ripple_times = Karlsson_ripple_times[Karlsson_ripple_times.duration<0.25].copy()
        print("Done")
        # adds peak amplitude, z-scored peak amplitude and peak time of the raw LFP to the table
        Karlsson_ripple_times['Peak_Amplitude'], Karlsson_ripple_times['Peak_Amplitude_lfpzscore'],  Karlsson_ripple_times['Peak_time'] = peaks_in_events(events=Karlsson_ripple_times, 
                                                                                                                           time_values=lfp_time_index, 
                                                                                                                           signal_values=lfp_ca1.to_numpy()[:,highest_rip_power.argmax()])
        
        csv_filename = f"probe_{probe_id}_channel_{this_chan_id}_karlsson_detector_events.csv"
        csv_path = os.path.join(session_subfolder, csv_filename)
        Karlsson_ripple_times.to_csv(csv_path, index=True)
        print("Writing to file.")
        print("Detecting gamma events.")
        # gamma power
        # compute this later, I will have a separate script called SWR filtering which will do this
        gamma_band = finitimpresp_filter_for_LFP(lfp_ca1[this_chan_id], samplingfreq =  sampling_rate_this_probe, lowcut = 20, highcut = 80)
        gamma_power = np.abs(signal.hilbert(gamma_band))**2
        # Threshold the power envelope. Previously the band-passed signal itself was
        # passed here and gamma_power was computed but never used.
        gamma_times = event_boundary_detector(time = lfp_time_index, threshold_sd = 3, envelope=False, 
                                      minimum_duration = 0.015, maximum_duration = float('inf'),
                                  five_to_fourty_band_power_df = gamma_power)
        print("Done")
        csv_filename = f"probe_{probe_id}_channel_{this_chan_id}_gamma_band_events.csv"
        csv_path = os.path.join(session_subfolder, csv_filename)
        gamma_times.to_csv(csv_path, index=True)
        print("Writing to file.")
        print("Selecting reference channel for movement artifact filtering.")
        # movement artifact detector channel (SWR bandpass and power, then z-score)
        # control_region_idx = session.channels.ecephys_structure_acronym.isna()
        
        # as detailed in supplementary methods in Nitzan et al., (2022) on page 2 under Event Detection:
        #   "An additional 'noise' signal from a channel outside of the hippocampus was provided to exclude
        #    simultaneously occurring high frequency events."
        # So we will pick channels from outside hippocampus to filter for high frequency events as well
        idx = session.channels.probe_id == probe_id
        organisedprobechans = session.channels[idx].sort_values(by='probe_vertical_position')
        organisedprobechans = organisedprobechans[np.isin(organisedprobechans.index.values, lfp.channel.values) ]

        # code for identifying first  and last ca1 channel, not used now but can be later to pick channels above or below ca1
        # first_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[-1]
        # last_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[0]
        
        # True for every channel that is not labelled as one of the listed hippocampal-formation regions
        not_a_ca1_chan = np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))
        candidate_control_chans = organisedprobechans.index[not_a_ca1_chan]
        # Draw two control channels when possible. np.random.choice raises ValueError when asked
        # for more samples than exist, so fall back to however many candidates are available.
        n_controls = min(2, len(candidate_control_chans))
        if n_controls == 0:
            print("No channel outside the hippocampal formation on probe " + str(probe_id) + ", skipping movement artifact detection.")
            take_two = np.array([], dtype=int)
        else:
            take_two = np.random.choice(candidate_control_chans, n_controls, replace=False)
        outof_hp_chans_arr = np.append(outof_hp_chans_arr,take_two)
        
        # movement control
        for channel_outside_hp in take_two:
            movement_control_channel = lfp.sel(channel=channel_outside_hp)
            movement_control_channel = movement_control_channel.to_pandas()
            movement_control_channel = finitimpresp_filter_for_LFP(movement_control_channel,samplingfreq=sampling_rate_this_probe, lowcut = 120, highcut = 250)
            print("Detecting movement artifacts.")
            movement_controls = ripple_detection.Karlsson_ripple_detector(
                time = lfp_time_index, 
                filtered_lfps = movement_control_channel[:,None], 
                speed = dummy_speed, 
                zscore_threshold=2.5,
                sampling_frequency = sampling_rate_this_probe
            )
            print("Done")
            csv_filename = f"probe_{probe_id}_channel_{channel_outside_hp}_movement_artifacts.csv"
            csv_path = os.path.join(session_subfolder, csv_filename)
            movement_controls.to_csv(csv_path, index=True)
            print("Done")
        
        #write these two to a numpy array finish loop
        # write channel number and sessionid to a pandas array tracking where each channel came from
        # so at the end of the loop you can identify which channel it called
    
    # save channels used in the session to detect SWR events (also gamma events) and channels for movement artifact detection
    # The file names carry the id of the last CA1 channel selected in this session. It is read
    # from ca1_chans_arr so that a session in which no probe was processed neither crashes on
    # an undefined name nor silently reuses the channel id of the previous session.
    if ca1_chans_arr.size > 0:
        last_chan_id = ca1_chans_arr[-1]
        txt_np_arr_filename_swr = f"session_{session_id}_channel_{last_chan_id}_ca1_channels.txt"
        nptxt_path = os.path.join(session_subfolder, txt_np_arr_filename_swr)
        np.savetxt(nptxt_path, ca1_chans_arr, fmt='%d', delimiter=',')

        txt_np_arr_filename_move = f"session_{session_id}_channel_{last_chan_id}_outofbrain_channels.txt"
        nptxt_path = os.path.join(session_subfolder, txt_np_arr_filename_move)
        np.savetxt(nptxt_path, outof_hp_chans_arr, fmt='%d', delimiter=',')
    else:
        print("No probe with CA1 channels and LFP data was processed for session " + str(session_id) + ", no channel lists written.")
    
    #loop over global channels
    # needs changing to all channels
    # this is difficult because the samples need to be interpolated between and matched
    # Draft kept for reference (not executed):
    #
    # used_channels_xarray_dict[this_chan_id].append(lfp.channel.values[this_chan_id]).keys()
    # this_chan_id + used_channels_xarray_dict
    #
    # Karlsson_ripple_times = ripple_detection.Karlsson_ripple_detector(
    #         time = lfp_time_index, 
    #         filtered_lfps = lfp.sel(channel=ca1_chans_arr), 
    #         speed = dummy_speed, 
    #         sampling_frequency = 1250.0)
    #
    # # save to 
    # csv_filename = f"global_session_{session_id}_karlsson_detector_events.csv"
    # csv_path = os.path.join(session_subfolder, csv_filename)
    # Karlsson_ripple_times.to_csv(csv_path, index=True)
    
    # removing the cached LFP files of this session (unless the session is protected)
    if (session_id not in dont_wipe_these_sessions):
        remove_from_path_command = "find "+sdk_cache_dir+"/session_"+str(session_id)+" -type f -name '*lfp*' -exec rm {} +"
        call_bash_function(remove_from_path_command)

# saving the ids of the sessions that don't contain any channels in CA1
txt_np_arr_filename_move = "session_without_ca1.txt"
# make sure the results folder exists even if every session was skipped
os.makedirs(swr_output_dir_path, exist_ok=True)
nptxt_path = os.path.join(swr_output_dir_path, txt_np_arr_filename_move)
# Write the skipped-session list. The previous version wrote outof_hp_chans_arr
# (the control channels of the last processed session) into this file.
np.savetxt(nptxt_path, sessions_without_ca1, fmt='%d', delimiter=',')


print("Done! Results in " + swr_output_dir_path)

Processing:   0%|                                                                          | 0/1 [00:00<?, ?iteration/s]

Session id 746083955
Probe id760647913
Selecting CA1 channel...
Detecting Putative Ripples
Done
Writing to file.
Detecting gamma events.


Processing:   0%|                                                                          | 0/1 [00:36<?, ?iteration/s]

Done
Writing to file.
Selecting reference channel for movement artifact filtering.





TypeError: unsupported operand type(s) for &: 'bool' and 'str'

In [15]:
idx = session.channels.probe_id == probe_id
organisedprobechans = session.channels[idx].sort_values(by='probe_vertical_position')
organisedprobechans = organisedprobechans[np.isin(organisedprobechans.index.values, lfp.channel.values) ]

first_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[-1]
last_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[0]
not_a_ca1_chan = np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))
# Find the indices of the blocks of False i.e. the channels that are ca1
take_two = np.random.choice(organisedprobechans.index[not_a_ca1_chan], 2, replace=False)



Unnamed: 0_level_0,filtering,probe_channel_number,probe_horizontal_position,probe_id,probe_vertical_position,structure_acronym,ecephys_structure_id,ecephys_structure_acronym,anterior_posterior_ccf_coordinate,dorsal_ventral_ccf_coordinate,left_right_ccf_coordinate
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
849734700,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,6,59,760647913,80,VPM,733.0,VPM,7342.0,3838.0,7711.0
849734708,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,10,59,760647913,120,VPM,733.0,VPM,7350.0,3806.0,7734.0
849734716,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,14,59,760647913,160,VPM,733.0,VPM,7359.0,3765.0,7762.0
849734724,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,18,59,760647913,200,VPM,733.0,VPM,7367.0,3732.0,7786.0
849734732,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,22,59,760647913,240,VPM,733.0,VPM,7375.0,3699.0,7809.0
849734740,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,26,59,760647913,280,VPM,733.0,VPM,7382.0,3666.0,7832.0
849734748,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,30,59,760647913,320,VPM,733.0,VPM,7390.0,3632.0,7856.0
849734756,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,34,59,760647913,360,VPM,733.0,VPM,7397.0,3599.0,7879.0
849734764,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,38,59,760647913,400,TH,549.0,TH,7406.0,3557.0,7909.0
849734772,AP band: 500 Hz high-pass; LFP band: 1000 Hz l...,42,59,760647913,440,TH,549.0,TH,7413.0,3523.0,7932.0


In [54]:
first_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[-1]
last_ca1 = organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[0]
not_a_ca1_chan = np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))
# Find the indices of the blocks of False i.e. the channels that are ca1
take_two = np.random.choice(organisedprobechans.index[not_a_ca1_chan], 2, replace=False)

array([849734876, 849735148])

In [41]:
not_a_ca1_chan = np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))
not_a_ca1_chan[-1]&not_a_ca1_chan[-2]
not_a_ca1_chan.shape

(57,)

In [26]:
np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True,  True,  True])

In [22]:
organisedprobechans.probe_vertical_position[organisedprobechans.ecephys_structure_acronym == 'CA1'].tolist()[-1]

2240

In [19]:
organisedprobechans.index[np.logical_not(np.isin(organisedprobechans.ecephys_structure_acronym,[ "CA3", "CA2", "CA1", "HPF", "EC", "DG"]))]

Int64Index([849734700, 849734708, 849734716, 849734724, 849734732, 849734740,
            849734748, 849734756, 849734764, 849734772, 849734780, 849734788,
            849734796, 849734804, 849734812, 849734820, 849734828, 849734836,
            849734844, 849734852, 849734860, 849734868, 849734876, 849734884,
            849734892, 849735140, 849735148, 849735156],
           dtype='int64', name='id')

In [None]:
control_region_idx = session.channels.ecephys_structure_acronym.isna()
sum((session.channels.probe_id == probe_id)&(control_region_idx))

In [None]:
control_region_idx = session.channels.ecephys_structure_acronym.isna()
channeloutsidebrain = session.channels[(session.channels.probe_id == probe_id)&(control_region_idx)].index.values
print(channeloutsidebrain)
#channeloutsidebrain = lfp.channel.values[np.isin(lfp.channel.values, channeloutsidebrain)]
np.isin(lfp.channel.values, channeloutsidebrain)

In [None]:
testsesh = cache.get_session_data(732592105)
testsesh.channels.ecephys_structure_acronym.value_counts()

In [None]:
# Scratch cell: walk over every session in the cache and, for each probe that
# records from CA1, print how many channels fall into each structure.
sessions = cache.get_session_table()

for seshnum in tqdm(range(0, sessions.shape[0]), desc="Processing", unit="iteration"):
    session_id = sessions.index.values[seshnum]
    session = cache.get_session_data(session_id)
    print(f"Session id{session_id}")

    # all probes of the recording, then the subset containing CA1 channels
    probe_id_list = list(session.channels.probe_id.unique())
    probes_of_interest = []
    for probe_id in probe_id_list:
        on_this_probe = session.channels.probe_id == probe_id
        regions_on_probe = session.channels[on_this_probe].ecephys_structure_acronym.unique()
        has_ca1 = np.isin('CA1', list(regions_on_probe))
        if not has_ca1:
            continue
        probes_of_interest.append(probe_id)

    # containers mirroring the main script (not filled in this cell)
    ca1_chans_arr = np.array([], dtype=int)
    used_channels_xarray_dict = {}
    outof_hp_chans_arr = np.array([], dtype=int)

    # report the structure composition of every CA1 probe (LFP is not loaded here)
    for probe_id in probes_of_interest:
        print(f"Probe id{probe_id}")
        #lfp = session.get_lfp(probe_id)
        acronyms_on_probe = session.channels.structure_acronym[session.channels.probe_id==probe_id]
        print(acronyms_on_probe.value_counts())

In [None]:
session.channels[session.channels.probe_id==idx]

In [None]:

# Scratch cell: print all probe ids of the current session, then those with CA1 channels
probe_id_list = list(session.channels.probe_id.unique())
print(probe_id_list)

probes_of_interest = []
for probe_id in probe_id_list:
    on_this_probe = session.channels.probe_id == probe_id
    regions_on_probe = session.channels[on_this_probe].ecephys_structure_acronym.unique()
    has_ca1 = np.isin('CA1', list(regions_on_probe))
    if not has_ca1:
        continue
    probes_of_interest.append(probe_id)

print(probes_of_interest)

In [None]:
session_id

In [None]:
idx = session.channels.probe_id.unique()[0]
session.channels[session.channels.probe_id==idx].sort_values(by='probe_vertical_position').to_csv('exampleprobe_by_depth.csv')

In [None]:
idx = session.channels.probe_id.unique()[0]
session.channels[(session.channels.probe_id==idx)&(session.channels.ecephys_structure_acronym=='CA1')].sort_values(by='probe_vertical_position')

In [None]:
channeloutsidebrain = session.channels[(session.channels.probe_id == probe_id)&(session.channels.ecephys_structure_acronym.isna())].index.values
print(channeloutsidebrain)
channeloutsidebrain = lfp.channel.values[np.isin(lfp.channel.values, channeloutsidebrain)]
print(channeloutsidebrain)
channeloutsidebrain = np.random.choice(channeloutsidebrain)
print(channeloutsidebrain)

In [None]:
sessions_full_list = cache.get_session_table()

In [None]:
sessions_full_list

In [None]:
sessions_full_list.session_type.value_counts()

In [None]:
#test_session = sessions_full_list[746083955]
test_session = session = cache.get_session_data(746083955)
test_session

In [None]:
test_session.probes.lfp_sampling_rate[test_session.probes.index.values[0]]

In [None]:
# Scratch cell: pick one random LFP channel of the current probe that has no
# structure label (ecephys_structure_acronym is NaN) and load its LFP trace.
channeloutsidebrain = session.channels[(session.channels.probe_id == probe_id)&(session.channels.ecephys_structure_acronym.isna())].index.values
# keep only the unlabelled channels that are actually present in the LFP data
channeloutsidebrain = lfp.channel.values[np.isin(lfp.channel.values, channeloutsidebrain)]
# NOTE(review): np.random.choice raises ValueError on an empty array, which looks like
# the error recorded at the top of this file for session 746083955 - confirm
channeloutsidebrain = np.random.choice(channeloutsidebrain)
# NOTE(review): outofbrain_chans_arr is not defined anywhere in the visible code (the main
# script uses outof_hp_chans_arr) - presumably left over in the notebook namespace; verify
outofbrain_chans_arr = np.append(outofbrain_chans_arr,channeloutsidebrain)
        
movement_control_channel = lfp.sel(channel=channeloutsidebrain)
movement_control_channel = movement_control_channel.to_pandas()

In [None]:
# Scratch cell: write the channel ids collected for the current session to two
# comma-delimited text files inside session_subfolder.
# ca1_chans_arr
# outofbrain_chans_arr
# NOTE(review): both file names embed this_chan_id, i.e. whichever channel id was
# assigned last in the notebook namespace - confirm this is intended
txt_np_arr_filename_swr = f"session_{session_id}_channel_{this_chan_id}_ca1_channels.txt"
nptxt_path = os.path.join(session_subfolder, txt_np_arr_filename_swr)
np.savetxt(nptxt_path, ca1_chans_arr, fmt='%d', delimiter=',')

# NOTE(review): outofbrain_chans_arr is not defined in the visible code (the main
# script names this array outof_hp_chans_arr) - verify before running
txt_np_arr_filename_move = f"session_{session_id}_channel_{this_chan_id}_outofbrain_channels.txt"
nptxt_path = os.path.join(session_subfolder, txt_np_arr_filename_move)
np.savetxt(nptxt_path, outofbrain_chans_arr, fmt='%d', delimiter=',')

In [None]:
# Select the previously chosen out-of-brain channel from the probe LFP
# (left as returned by lfp.sel, without the pandas conversion).
#ca1_chans_arr = np.array([850264486, 850261534, 850260279, 850258884, 850257427])
movement_control_channel  = lfp.sel(channel=channeloutsidebrain)

In [None]:
# Inspect the LFP object and check which of the hard-coded channel ids are
# present in lfp.channel.
# NOTE: in a notebook only the last expression is shown as the cell output;
# the bare `type(lfp)` and `lfp` lines are evaluated and discarded.
#test = lfp.sel(channel=list(ca1_chans_arr))
type(lfp)
print(lfp.dims)
lfp
#lfp.sel(channel = [850264486, 850261534, 850260279, 850258884, 850257427])
np.isin([850264486, 850261534, 850260279, 850258884, 850257427], lfp.channel)


In [None]:
# Pick a random channel id on this probe that has no structure acronym.
# NOTE: unlike the other selection cells this does not restrict the candidates
# to channels present in lfp.channel, and np.random.choice raises ValueError
# when no channel matches.
np.random.choice(session.channels[(session.channels.probe_id == probe_id)&(session.channels.ecephys_structure_acronym.isna())].index)

In [None]:
# Load the first session in `sessions` whose session_type is
# 'functional_connectivity' and display it.
funk_session = cache.get_session_data(sessions.index[sessions.session_type=='functional_connectivity'][0])
funk_session

In [None]:
# Look at the behavioural data attached to the session.
# NOTE: in a notebook only the last expression (the pupil data) is displayed;
# the running_speed line is evaluated and discarded.
funk_session.running_speed
funk_session.get_pupil_data()

In [None]:
# Count how many rows of `sessions` fall under each session_type.
sessions.session_type.value_counts()

In [None]:
# Select the channels in ca1_idx from the probe LFP, convert to pandas, and
# filter with lowcut=120 / highcut=250.
# NOTE(review): finitimpresp_filter_for_LFP is defined elsewhere; presumably a
# FIR band-pass with the cut-offs in Hz — confirm against its definition.
lfp_ca1  = lfp.sel(channel=ca1_idx)
lfp_ca1 = lfp_ca1.to_pandas()
lfp_ca1_ripppleband = finitimpresp_filter_for_LFP(lfp_ca1, lowcut = 120, highcut = 250)

In [None]:
# Probe ids of this session.
# NOTE: in a notebook this value is discarded; only the last expression of the
# cell is displayed.
list(session.probes.index)


# Distinct structure acronyms found on the channels of the current probe_id.
list(session.channels[session.channels.probe_id == probe_id].ecephys_structure_acronym.unique())

In [None]:
# Map each probe's description to the distinct structure acronyms found on
# that probe's channels.
{
    description: list(
        session.channels.loc[
            session.channels.probe_id == probe_id, 'ecephys_structure_acronym'
        ].unique()
    )
    for probe_id, description in session.probes.description.items()
}

In [None]:
# Collect the ids of all probes that have at least one channel labelled CA1.
probe_id_list = list(session.probes.index)
probes_of_interest = []

# Kept as a plain loop: `probe_id` stays bound afterwards, as in the original.
for probe_id in probe_id_list:
    on_this_probe = session.channels.probe_id == probe_id
    probe_regions = session.channels.loc[on_this_probe, 'ecephys_structure_acronym'].unique()
    has_ca1 = np.isin('CA1', list(probe_regions))
    if not has_ca1:
        continue
    probes_of_interest.append(probe_id)
print(probes_of_interest)

In [None]:
# Scratch check: growing an initially empty array one value at a time yields a
# flat float array; print it empty, after one value, and after all three.
test_arr = np.empty(0)
print(test_arr)
test_arr = np.concatenate((test_arr, [123]))
print(test_arr)
test_arr = np.concatenate((test_arr, [456, 789]))
print(test_arr)

In [None]:
# Look up the probe whose description is 'probeE' and load its LFP.
# NOTE: .index.values[0] raises IndexError if the session has no such probe.
probe_id = session.probes[session.probes.description == 'probeE'].index.values[0]

lfp = session.get_lfp(probe_id)

# Display the LFP object as the cell output.
lfp

In [None]:
# The loop header was missing its colon and the body was never written;
# a placeholder body keeps the cell syntactically valid.
for i in range(10):
    pass  # TODO: loop body was never filled in

In [None]:
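# Code for user input (disabled; kept for reference inside the string below).
# Matching the user input against 'all' never works because process_input()
# returns True (not the string 'all') for that case, so the
# `sessions_to_use != 'all'` test is always true.  The comprehension
# `[int(x): for x in user_input]` also has a stray colon (a syntax error) and
# iterates over the characters of the typed text rather than the session ids
# read from the file.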
"""
def process_input(user_input):
    if user_input.lower() == 'all':
        value = True
    else:
        try:
            with open(user_input, 'r') as file:
                value = file.read().splitlines()
        except FileNotFoundError:
            print("File not found or invalid path.")
            value = None
    return value

def filter_sessions_based_on_user_input(sessions_df):
    # sessions is a dataframe called from allensdk cache data
    user_input = input("Enter 'all' or a path to a .txt file with session_ids in a list [seshnum1,seshnum2,...]: ")
    user_input = str(user_input)
    user_input = user_input.strip()  # Remove leading/trailing whitespace
    sessions_to_use = process_input(user_input)
    print(user_input)
    print(type(user_input))
    print(type('all'))
    print(user_input !='all')
    if (sessions_to_use!='all'):
        user_input = [int(x): for x in user_input]
        sessions_df = sessions_df.loc[ sessions_df.index.intersection(user_input) ]
    
    return sessions_df
        
sessions = filter_sessions_based_on_user_input(sessions)
"""

In [None]:
# Sessions whose existing output must be kept.
dont_wipe_these_sessions = [715093703, 719161530]

# True when the current session may be wiped.  A leading `!` is IPython's
# shell escape (and a syntax error in plain Python), not boolean negation,
# so the membership test is written with `not in`.
session_id not in dont_wipe_these_sessions

In [18]:

"""
def check_overlap(df1, df2):
    overlap_check = df1.apply(lambda row1: df2.apply(lambda row2: not (row1['end_time'] < row2['start_time'] or row1['start_time'] > row2['end_time']), axis=1), axis=1)
    overlap_df = pd.DataFrame(overlap_check, columns=['Overlap'])
    return overlap_df
"""

def events_within_intervals(events_a, events_b):
    """
    Flags the events of type A that lie entirely inside any event of type B.

    An A event counts as "within" a B event when
    ``b.start_time <= a.start_time`` and ``a.end_time <= b.end_time``
    (boundaries inclusive).

    Parameters:
    - events_a (pd.DataFrame): DataFrame containing events of type A with 'start_time' and 'end_time' columns.
    - events_b (pd.DataFrame): DataFrame containing events of type B with 'start_time' and 'end_time' columns.

    Returns:
    - list of bool: one entry per row of events_a, in row order; True when that
      event is contained in at least one event of events_b.  The list can be
      used directly as a boolean mask, e.g. ``events_a[result]``.
    """
    # Pull the B bounds out once so each A event is tested against all of
    # them in a single vectorised comparison.
    b_starts = events_b['start_time'].to_numpy()
    b_ends = events_b['end_time'].to_numpy()

    # One flag per A event (including False for events that are not
    # contained), so the result stays aligned with events_a.  With no B
    # events, .any() on the empty comparison is False.
    return [
        bool(((b_starts <= a_start) & (a_end <= b_ends)).any())
        for a_start, a_end in zip(events_a['start_time'], events_a['end_time'])
    ]


[True, True]

In [21]:

def check_overlap(df1, df2):
    """
    Flags each interval in df1 that overlaps at least one interval in df2.

    Two intervals overlap when each starts strictly before the other ends, so
    intervals that merely touch at an endpoint do not count as overlapping.

    Parameters:
    - df1 (pd.DataFrame): intervals to test, with 'start_time' and 'end_time' columns.
    - df2 (pd.DataFrame): intervals to test against, with the same two columns.

    Returns:
    - list of bool: one entry per row of df1, in row order.
    """
    other_starts = df2['start_time'].to_numpy()
    other_ends = df2['end_time'].to_numpy()

    # Iterate over the column values rather than df1.loc[i, ...]: label lookup
    # with 0..len-1 raises KeyError on any frame without a default RangeIndex
    # (for example, one produced by filtering).
    return [
        bool(((other_starts < end) & (other_ends > start)).any())
        for start, end in zip(df1['start_time'], df1['end_time'])
    ]

# Example usage: two small interval tables, checked against each other in
# both directions.
data1 = dict(
    start_time=[28800.0, 45000.0, 56700.0, 71000.0, 81000.0],
    end_time=[36900.0, 46400.0, 64800.0, 71200.0, 85500.0],
)
data2 = dict(
    start_time=[36000.0, 48600.0, 60300.0, 72000.0, 81000.0, 90000.0, 97200.0],
    end_time=[42300.0, 54900.0, 63900.0, 79200.0, 85500.0, 93600.0, 100800.0],
)

df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

# First df1 against df2, then the reverse.
for left, right in ((df1, df2), (df2, df1)):
    print(check_overlap(left, right))

[True, False, True, False, True]
[True, False, True, False, True, False, False]
