# Trial formatting and LFP extraction

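This notebook formats the labeled tone (trial) metadata — subjects, win/lose outcome and competition closeness — and extracts preprocessed LFP traces for the baseline and trial window of every trial in each brain region.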

In [1]:
import os
import glob
from collections import Counter

In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import matplotlib.cm as cm


In [3]:
import spikeinterface.extractors as se
import spikeinterface.preprocessing as sp
from spectral_connectivity import Multitaper, Connectivity

In [4]:
# Plot defaults: seaborn "notebook" context with the "ticks" style, slightly
# larger fonts, and wide figures.
sns.set(context='notebook', style='ticks', font_scale=1.2)
mpl.rcParams['figure.figsize'] = [15, 6]

## Inputs & Data

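The inputs are the channel-mapping, tone-timestamp and video-to-frame spreadsheets read from the notebook directory, the SpikeGadgets `.rec` session directories listed in `ALL_SESSION_DIR`, and the frame-rate, sampling-rate and trial-duration constants defined below.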

In [5]:
# Inputs and required data loading.
# Input variable names are in all-caps snake case.
# Whenever an input is changed or used for processing,
# the derived variables are in lower-case snake case.
THORAX_INDEX = 1
# Spreadsheets read from the notebook's working directory.
CHANNEL_MAPPING_DF = pd.read_excel("./channel_mapping.xlsx")
TONE_TIMESTAMP_DF = pd.read_excel("./rce_tone_timestamp.xlsx", index_col=0)
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_excel("./video_to_frame_and_subject.xlsx")
SLEAP_DIR = "/scratch/back_up/reward_competition_extention/proc/id_corrected"
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs
# NOTE(review): units of the MED-PC box dimensions are not stated here — confirm.
MED_PC_WIDTH = 29.5
MED_PC_HEIGHT = 24
# Presumably video frames per second; it is multiplied by TRIAL_DURATION below
# to get a frame count per trial window.
FRAME_RATE = 22
VELOCITY_WINDOW_SIZE = FRAME_RATE
ROLLING_AVERAGE_WINDOW_SIZE = FRAME_RATE // 2
# Length of the baseline and trial windows; multiplied by the sampling rates
# below, so presumably seconds.
TRIAL_DURATION = 10

In [6]:
# Session directories to process. Duplicates are removed with set() and the
# result is sorted so that the processing order is identical on every run
# (the iteration order of a set of strings can differ between kernels).
ALL_SESSION_DIR = sorted(set([
    '/scratch/back_up/reward_competition_extention/data/omission/2023_06_17/20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.rec',
    '/scratch/back_up/reward_competition_extention/data/omission/2023_06_18/20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.rec',
    '/scratch/back_up/reward_competition_extention/data/omission/2023_06_19/20230619_115321_standard_comp_to_omission_D3_subj_1-2_and_1-4.rec',
    '/scratch/back_up/reward_competition_extention/data/omission/2023_06_20/20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1.rec',
    '/scratch/back_up/reward_competition_extention/data/omission/2023_06_21/20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2.rec',
]))

In [7]:
# Sampling rate of the raw recording timestamps (used for the *_ephys_* ranges).
EPHYS_SAMPLING_RATE = 20000
# Sampling rate used for LFP indices; the ratio of the two rates converts
# time_stamp_index into lfp_index further down.
LFP_SAMPLING_RATE = 1000

## Outputs

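The notebook writes two data files to `./proc`: `all_region_trial_metadata.csv` (one row per labeled trial with subject, outcome and index columns) and `lfp_traces_with_trial_info.pkl` (the trials that have a loaded recording, with channel mapping and per-region baseline and trial LFP traces).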

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Processing

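The tone timestamps are cleaned and labeled (subjects, win/lose outcome, competition closeness), each recording is band-pass filtered, notch filtered, resampled and z-scored, and for every trial the `TRIAL_DURATION` window before (baseline) and after (trial) the tone is cut out of the channel mapped to each brain region.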

In [8]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names


In [9]:
def get_sleap_tracks_from_h5(filename):
    """
    Load the 'tracks' dataset of a SLEAP h5 file and return it transposed.

    Written so it can be passed to pandas' ``apply`` on a column of file paths.

    Parameters:
    ----------
    filename : str
        Path to the SLEAP h5 file.

    Returns:
    -------
    np.ndarray
        The full 'tracks' dataset, read into memory and transposed
        (all axes reversed).

    Example:
    --------
    df['tracks'] = df['filename_column'].apply(get_sleap_tracks_from_h5)

    """
    with h5py.File(filename, "r") as sleap_file:
        # Read the whole dataset before the file is closed.
        tracks = sleap_file["tracks"][:]
    return tracks.T

In [10]:
def get_sleap_track_names_from_h5(filename):
    """
    Read the 'track_names' dataset of a SLEAP h5 file as decoded strings.

    Written so it can be passed to pandas' ``apply`` on a column of file paths.

    Parameters:
    ----------
    filename : str
        Path to the SLEAP h5 file.

    Returns:
    -------
    list of str
        One UTF-8 decoded string per entry of the 'track_names' dataset.

    Example:
    --------
    df['track_names'] = df['filename_column'].apply(get_sleap_track_names_from_h5)

    """
    with h5py.File(filename, "r") as sleap_file:
        raw_names = sleap_file["track_names"][:]

    decoded_names = []
    for raw_name in raw_names:
        # Each entry is converted to raw bytes first, then decoded.
        decoded_names.append(raw_name.tobytes().decode('utf-8'))
    return decoded_names


In [11]:
def get_node_names_from_sleap(filename):
    """
    Read the 'node_names' dataset of a SLEAP h5 file as decoded strings.

    Parameters:
    - filename (str): Path to the SLEAP h5 file.

    Returns:
    - list of str: The decoded node names, in file order.
    """
    with h5py.File(filename, "r") as sleap_file:
        raw_names = sleap_file["node_names"][:]
    return list(map(lambda raw_name: raw_name.decode(), raw_names))

In [12]:

def fill_missing(Y, kind="linear"):
    """Fill NaN gaps along the first axis, independently for every other index.

    Interior gaps are interpolated with ``scipy.interpolate.interp1d`` using
    ``kind``; leading and trailing NaNs are then filled with the nearest valid
    value. A series with no valid sample at all is left as NaN instead of
    raising, and a series with a single valid sample is filled with that value.

    Parameters
    ----------
    Y : numpy.ndarray
        Float array whose first axis is the one to interpolate along.
    kind : str, optional
        Interpolation kind passed to ``interp1d``. Default is "linear".

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``Y``. The values are written through a
        reshaped view, so the input array may be modified in place.
    """
    # Store initial shape.
    initial_shape = Y.shape

    # Flatten after first dim so that every column is one 1-D series.
    Y = Y.reshape((initial_shape[0], -1))

    # Interpolate along each slice.
    for i in range(Y.shape[-1]):
        y = Y[:, i]

        valid = ~np.isnan(y)
        x = np.flatnonzero(valid)
        if x.size == 0 or x.size == y.size:
            # Nothing to interpolate from, or nothing missing.
            continue

        if x.size >= 2:
            # Build interpolant; points outside the valid range come back as NaN.
            f = interp1d(x, y[x], kind=kind, fill_value=np.nan, bounds_error=False)

            # Fill missing
            xq = np.flatnonzero(~valid)
            y[xq] = f(xq)

        # Fill leading or trailing NaNs with the nearest non-NaN values
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])

        # Save slice
        Y[:, i] = y

    # Restore to initial shape.
    Y = Y.reshape(initial_shape)

    return Y

In [13]:
def compute_velocity(node_loc, window_size=25, polynomial_order=3):
    """
    Calculate the speed of a tracked node from its positions.

    Each coordinate is smoothed and differentiated with a Savitzky-Golay filter
    (``deriv=1``), and the magnitude of the resulting velocity vector is
    returned for every frame.

    Parameters:
    ----------
    node_loc : numpy.ndarray
        The location of the node, as an array of shape [frames, 2].
        Each row holds the x and y coordinates for one frame.

    window_size : int, optional
        The window length passed to the Savitzky-Golay filter, i.e. the number
        of consecutive data points used for each local fit.
        Default is 25.

    polynomial_order : int, optional
        The order of the polynomial fitted within each filter window.
        Default is 3.

    Returns:
    -------
    numpy.ndarray
        The velocity magnitude for each frame, in coordinate units per frame
        (the filter is called with its default sample spacing).

    """
    # Keep a floating dtype as-is, but never store derivatives in an integer
    # buffer: integer input (e.g. pixel coordinates) would silently truncate
    # the filtered values.
    if np.issubdtype(node_loc.dtype, np.floating):
        velocity_dtype = node_loc.dtype
    else:
        velocity_dtype = np.float64
    node_loc_vel = np.zeros(node_loc.shape, dtype=velocity_dtype)

    # For each coordinate (x and y), smooth the data and calculate the derivative (velocity)
    for c in range(node_loc.shape[-1]):
        node_loc_vel[:, c] = savgol_filter(node_loc[:, c], window_size, polynomial_order, deriv=1)

    # Calculate the magnitude of the velocity vectors for each frame
    node_vel = np.linalg.norm(node_loc_vel, axis=1)

    return node_vel

In [14]:
def extract_sleap_data(filename):
    """
    Read coordinates, node names and track names from a SLEAP h5 file.

    Parameters:
    - filename (str): Path to the SLEAP file.

    Returns:
    - dict: A dictionary with the following keys:
        * "locations" (numpy.ndarray): The transposed 'tracks' dataset.
        * "node_names" (list of str): Decoded entries of 'node_names'.
        * "track_names" (list of str): Decoded entries of 'track_names'.

    Example:
    >>> sleap_data = extract_sleap_data("path/to/sleap/file.h5")
    >>> locations = sleap_data["locations"]
    """
    with h5py.File(filename, "r") as sleap_file:
        locations = sleap_file["tracks"][:].T
        node_names = [name.decode() for name in sleap_file["node_names"][:]]
        track_names = [name.decode() for name in sleap_file["track_names"][:]]

    return {
        "locations": locations,
        "node_names": node_names,
        "track_names": track_names,
    }

In [15]:
def rescale_dimension_in_array(arr, dimension=0, ratio=1):
    """
    Rescale, in place, one index along the third axis of a numpy array.

    Parameters:
    - arr (numpy.ndarray): Array with at least three axes. It is modified in
                           place and the same object is returned.
    - dimension (int, default=0): Index along the third axis whose values are
                                  multiplied. For instance, in many contexts:
                                  0 represents the x-coordinate,
                                  1 represents the y-coordinate.
    - ratio (float, default=1): The scaling factor to be applied.

    Returns:
    - numpy.ndarray: The input array, after rescaling.
    """
    
    # In-place multiply: the result is cast back to arr's dtype, so a
    # fractional ratio needs a floating-point array.
    arr[:,:,dimension] *= ratio
    return arr

In [16]:
def rolling_average(arr, window_size):
    """
    Moving average of a 1-D array over a fixed-size window.

    Only fully overlapping windows are kept, so the result has
    ``len(arr) - window_size + 1`` entries.

    Parameters:
        arr (numpy.array): The input array to average.
        window_size (int): Number of consecutive samples in each window.

    Returns:
        numpy.array: The moving average of the input array.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("Window size must be at least 1.")

    # Equal weights that sum to one turn the convolution into a mean.
    uniform_kernel = np.full(window_size, 1.0 / window_size)
    return np.convolve(arr, uniform_kernel, mode='valid')



In [17]:
def chunked_average(arr, chunk_size):
    """
    Mean of consecutive, non-overlapping chunks of a 1-D array.

    Trailing samples that do not fill a whole chunk are ignored.

    Parameters:
        arr (numpy.array): The input array.
        chunk_size (int): Number of samples in each chunk.

    Returns:
        numpy.array: One mean per complete chunk.
    """
    complete_chunks = len(arr) // chunk_size

    # Drop the incomplete tail, then view the rest as one chunk per row.
    usable = arr[:complete_chunks * chunk_size]
    return usable.reshape(complete_chunks, chunk_size).mean(axis=1)

In [18]:
def compute_sorted_index(group, value_column='Value', index_column='SortedIndex'):
    """
    Computes the index of each row's value within its sorted group.

    Equal values share the same index, and indices count the distinct values in
    ascending order starting at 0.

    Parameters:
    - group (pd.DataFrame): A group of data. The new column is added to this
                            object directly.
    - value_column (str): Name of the column containing the values to be sorted.
    - index_column (str): Name of the new column that will contain the indices.

    Returns:
    - pd.DataFrame: The group with an additional column containing the indices.
    """
    sorted_values = sorted(set(group[value_column].tolist()))
    # A lookup table replaces a list search per row, which made the original
    # quadratic in the group size.
    position_of = {value: position for position, value in enumerate(sorted_values)}
    group[index_column] = group[value_column].apply(lambda value: position_of[value])
    return group

# Loading in trial information

- Dropping all rows that have not been labeled

In [19]:
# Keep only the trials that were given a condition label and renumber the rows.
all_trials_df = TONE_TIMESTAMP_DF.dropna(subset=["condition"]).reset_index(drop=True)

In [20]:
sorted(all_trials_df["recording_dir"].unique())

['20221202_134600_omission_and_competition_subject_6_1_and_6_2',
 '20221203_154800_omission_and_competition_subject_6_4_and_6_1',
 '20221214_125409_om_and_comp_6_1_and_6_3',
 '20221215_145401_comp_amd_om_6_1_and_6_3',
 '20230612_101430_standard_comp_to_training_D1_subj_1-4_and_1-3',
 '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2',
 '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1',
 '20230619_115321_standard_comp_to_omission_D3_subj_1-2_and_1-4',
 '20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1',
 '20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2']

- Making the video frame number usable

In [21]:
# Cast to int so the frame number can be used in the frame-range arithmetic below.
all_trials_df["video_frame"] = all_trials_df["video_frame"].astype(int)

- Getting the name of the video so that we can sync it up with the ephys recording

In [22]:
# Remove the literal ".videoTimeStamps.cameraHWSync" suffix to get the video name.
# str.strip() is deliberately not used: it removes any of the given *characters*
# from both ends rather than the suffix as a whole, so it can eat into the name.
all_trials_df["video_name"] = all_trials_df["video_file"].str.replace(r"\.videoTimeStamps\.cameraHWSync$", "", regex=True)

- Getting all subject IDs for a given recording

In [23]:
# Using different id extractions for the two recording-name formats.
# Step 1: names containing "2023" are kept whole; for every other name only the
# last five underscore-separated tokens are kept, prefixed with "subj".
all_trials_df["all_subjects"] = all_trials_df["recording_dir"].apply(lambda x: x if "2023" in x else "subj" + "_".join(x.split("_")[-5:]))
# Step 2: normalise "-" to "_", take the text after the last "subj", split it on
# "and", trim underscores, turn the remaining "_" into "." and return the ids
# as a sorted tuple, e.g. ('1.1', '1.2').
all_trials_df["all_subjects"] = all_trials_df["all_subjects"].apply(lambda x: tuple(sorted([num.strip("_").replace("_",".") for num in x.replace("-", "_").split("subj")[-1].strip("_").split("and")])))

In [24]:
all_trials_df["all_subjects"].unique()

array([('6.1', '6.2'), ('6.1', '6.4'), ('6.1', '6.3'), ('1.3', '1.4'),
       ('1.1', '1.2'), ('1.1', '1.4'), ('1.2', '1.4')], dtype=object)

In [25]:
# The subject id is the first two tokens of subject_info ("-" and "_" both act
# as separators), joined with ".", e.g. "1-1_t1b3L_box1" -> "1.1".
all_trials_df["current_subject"] = all_trials_df["subject_info"].apply(lambda x: ".".join(x.replace("-","_").split("_")[:2])).astype(str)

In [26]:
all_trials_df["current_subject"].unique()

array(['6.1', '1.3', '1.4', '1.1', '1.2'], dtype=object)

- Converting the trial label to win or lose based on who won the trial

In [27]:
# "win" when the condition names the current subject, "lose" when it names
# another subject of the recording, otherwise the condition is kept as-is
# (e.g. "rewarded" / "omission"). Whitespace is stripped for both comparisons so
# a padded label cannot be a win for one subject yet not a loss for the other.
all_trials_df["trial_outcome"] = all_trials_df.apply(
    lambda x: "win" if str(x["condition"]).strip() == str(x["current_subject"])
             else ("lose" if str(x["condition"]).strip() in x["all_subjects"]
                   else x["condition"]), axis=1)

In [28]:
all_trials_df["trial_outcome"].unique()

array(['rewarded', 'omission', 'win', 'lose'], dtype=object)

- Adding the competition closeness as a column

In [29]:
# Collapse the raw closeness labels: anything mentioning "only" is a
# non-competitive trial, any other string is competitive, and non-strings
# (missing labels) map to NaN.
competition_closeness_map = {
    k: ("non_comp" if "only" in str(k).lower() else ("comp" if type(k) is str else np.nan))
    for k in all_trials_df["competition_closeness"].unique()
}

In [30]:
competition_closeness_map

{nan: nan,
 'Subj 1 Only': 'non_comp',
 'Subj 2 blocking Subj 1': 'comp',
 'Subj 1 then Subj 2': 'comp',
 'Subj 1 blocking Subj 2': 'comp',
 'Subj 2 Only': 'non_comp',
 'Subj 2 then Subj 1': 'comp',
 'Close Call': 'comp'}

In [31]:
# Replace the raw labels by "comp" / "non_comp". The column is overwritten, so
# this cell is only valid once per kernel: values that are not keys of the map
# become NaN.
all_trials_df["competition_closeness"] = all_trials_df["competition_closeness"].map(competition_closeness_map)

In [32]:
# Combine outcome and closeness into one label such as "win_comp". Trials without
# a closeness value keep just the outcome. The label is built conditionally
# instead of joining and then calling strip("nan"), which removes the characters
# "n"/"a" from both ends and would corrupt labels that start or end with them.
all_trials_df["competition_closeness"] = all_trials_df.apply(
    lambda x: str(x["trial_outcome"]) if pd.isna(x["competition_closeness"])
    else "_".join([str(x["trial_outcome"]), str(x["competition_closeness"])]),
    axis=1)

In [33]:
all_trials_df["competition_closeness"].unique()

array(['rewarded', 'omission', 'win_non_comp', 'win_comp',
       'lose_non_comp', 'lose_comp'], dtype=object)

- Adding the LFP index

In [34]:
# Convert the raw sample index into an LFP sample index by floor-dividing by
# the ratio of the two sampling rates.
all_trials_df["lfp_index"] = all_trials_df["time_stamp_index"].floordiv(EPHYS_SAMPLING_RATE / LFP_SAMPLING_RATE).astype(int)

In [35]:
# Store the tone time as an integer; it is later used to order trials.
all_trials_df["time"] = all_trials_df["time"].astype(int)

In [36]:
# Integer sample index, needed for the ephys sample ranges computed below.
all_trials_df["time_stamp_index"] = all_trials_df["time_stamp_index"].astype(int)

- Removing unnecessary columns

In [37]:
# Drop columns that are not needed downstream; names that are absent are skipped.
all_trials_df = all_trials_df.drop(
    columns=["state", "din", "condition", "Unnamed: 13"],
    errors="ignore",
)

In [38]:
all_trials_df.head()

Unnamed: 0,time,recording_dir,recording_file,time_stamp_index,video_file,video_frame,video_number,subject_info,competition_closeness,video_name,all_subjects,current_subject,trial_outcome,lfp_index
0,6310663,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,1390826,20221202_134600_omission_and_competition_subje...,1734,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,69541
1,7910662,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,2990825,20221202_134600_omission_and_competition_subje...,3728,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,149541
2,9710660,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,4790823,20221202_134600_omission_and_competition_subje...,5972,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,239541
3,11310658,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,6390821,20221202_134600_omission_and_competition_subje...,7966,1.0,6_1_top_2_base_3,omission,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,omission,319541
4,12810657,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,7890820,20221202_134600_omission_and_competition_subje...,9836,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,394541


In [39]:
# Save the trial metadata under OUTPUT_DIR (declared in the inputs cell) and
# create the directory first so a fresh checkout does not fail here.
os.makedirs(OUTPUT_DIR, exist_ok=True)
all_trials_df.to_csv(os.path.join(OUTPUT_DIR, "all_region_trial_metadata.csv"))

### Extracting the LFP

In [40]:
# Maps the recording basename (file name without ".rec") to its preprocessed
# recording object.
recording_name_to_all_ch_lfp = {}
# Going through all the recording sessions
for session_dir in ALL_SESSION_DIR:
    # Going through all the recordings in each session
    for recording_path in glob.glob(os.path.join(session_dir, "*.rec")):
        recording_basename = os.path.splitext(os.path.basename(recording_path))[0]
        try:
            # Checking that the recording has an ECU stream; the result is not
            # used, and a recording without one raises and is skipped.
            se.read_spikegadgets(recording_path, stream_id="ECU")
            current_recording = se.read_spikegadgets(recording_path, stream_id="trodes")
            print(recording_basename)
            # Preprocessing the LFP
            current_recording = sp.bandpass_filter(current_recording, freq_min=0.5, freq_max=300)
            current_recording = sp.notch_filter(current_recording, freq=60)
            # Resample to the LFP rate from the inputs so that lfp_index (derived
            # from the same constant) addresses the right samples.
            current_recording = sp.resample(current_recording, resample_rate=LFP_SAMPLING_RATE)
            current_recording = sp.zscore(current_recording)
            recording_name_to_all_ch_lfp[recording_basename] = current_recording
        except Exception as error:
            # Still best-effort, but report what was skipped and why instead of
            # hiding it; KeyboardInterrupt is no longer swallowed.
            print("Skipping {}: {!r}".format(recording_basename, error))



20230619_115321_standard_comp_to_omission_D3_subj_1-4_t3b3L_box2_merged
20230621_111240_standard_comp_to_omission_D5_subj_1-4_t3b3L_box1_merged
20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged
20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged
20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged
20230620_114347_standard_comp_to_omission_D4_subj_1-2_t3b3L_box_1_merged
20230618_100636_standard_comp_to_omission_D2_subj_1_4_t4b3L_box1_merged
20230618_100636_standard_comp_to_omission_D2_subj_1_1_t1b2L_box2_merged


- Keeping only the trials whose recording was successfully loaded in the LFP extraction step above

In [41]:
# Keep only the trials whose recording_file is a key of the loaded-LFP dictionary.
all_trials_df = all_trials_df.loc[
    all_trials_df["recording_file"].isin(list(recording_name_to_all_ch_lfp))
].reset_index(drop=True)

In [42]:
all_trials_df.head()

Unnamed: 0,time,recording_dir,recording_file,time_stamp_index,video_file,video_frame,video_number,subject_info,competition_closeness,video_name,all_subjects,current_subject,trial_outcome,lfp_index
0,4509412,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,1030879,20230617_115521_standard_comp_to_omission_D1_s...,1029,1.0,1-1_t1b3L_box1,lose_comp,20230617_115521_standard_comp_to_omission_D1_s...,"(1.1, 1.2)",1.1,lose,51543
1,6909411,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,3430878,20230617_115521_standard_comp_to_omission_D1_s...,3425,1.0,1-1_t1b3L_box1,win_comp,20230617_115521_standard_comp_to_omission_D1_s...,"(1.1, 1.2)",1.1,win,171543
2,9209413,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,5730880,20230617_115521_standard_comp_to_omission_D1_s...,5720,1.0,1-1_t1b3L_box1,win_comp,20230617_115521_standard_comp_to_omission_D1_s...,"(1.1, 1.2)",1.1,win,286544
3,11009410,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,7530877,20230617_115521_standard_comp_to_omission_D1_s...,7516,1.0,1-1_t1b3L_box1,lose_non_comp,20230617_115521_standard_comp_to_omission_D1_s...,"(1.1, 1.2)",1.1,lose,376543
4,12109413,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,8630880,20230617_115521_standard_comp_to_omission_D1_s...,8615,1.0,1-1_t1b3L_box1,win_non_comp,20230617_115521_standard_comp_to_omission_D1_s...,"(1.1, 1.2)",1.1,win,431544


- Adding trial numbers based on timestamp ordering for each recording

In [43]:
# Within each recording, number the trials 0, 1, 2, ... in ascending order of
# their "time" value, then flatten the grouped result back to a plain index.
all_trials_df = all_trials_df.groupby('recording_file').apply(lambda g: compute_sorted_index(g, value_column='time', index_column='trial_number')).reset_index(drop=True)

In [44]:
all_trials_df["trial_number"].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39])

## Adding the LFP trace information

In [45]:
# Keep only the cohort 2 rows of the channel mapping. The explicit copy makes
# the result an independent frame, so columns can be assigned on it later
# without pandas' SettingWithCopyWarning.
CHANNEL_MAPPING_DF = CHANNEL_MAPPING_DF[CHANNEL_MAPPING_DF["Cohort"] == 2].copy()

In [46]:
CHANNEL_MAPPING_DF

Unnamed: 0,Cohort,Subject,eib_mPFC,eib_vHPC,eib_BLA,eib_LH,eib_MD,spike_interface_mPFC,spike_interface_vHPC,spike_interface_BLA,spike_interface_LH,spike_interface_MD
4,2,1.1,,16,17,18,19,5.0,31.0,30.0,29.0,28.0
5,2,1.2,,31,30,29,28,10.0,31.0,30.0,29.0,28.0
6,2,1.3,,15,14,13,12,,,,,
7,2,1.4,,15,14,13,12,2.0,31.0,30.0,29.0,28.0


- Adding the brain-region-to-channel mapping for each trial's subject

In [47]:
# Convert the subject id to a string so it can be matched against the string
# column "current_subject" in the merge below.
CHANNEL_MAPPING_DF["Subject"] = CHANNEL_MAPPING_DF["Subject"].astype(str)

In [48]:
# Left merge: every trial is kept, and trials whose subject has no mapping row
# get NaN in the mapping columns.
channel_map_and_all_trials_df = pd.merge(
    all_trials_df,
    CHANNEL_MAPPING_DF,
    how="left",
    left_on="current_subject",
    right_on="Subject",
)

In [49]:
# Remove every column whose name contains "eib"; only the spike_interface_*
# channel columns are used from here on.
channel_map_and_all_trials_df = channel_map_and_all_trials_df.drop(
    columns=[name for name in channel_map_and_all_trials_df.columns if "eib" in name],
    errors="ignore",
)

In [50]:
# "Subject" duplicates "current_subject" after the merge, so it is dropped.
channel_map_and_all_trials_df = channel_map_and_all_trials_df.drop(
    columns=["Subject"],
    errors="ignore",
)

In [51]:
channel_map_and_all_trials_df.head()

Unnamed: 0,time,recording_dir,recording_file,time_stamp_index,video_file,video_frame,video_number,subject_info,competition_closeness,video_name,...,current_subject,trial_outcome,lfp_index,trial_number,Cohort,spike_interface_mPFC,spike_interface_vHPC,spike_interface_BLA,spike_interface_LH,spike_interface_MD
0,4509412,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,1030879,20230617_115521_standard_comp_to_omission_D1_s...,1029,1.0,1-1_t1b3L_box1,lose_comp,20230617_115521_standard_comp_to_omission_D1_s...,...,1.1,lose,51543,0,2,5.0,31.0,30.0,29.0,28.0
1,6909411,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,3430878,20230617_115521_standard_comp_to_omission_D1_s...,3425,1.0,1-1_t1b3L_box1,win_comp,20230617_115521_standard_comp_to_omission_D1_s...,...,1.1,win,171543,1,2,5.0,31.0,30.0,29.0,28.0
2,9209413,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,5730880,20230617_115521_standard_comp_to_omission_D1_s...,5720,1.0,1-1_t1b3L_box1,win_comp,20230617_115521_standard_comp_to_omission_D1_s...,...,1.1,win,286544,2,2,5.0,31.0,30.0,29.0,28.0
3,11009410,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,7530877,20230617_115521_standard_comp_to_omission_D1_s...,7516,1.0,1-1_t1b3L_box1,lose_non_comp,20230617_115521_standard_comp_to_omission_D1_s...,...,1.1,lose,376543,3,2,5.0,31.0,30.0,29.0,28.0
4,12109413,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,8630880,20230617_115521_standard_comp_to_omission_D1_s...,8615,1.0,1-1_t1b3L_box1,win_non_comp,20230617_115521_standard_comp_to_omission_D1_s...,...,1.1,win,431544,4,2,5.0,31.0,30.0,29.0,28.0


- Linking up all LFP calculations with all the trials

In [52]:
# Attach to every trial the preprocessed recording object of its recording_file
# (the same object is shared by all trials of that recording).
channel_map_and_all_trials_df["all_ch_lfp"] = channel_map_and_all_trials_df["recording_file"].map(recording_name_to_all_ch_lfp)

- Creating a new row for each brain region

In [53]:
# Names of the channel-mapping columns that hold one channel id per brain region.
brain_region_col = [name for name in CHANNEL_MAPPING_DF.columns if "spike_interface" in name]

In [54]:
# All columns that exist at this point except the per-region channel columns;
# only used as id_vars by the old melt-based code at the end of the notebook.
id_cols = [col for col in channel_map_and_all_trials_df.columns if col not in brain_region_col]

In [55]:
brain_region_col

['spike_interface_mPFC',
 'spike_interface_vHPC',
 'spike_interface_BLA',
 'spike_interface_LH',
 'spike_interface_MD']

In [56]:
# Channel ids were read as floats (e.g. 31.0); convert them to strings such as
# "31" before they are passed to get_traces as channel ids.
# NOTE(review): astype(int) raises if a trial's subject has no channel mapping
# (NaN after the left merge) — confirm every remaining subject is mapped.
for col in brain_region_col:
    channel_map_and_all_trials_df[col] = channel_map_and_all_trials_df[col].astype(int).astype(str)

In [57]:
channel_map_and_all_trials_df.columns

Index(['time', 'recording_dir', 'recording_file', 'time_stamp_index',
       'video_file', 'video_frame', 'video_number', 'subject_info',
       'competition_closeness', 'video_name', 'all_subjects',
       'current_subject', 'trial_outcome', 'lfp_index', 'trial_number',
       'Cohort', 'spike_interface_mPFC', 'spike_interface_vHPC',
       'spike_interface_BLA', 'spike_interface_LH', 'spike_interface_MD',
       'all_ch_lfp'],
      dtype='object')

In [58]:
# (start, end) LFP sample indices of the baseline window ending at the tone.
# NOTE(review): start is negative when the tone occurs earlier than
# TRIAL_DURATION * LFP_SAMPLING_RATE samples into the recording — confirm this
# cannot happen.
channel_map_and_all_trials_df["baseline_lfp_timestamp_range"] = channel_map_and_all_trials_df["lfp_index"].apply(lambda x: (x - TRIAL_DURATION * LFP_SAMPLING_RATE, x))

In [59]:
# (start, end) LFP sample indices of the trial window starting at the tone.
channel_map_and_all_trials_df["trial_lfp_timestamp_range"] = channel_map_and_all_trials_df["lfp_index"].apply(lambda x: (x, x + TRIAL_DURATION * LFP_SAMPLING_RATE))

In [60]:
# Same baseline window, expressed in raw-recording sample indices.
channel_map_and_all_trials_df["baseline_ephys_timestamp_range"] = channel_map_and_all_trials_df["time_stamp_index"].apply(lambda x: (x - TRIAL_DURATION * EPHYS_SAMPLING_RATE, x))

In [61]:
# Same trial window, expressed in raw-recording sample indices.
channel_map_and_all_trials_df["trial_ephys_timestamp_range"] = channel_map_and_all_trials_df["time_stamp_index"].apply(lambda x: (x, x + TRIAL_DURATION * EPHYS_SAMPLING_RATE))

In [62]:
# Same baseline window, expressed in video frame numbers.
channel_map_and_all_trials_df["baseline_videoframe_range"] = channel_map_and_all_trials_df["video_frame"].apply(lambda x: (x - TRIAL_DURATION * FRAME_RATE, x))

In [63]:
# Same trial window, expressed in video frame numbers.
channel_map_and_all_trials_df["trial_videoframe_range"] = channel_map_and_all_trials_df["video_frame"].apply(lambda x: (x, x + TRIAL_DURATION * FRAME_RATE))

In [64]:
# For every brain region, cut the baseline and trial LFP trace of each trial
# out of that trial's recording, using the channel mapped to the region.
for col in brain_region_col:
    print(col)
    # Region name = column name without the "spike_interface_" prefix. The
    # prefix is sliced off explicitly: str.strip("spike_interface") removes
    # *characters* from both ends and would truncate region names that start
    # or end with one of those letters.
    region = col[len("spike_interface_"):] if col.startswith("spike_interface_") else col
    for epoch in ("baseline", "trial"):
        range_col = "{}_lfp_timestamp_range".format(epoch)
        # channel_ids is passed as a one-element list; a bare string such as
        # "31" is itself a sequence of characters and may be treated as several
        # single-character ids. .T[0] then selects that one channel's samples.
        channel_map_and_all_trials_df["{}_{}_lfp_trace".format(region, epoch)] = channel_map_and_all_trials_df.apply(
            lambda row: row["all_ch_lfp"].get_traces(
                channel_ids=[row[col]],
                start_frame=row[range_col][0],
                end_frame=row[range_col][1],
            ).T[0],
            axis=1,
        )


spike_interface_mPFC
spike_interface_vHPC
spike_interface_BLA
spike_interface_LH
spike_interface_MD


In [65]:
# Save the trials with their LFP traces under OUTPUT_DIR (declared in the inputs
# cell), creating the directory if needed. The frame still carries the
# "all_ch_lfp" column, so the recording objects are pickled along with it.
os.makedirs(OUTPUT_DIR, exist_ok=True)
channel_map_and_all_trials_df.to_pickle(os.path.join(OUTPUT_DIR, "lfp_traces_with_trial_info.pkl"))

In [67]:
# Deliberate stop: halts "Run All" here so that the old code below is never
# executed.
raise ValueError()

ValueError: 

# OLD code below for melting the dataframe into multiple rows

In [None]:
# OLD: reshape to one row per (trial, brain region); the region column name goes
# to "brain_region" and its channel id to "channel". Columns not listed in
# id_cols or brain_region_col are dropped by melt.
melted_channel_map_and_all_trials_df = channel_map_and_all_trials_df.melt(id_vars=id_cols ,value_vars=brain_region_col, var_name='brain_region', value_name='channel')


In [None]:
melted_channel_map_and_all_trials_df.head()

In [None]:
melted_channel_map_and_all_trials_df.tail()

In [None]:
melted_channel_map_and_all_trials_df.columns

In [None]:
TRIAL_DURATION * LFP_SAMPLING_RATE

In [None]:
# OLD: (start, end) LFP sample indices of the baseline window ending at the tone.
melted_channel_map_and_all_trials_df["baseline_lfp_timestamp_range"] = melted_channel_map_and_all_trials_df["lfp_index"].apply(lambda x: (x - TRIAL_DURATION * LFP_SAMPLING_RATE, x))

In [None]:
# OLD: (start, end) LFP sample indices of the trial window starting at the tone.
melted_channel_map_and_all_trials_df["trial_lfp_timestamp_range"] = melted_channel_map_and_all_trials_df["lfp_index"].apply(lambda x: (x, x + TRIAL_DURATION * LFP_SAMPLING_RATE))

In [None]:
melted_channel_map_and_all_trials_df.head()

- Getting the traces for each trial for each brain region

In [None]:
# OLD: normalise the channel id to an integer-like string (e.g. "31").
melted_channel_map_and_all_trials_df["channel"] = melted_channel_map_and_all_trials_df["channel"].astype(int).astype(str)


In [None]:
# OLD: save the per-region metadata without the recording-object column.
melted_channel_map_and_all_trials_df.drop(columns=["all_ch_lfp"], errors="ignore").to_csv("./proc/per_brain_region_trial_metadata.csv")

In [None]:
# OLD: baseline trace of the row's channel; .T[0] selects the single requested
# channel's samples.
melted_channel_map_and_all_trials_df["baseline_lfp_trace"] = melted_channel_map_and_all_trials_df.apply(lambda row: 
row["all_ch_lfp"].get_traces(channel_ids=[row["channel"]], start_frame=row["baseline_lfp_timestamp_range"][0], end_frame=row["baseline_lfp_timestamp_range"][1]).T[0], axis=1)

In [None]:
# OLD: trial trace of the row's channel; .T[0] selects the single requested
# channel's samples.
melted_channel_map_and_all_trials_df["trial_lfp_trace"] = melted_channel_map_and_all_trials_df.apply(lambda row: 
row["all_ch_lfp"].get_traces(channel_ids=[row["channel"]], start_frame=row["trial_lfp_timestamp_range"][0], end_frame=row["trial_lfp_timestamp_range"][1]).T[0], axis=1)

In [None]:
# OLD: NOTE(review) this writes to the same path as the wide-format pickle saved
# earlier in the notebook, so running it would overwrite that file with the
# melted layout.
melted_channel_map_and_all_trials_df.to_pickle("./proc/lfp_traces_with_trial_info.pkl")