# Functions and script to interface with recorded Harp binaries

This notebook assumes that numbered binary files have been saved in the Behavior.harp and SoundCard.harp folders.
We will create a general reader for the harp behavior board binaries and another specifically for register 32 of the sound card.

In [171]:
# Import main libraries and define data folder
import numpy as np
import harp
import pandas as pd
from harp.model import Model, Register, Access
######################################################################################
# Root data folder (relative, Windows-style path). The cells below append the
# Behavior.harp, SoundCard.harp and Experimental-data sub-paths to this string,
# so it must stay a plain str.
data_folder = r".\2024-01-12T14-56-34"
######################################################################################

**First the Behavior Board streams which contain the pokes**

In [172]:
# Read the behavior board's digital-input stream (nosepoke states) into a dataframe
bin_b_path = data_folder + r"\Behavior.harp"

# Reader built over the Behavior.harp folder
behavior_reader = harp.create_reader(bin_b_path)

# DigitalInputState holds the nosepoke states; DI3 is discarded and the
# index is moved into a regular column.
all_pokes = (
    behavior_reader.DigitalInputState.read()
    .drop(columns=['DI3'])
    .reset_index()
)
# Show resulting dataframe
all_pokes

Unnamed: 0,Time,DIPort0,DIPort1,DIPort2
0,3787916000.0,True,False,False
1,3787916000.0,False,False,False


In [173]:
# Input: the experimental csv written by Bonsai (it only stores the last poke and audio cue).
# Output: the path the merged table will be saved to.
csv_path = data_folder + r"\Experimental-data\experimental-data.csv"
csv_out_path = data_folder + r"\Experimental-data\experimental-data-merge.csv"
exp_csv = pd.read_csv(csv_path)

# Keep just the start and end timestamp of every trial
fields = ['TrialStart','TrialEnd']
df_trials = exp_csv.loc[:, fields]
df_trials

Unnamed: 0,TrialStart,TrialEnd
0,3787916000.0,3787916000.0


**Function to pull out events between trial timestamps**

In [174]:
# Function to parse pokes from behavior trial data using a dataframe containing the timestamps for the start and end of each trial from the experimental csv generated by Bonsai
def parse_trials_pokes(df_events,df_trials):
    """Group nosepoke onsets and offsets from the digital-input stream by trial.

    An event with any port True is a poke onset; an event with all ports
    False is a poke offset. Each offset is labelled with the port of the most
    recent onset in the whole event stream (not only the current trial), so a
    poke that starts before a trial and ends inside it still gets its port.

    Parameters
    ----------
    df_events : pandas.DataFrame
        Digital input states with boolean columns 'DIPort0', 'DIPort1' and
        'DIPort2', in chronological order. Timestamps are read from a 'Time'
        column when present (e.g. after ``reset_index``), otherwise from the
        dataframe index.
    df_trials : pandas.DataFrame
        One row per trial with 'TrialStart' and 'TrialEnd' columns. Both
        bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        One row per trial (default integer index, same order as df_trials)
        with list-valued columns 'NosepokeInTime', 'NosepokeOutTime' and
        'PokeID'. 'PokeID' has one entry per offset; the entry is None when
        no onset precedes that offset anywhere in the stream.
    """
    port_columns = ['DIPort0', 'DIPort1', 'DIPort2']
    port_ids = [int(name[-1]) for name in port_columns]  # port number = last character of the column name

    # Take timestamps from the 'Time' column if the index was reset, else from the index itself
    if 'Time' in df_events.columns:
        event_times = df_events['Time']
    else:
        event_times = df_events.index.to_series()

    port_states = df_events[port_columns].to_numpy(dtype=bool)
    is_poke_on = port_states.any(axis=1)

    # For every event, remember the port of the latest onset seen so far in the stream.
    # If several ports are True at once the lowest-numbered one is used (argmax -> first True).
    last_on_port = []
    current_port = None
    for active, states in zip(is_poke_on, port_states):
        if active:
            current_port = port_ids[states.argmax()]
        last_on_port.append(current_port)

    # Create lists to store the poke IDs and timestamps for all trials
    PokeON_S, PokeOFF_S, PokeID_S = [], [], []

    # Iterate through trials (rows) and extract data from harp stream
    for _, trial in df_trials.iterrows():

        # Positional mask of the events that occur within the time range of this trial
        in_trial = ((event_times >= trial.TrialStart) & (event_times <= trial.TrialEnd)).to_numpy()

        # Create trial lists for all pokes this trial
        PokeON, PokeOFF, PokeID = [], [], []
        for row, event_time in zip(in_trial.nonzero()[0], event_times[in_trial]):
            if is_poke_on[row]:
                PokeON.append(event_time)
            else:
                PokeOFF.append(event_time)
                PokeID.append(last_on_port[row])

        PokeON_S.append(PokeON)
        PokeOFF_S.append(PokeOFF)
        PokeID_S.append(PokeID)

    poke_df = pd.DataFrame({'NosepokeInTime': PokeON_S, 'NosepokeOutTime': PokeOFF_S, 'PokeID': PokeID_S}) # create dataframe from all nosepoke events
    return poke_df

In [175]:
# Run the poke parser over every trial and display the per-trial table
poke_df = parse_trials_pokes(df_events=all_pokes, df_trials=df_trials)
poke_df

Unnamed: 0,NosepokeInTime,NosepokeOutTime,PokeID
0,[3787916227.850592],[3787916227.970496],[0]


**Open and Merge with experimental csv**

In [176]:
# Swap the single stored nosepoke timestamp for the full per-trial poke lists
exp_csv = exp_csv.drop(columns=['NosepokeInTime'])  # old column only holds one timestamp per trial
merged_df = exp_csv.merge(
    poke_df,
    how='left',
    left_index=True,
    right_index=True,
).set_index('TrialNumber')  # rows are matched by position, then keyed by trial number

# Save the dataset (maybe, but not got the sounds yet)
merged_df

Unnamed: 0_level_0,TrialStart,TrialEnd,TrainingStage,TrainingSubstage,TrialCompletionCode,DotXLocation,DotYLocation,DotOnsetTime,DotOffsetTime,AudioCueIdentity,AudioCueStart,AudioCueEnd,CorrectForBias,BrainRegion,WallRotation,WallsRotated,NosepokeInTime,NosepokeOutTime,PokeID
TrialNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,3787916000.0,3787916000.0,4,1,IncorrectNosepoke0,0.3,-0.7,3787916000.0,3787916000.0,17,3787916000.0,0,False,,False,False,[3787916227.850592],[3787916227.970496],[0]


**Now the Soundcard data**

In [177]:
# Read register 32 of the soundcard from its binary file and show the resulting dataframe
bin_s_path = data_folder + r"\SoundCard.harp\SoundCard_32.bin"

# the explicitly defined model will be deprecated or redundant in future
model = Model(
    device='Soundcard',
    whoAmI=1280,
    firmwareVersion='2.2',
    hardwareTargets='1.1',
    registers={
        'PlaySoundOrFrequency': Register(address=32, type="U16", access=Access.Event),
    },
)
sound_reader = harp.create_reader(model, keep_type=True)

# Timestamps and audio IDs from the soundcard stream, with the index moved into a column
all_sounds = sound_reader.PlaySoundOrFrequency.read(bin_s_path).reset_index()

# Keep only EVENT messages (when sound actually happened, not write commands to the board)
all_sounds = all_sounds.loc[all_sounds['MessageType'] == 'EVENT']

# Show dataframe (maybe)
all_sounds

Unnamed: 0,Time,PlaySoundOrFrequency,MessageType
1,3787916000.0,17,EVENT
3,3787916000.0,18,EVENT


In [178]:
# Function to extract and parse sounds from register 32 of the harp soundcard, grouped by trial
# using the start and end timestamps of each trial (almost the same as extracting pokes)
def parse_trials_sounds(df_events,df_trials,OFF_index=18):
    """Group sound onsets and offsets from the soundcard stream by trial.

    Parameters
    ----------
    df_events : pandas.DataFrame
        Sound events with a 'Time' column and a 'PlaySoundOrFrequency' column.
    df_trials : pandas.DataFrame
        One row per trial with 'TrialStart' and 'TrialEnd' columns. Both
        bounds are inclusive.
    OFF_index : int, optional
        Value of 'PlaySoundOrFrequency' that marks a sound offset. Every
        other value is treated as the onset of the sound with that ID.

    Returns
    -------
    pandas.DataFrame
        One row per trial with list-valued columns 'AudioCueStart' (onset
        times), 'AudioCueEnd' (offset times) and 'AudioCueIdentity' (ID of
        each onset).
    """
    # Per-trial lists, one entry appended for each trial
    onsets_by_trial, offsets_by_trial, ids_by_trial = [], [], []

    for _, trial in df_trials.iterrows():

        # Events whose timestamp lies within this trial
        in_trial = (df_events.Time >= trial.TrialStart) & (df_events.Time <= trial.TrialEnd)
        trial_events = df_events[in_trial]

        onsets, offsets, cue_ids = [], [], []
        for _, row in trial_events.iterrows():
            cue = int(row['PlaySoundOrFrequency'])

            # The OFF value closes a sound; anything else opens one and carries its ID
            if cue == OFF_index:
                offsets.append(row.Time)
            else:
                onsets.append(row.Time)
                cue_ids.append(cue)

        onsets_by_trial.append(onsets)
        offsets_by_trial.append(offsets)
        ids_by_trial.append(cue_ids)

    # One row per trial, each cell holding that trial's list
    return pd.DataFrame({
        'AudioCueStart': onsets_by_trial,
        'AudioCueEnd': offsets_by_trial,
        'AudioCueIdentity': ids_by_trial,
    })

In [179]:
# Sound index used for silence, i.e. the value that marks a sound turning OFF
OFF_index = 18
sound_df = parse_trials_sounds(df_events=all_sounds, df_trials=df_trials, OFF_index=OFF_index)

# Show dataframe (maybe)
sound_df


Unnamed: 0,AudioCueStart,AudioCueEnd,AudioCueIdentity
0,[3787916222.179168],[3787916227.85424],[17]


**Merge with the experimental csv we already have**

In [181]:
# Merge with the csv we already have
# Drop the old sound timestamps and IDs (only one stored per trial) so the per-trial lists replace them
merged_df_drop=merged_df.drop(columns=['AudioCueIdentity', 'AudioCueStart', 'AudioCueEnd'])
# Back to a positional index so rows line up with sound_df, which is indexed by trial position
merged_df_drop.reset_index(inplace=True)
merged_df_final = pd.merge(merged_df_drop, sound_df, left_index = True, right_index = True, how = 'left')
merged_df_final.set_index('TrialNumber', inplace=True)

# Save and print. Write the FINAL table (pokes and sounds): saving merged_df
# here would leave the per-trial sound lists out of the output csv.
merged_df_final.to_csv(csv_out_path)
merged_df_final

Unnamed: 0_level_0,TrialStart,TrialEnd,TrainingStage,TrainingSubstage,TrialCompletionCode,DotXLocation,DotYLocation,DotOnsetTime,DotOffsetTime,CorrectForBias,BrainRegion,WallRotation,WallsRotated,NosepokeInTime,NosepokeOutTime,PokeID,AudioCueStart,AudioCueEnd,AudioCueIdentity
TrialNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,3787916000.0,3787916000.0,4,1,IncorrectNosepoke0,0.3,-0.7,3787916000.0,3787916000.0,False,,False,False,[3787916227.850592],[3787916227.970496],[0],[3787916222.179168],[3787916227.85424],[17]
