Rough workbook to check that the timestamps in experimental-data.csv (output from Bonsai) are consistent with what we would expect from the trial logic.

Timestamps are checked relative to each other, to the TTLs and to the harp binaries.

**Get harp TTL data frame**

In [1]:
# Import main libraries and define data folder
import harp
import pandas as pd
from harp.model import Model, Register, Access
import os
from pathlib import Path
import matplotlib.pyplot as plt

import timestamps.harp.utils as hu

#==============================================================================
# Session selection: edit animal_ID / session_ID to point the workbook at a
# different recording. Alternative sessions are kept below, commented out.
animal_ID = 'FNT099'
session_ID = '2024-05-17T10-12-40'
# session_ID = '2024-03-13T10-49-40'

# animal_ID = 'FNT107'
# session_ID = '2024-08-11T14-01-24'

# Root path of the behavioural raw data on the ceph repo (machine-specific path)
raw_data_dir = Path("W:/projects/FlexiVexi/raw_data")
# Machine-specific folder for intermediate variables.
# NOTE(review): not referenced by any other cell visible in this workbook.
output_dir = Path("C:/Users/megan/Documents/sjlab/flexible-navigation-task" +
              r"/data_analysis/intermediate_variables")

#==============================================================================

# Create reader for behavior: <raw_data_dir>/<animal_ID>/<session_ID>/Behavior.harp
bin_b_path = raw_data_dir / animal_ID / session_ID / "Behavior.harp"
behavior_reader = harp.create_reader(bin_b_path)

# Specify mapping from sound index to reward port
# NOTE(review): these three constants are not used by any cell visible in this
# workbook — presumably they mirror values used inside the hu helpers; confirm.
soundIdx0 = 14
soundIdx1 = 10
soundOffIdx = 18

**Align dot times with experimental-data.csv**

In [None]:
# Import behavioral data as data frame:
# <session folder>/Experimental-data/<session_ID>_experimental-data.csv
session_path = raw_data_dir / animal_ID / session_ID
filepath = session_path / 'Experimental-data' / (session_ID + '_experimental-data.csv')
trials_df = pd.read_csv(filepath)

# Get dot onset and offset times given by TTL pulses

## First dot onset time from software clock (used as a common sense check for inconsistencies with number of TTL pulses on start up)
t0 = trials_df['DotOnsetTime'].iloc[0]

## Get dot times from TTL pulses; the helper also returns the TTL state at start-up
## because return_TTL_state_at_startup=True is requested
[dot_times_ttl, ttl_state_0] = hu.get_dot_times_from_ttl(behavior_reader, t0, return_TTL_state_at_startup=True)
print('TTL state upon start-up: ', ttl_state_0)

# Append dot onset and offset times given by TTL pulses to trials_df
# (column-wise concat, rows are matched on the index)
trials_df = pd.concat([trials_df, dot_times_ttl],axis=1)

# Common sense check that the logic of aligning the TTL pulses is working as expected.
# Check that dot onset/offset times from the software clock and from the TTL pulses
# are consistent. Columns shown below:
# - DotOnsetTime = dot onset time from software clock
# - DotOnsetTime_harp_ttl = dot onset time from TTL pulses
# - DotOffsetTime = dot offset time from software clock
# - DotOffsetTime_harp_ttl = dot offset time from TTL pulses

trials_df[['TrialStart', 'DotOnsetTime', 'DotOffsetTime', 'DotOnsetTime_harp_ttl', 'DotOffsetTime_harp_ttl']]

**Get all poke events**

In [None]:
# Read the Digital Input states from the behavior harp stream; these give the
# nosepoke timestamps and IDs.
# TODO(review): document what the 'DI3' column represents before relying on dropping it.
all_pokes = behavior_reader.DigitalInputState.read()

all_pokes.drop(columns=['DI3','DIPort2'],inplace = True) # remove all nose pokes to dummy port
#all_pokes.reset_index(inplace=True)

# Show resulting data frame
all_pokes.head()

**Get all audio events**

In [None]:
# Path to sound card binary file
bin_sound_path = os.path.join(
    raw_data_dir,
    animal_ID,
    session_ID,
    "SoundCard.harp",
    "SoundCard_32.bin"
)

def get_all_sounds(bin_sound_path):
    """Read the sound card's PlaySoundOrFrequency register from a harp binary.

    Parameters
    ----------
    bin_sound_path : str or path-like
        Path to the binary file holding the register at address 32.

    Returns
    -------
    pandas.DataFrame
        Only the rows whose 'MessageType' is 'EVENT', with the 'MessageType'
        column removed and the original index moved into a regular column.
    """
    # the explicitly defined model will be deprecated or redundant in future
    model = harp.model.Model(
        device='Soundcard',
        whoAmI=1280,
        firmwareVersion='2.2',
        hardwareTargets='1.1',
        registers={
            'PlaySoundOrFrequency': harp.model.Register(
                address=32,
                type="U16",
                access=harp.model.Access.Event
            )
        }
    )
    # keep_type=True so that the 'MessageType' column is available for filtering
    sound_reader = harp.create_reader(model, keep_type=True)

    # Read the harp sound card stream, for the timestamps and audio ID
    all_sounds = sound_reader.PlaySoundOrFrequency.read(bin_sound_path)

    # Keep only events (when sound actually happened, not write commands to the board)
    is_event = all_sounds['MessageType'] == 'EVENT'

    # Build the result as a chain of new frames rather than mutating the filtered
    # slice in place, which is the pattern pandas warns about (SettingWithCopyWarning).
    return (
        all_sounds.loc[is_event]
        .drop(columns=['MessageType'])
        .reset_index()
    )

# Read the harp sound card stream, for the timestamps and audio ID
all_sounds = get_all_sounds(bin_sound_path)

# Preview the first rows of the sound events
all_sounds.head()

**Align poke events with trials**

Get data frame with port choice ID and timestamp for each trial, where the port choice is taken as the first nose poke within the response window (between dot offset and trial end). If the trial is aborted, the port ID and timestamp are both taken as NaN.

In [None]:
# Assign nose-poke events to trials, using the TrialStart column of trials_df.
# NOTE(review): trial_pokes_df is only previewed here; no cell visible in this
# workbook appends it to trials_df, yet later cells read 'ChoicePort' and
# 'ChoiceTimestamp' from trials_df — confirm where those columns come from.
trial_pokes_df = hu.parse_trial_pokes(trials_df['TrialStart'], all_pokes)
trial_pokes_df.head()

**Align sound events to trials**

In [None]:
# Get data frame with sound ID and timestamp for each trial
trial_sounds_df = hu.parse_trial_sounds(trials_df['TrialStart'], all_sounds)

# Append the per-trial sound columns to trials_df.
# NOTE: trials_df is reassigned, so re-running this cell appends the same
# columns a second time (duplicate column names).
trials_df = pd.concat([trials_df, trial_sounds_df],axis=1)

# Show sound data frame
trial_sounds_df

In [None]:
# Eyeball the trial start, completion code, choice and audio-cue columns of
# trials_df side by side to check they look as expected.
trials_df[
    [
        'TrialStart',
        'TrialStart_harp',
        'TrialCompletionCode',
        'ChoicePort',
        'ChoiceTimestamp',
        'AudioCueStart_harp',
        'AudioCueEnd_harp',
        'AudioCueIdentity'
    ]
]

## Check consistency of harp clock timestamps with experimental-data.csv

DotOnsetTime and TrialStart are distinct on stage 4.1 in experimental-data.csv:

In [None]:
# Take the per-trial difference TrialStart - DotOnsetTime (both columns of trials_df)
test = trials_df['TrialStart'] - trials_df['DotOnsetTime']

# Histogram of the differences between TrialStart and DotOnsetTime
fig, ax = plt.subplots()
test.hist(bins=100, ax=ax)

Trial start time inferred from TTL pulses has some jitter relative to TrialStart (or DotOnset) in experimental-data.csv:

In [None]:
# Take the per-trial difference DotOnsetTime_harp_ttl - TrialStart and store it in trials_df
trials_df['DotOnsetTime_harp_diff'] = trials_df['DotOnsetTime_harp_ttl'] - trials_df['TrialStart']
# Number of trials in which the TTL-derived dot onset precedes TrialStart (negative difference)
print(sum(trials_df['DotOnsetTime_harp_diff'] < 0))
# Histogram of the differences between DotOnsetTime_harp_ttl and TrialStart
fig, ax = plt.subplots()
trials_df['DotOnsetTime_harp_diff'].hist(bins=100, ax=ax)
ax.set_title('Histogram of differences between DotOnsetTime_harp_ttl and TrialStart')

In [None]:
# Take the per-trial difference TrialStart - DotOnsetTime (same quantity as the
# earlier untitled histogram, this time with a title).
# NOTE: the name `diff` is reassigned by a later cell in this workbook.
diff = trials_df['TrialStart'] - trials_df['DotOnsetTime']
# Histogram of the differences between TrialStart and DotOnsetTime
fig, ax = plt.subplots()
diff.hist(bins=100, ax=ax)
ax.set_title('Histogram of differences between TrialStart and Dot Onset Time')

Timestamp of nosepoke for port choice is identical in both experimental-data.csv and harp binary files:

In [None]:
# Take the per-trial difference ChoiceTimestamp - NosepokeInTime and store it in trials_df
trials_df['choiceTimestamp_diff'] = trials_df['ChoiceTimestamp'] - trials_df['NosepokeInTime']

# Histogram of the differences between ChoiceTimestamp and NosepokeInTime
fig, ax = plt.subplots()
trials_df['choiceTimestamp_diff'].hist(bins=100, ax=ax)

Compare sound onset time in experimental-data.csv to sound onset in harp time:

In [None]:
# Select the audio-cue columns for a side-by-side look.
# NOTE(review): this expression is not the last one in its cell, so a notebook
# will not display it; the result is discarded as written.
trials_df[
    [
        'AudioCueStart',
        'AudioCueStart_harp',
        'AudioCueEnd',
        'AudioCueEnd_harp',
        'AudioCueIdentity'
    ]
].head()

# Check if the number of elements in AudioCueStart_harp matches the number of elements in AudioCueEnd_harp
def check_audio_cue_lengths(row):
    """Return True when the row's 'AudioCueStart_harp' and 'AudioCueEnd_harp'
    entries contain the same number of elements."""
    n_onsets = len(row['AudioCueStart_harp'])
    n_offsets = len(row['AudioCueEnd_harp'])
    return n_onsets == n_offsets

# Flag, row by row, whether the onset and offset counts agree
trials_df['AudioCueLengthsMatch'] = trials_df.apply(
    check_audio_cue_lengths,
    axis=1,
)

# Print the trials whose onset and offset counts disagree
length_mismatch = ~trials_df['AudioCueLengthsMatch']
mismatched_rows = trials_df[length_mismatch]
print(mismatched_rows)

AudioCueStart in experimental-data.csv matches the final instance of Audio Cue Onset from harp binaries within the trial

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: csv AudioCueStart compared against the first (left)
# and the last (right) element of AudioCueStart_harp.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

start_panels = (
    (0, 'Difference between AudioCueStart\nand first value of AudioCueStart_harp'),
    (-1, 'Difference between AudioCueStart\nand last value of AudioCueStart_harp'),
)

for panel_ax, (pick, panel_title) in zip(axs, start_panels):
    # The column is overwritten on each pass, so it ends up holding the
    # difference to the last harp onset.
    harp_onset = trials_df['AudioCueStart_harp'].apply(lambda x: x[pick])
    trials_df['AudioCueStart_diff'] = trials_df['AudioCueStart'] - harp_onset

    # Histogram of csv AudioCueStart minus the selected harp onset
    trials_df['AudioCueStart_diff'].hist(bins=100, ax=panel_ax)
    panel_ax.set_title(panel_title)

# Layout adjustment left disabled
# plt.tight_layout()

# Display the plots
plt.show()


AudioCueEnd from experimental-data.csv deviates significantly from first or last Audio Cue End from harp binaries in many trials

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: csv AudioCueEnd compared against the first (left)
# and the last (right) element of AudioCueEnd_harp.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

end_panels = (
    (0, 'Difference between AudioCueEnd\nand first value of AudioCueEnd_harp'),
    (-1, 'Difference between AudioCueEnd\nand last value of AudioCueEnd_harp'),
)

for panel_ax, (pick, panel_title) in zip(axs, end_panels):
    # The column is overwritten on each pass, so it ends up holding the
    # difference to the last harp offset.
    harp_offset = trials_df['AudioCueEnd_harp'].apply(lambda x: x[pick])
    trials_df['AudioCueEnd_diff'] = trials_df['AudioCueEnd'] - harp_offset

    # Histogram of csv AudioCueEnd minus the selected harp offset
    trials_df['AudioCueEnd_diff'].hist(bins=100, ax=panel_ax)
    panel_ax.set_title(panel_title)

# Layout adjustment left disabled
# plt.tight_layout()

# Display the plots
plt.show()


In [None]:
# NOTE(review): a bare expression that is not the last statement of the cell is
# not displayed by a notebook, so this line has no visible effect as written.
trials_df
# Print all unique values in the 'TrialCompletionCode' column
unique_values = trials_df['TrialCompletionCode'].unique()
print(unique_values)

# Print all unique values in the 'TrainingStage' column
print(trials_df['TrainingStage'].unique())

In [None]:
# Keep only the trials whose 'TrialCompletionCode' is 'AbortedTrial-1' and report how many there are
is_aborted = trials_df['TrialCompletionCode'] == 'AbortedTrial-1'
aborted_trials = trials_df[is_aborted]
print(len(aborted_trials))

# Print the 'AudioCueEnd_diff' values of those aborted trials
audio_cue_end_diff_aborted = aborted_trials['AudioCueEnd_diff']
print(audio_cue_end_diff_aborted)

In [None]:
# Check that every value in 'AudioCueEnd' occurs between 'TrialStart' for that row and the 'TrialStart' from the subsequent row
def check_audio_cue_end(trials_df):
    """Check that each trial's AudioCueEnd falls inside that trial.

    For every row except the last, 'AudioCueEnd' must satisfy
    TrialStart[row] <= AudioCueEnd[row] < TrialStart[next row].
    The last row has no successor, so only AudioCueEnd >= TrialStart is checked.

    Rows are compared in their positional order, so the check works for any
    index (for example a filtered frame), not only a default 0..n-1 index.

    Parameters
    ----------
    trials_df : pandas.DataFrame
        Must contain the columns 'TrialStart' and 'AudioCueEnd'.

    Returns
    -------
    bool
        True if every row passes (an empty frame passes vacuously). False as
        soon as the first violation is found; that violation is printed.

    Notes
    -----
    A NaN AudioCueEnd fails the two-sided check on non-final rows but passes the
    one-sided check on the final row, because comparisons with NaN are False.
    """
    starts = trials_df['TrialStart'].to_numpy()
    cue_ends = trials_df['AudioCueEnd'].to_numpy()

    # Nothing to violate in an empty table (and no last row to inspect).
    if len(starts) == 0:
        return True

    # Pair each trial with the start of the following trial.
    bounded_rows = zip(starts[:-1], starts[1:], cue_ends[:-1])
    for i, (start_time, end_time, audio_cue_end) in enumerate(bounded_rows):
        if not (start_time <= audio_cue_end < end_time):
            print(f"AudioCueEnd at index {i} is out of bounds: {audio_cue_end}")
            return False

    # Check the last row separately as it doesn't have a subsequent row
    last_start_time = starts[-1]
    last_audio_cue_end = cue_ends[-1]

    if last_audio_cue_end < last_start_time:
        print(f"AudioCueEnd at the last index is out of bounds: {last_audio_cue_end}")
        return False

    return True

# Run the bounds check on the full trial table and report the overall result
is_valid = check_audio_cue_end(trials_df)
print(f"All AudioCueEnd values are within bounds: {is_valid}")

In [None]:
import matplotlib.pyplot as plt

# Plot all instances of AudioCueStart, AudioCueEnd, and TrialStart in the same plot.
# Each event type is drawn as a row of markers at its own y level (1, 2, 3).
fig, ax = plt.subplots(figsize=(20, 2))

# Plot AudioCueStart
ax.plot(trials_df['AudioCueStart'], [1] * len(trials_df), 'go', label='AudioCueStart')

# Plot AudioCueEnd
ax.plot(trials_df['AudioCueEnd'], [2] * len(trials_df), 'ro', label='AudioCueEnd')

# Plot TrialStart. 'bo' is a plot() format string; Axes.vlines(x, ymin, ymax, ...)
# would interpret it as ymax, so plot() is used like for the other two series.
ax.plot(trials_df['TrialStart'], [3] * len(trials_df), 'bo', label='TrialStart')

# Set y-ticks to show labels
ax.set_yticks([1, 2, 3])
ax.set_yticklabels(['AudioCueStart', 'AudioCueEnd', 'TrialStart'])

# Add labels and legend
ax.set_xlabel('Timestamp')
ax.set_title('Timestamps of AudioCueStart, AudioCueEnd, and TrialStart')
# Zoom in on a window of 150 timestamp units starting at the earliest TrialStart
ax.set_xlim(trials_df['TrialStart'].min(), trials_df['TrialStart'].min()+150)
# Place the legend outside the axes, to the right
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Show plot
plt.show()

# Show the csv and harp audio-cue columns for the first ten trials
df = trials_df[['AudioCueStart','AudioCueEnd', 'AudioCueStart_harp', 'AudioCueEnd_harp']]

df.head(10)



Using TrialStart from experimental-data.csv as the trial start introduces anomalies in the Audio Cue Start/End times within the trial:

In [None]:
# Get data frame with sound ID and timestamp for each trial from Trial Start inferred from harp TTLs
# NOTE(review): the second argument here is bin_sound_path (a file path), whereas
# the earlier sound-alignment cell passes the all_sounds data frame — confirm which
# one hu.parse_trial_sounds expects.
trial_sounds_df_TrialStart_harp = hu.parse_trial_sounds(trials_df['TrialStart_harp'], bin_sound_path)

# Get data frame with sound ID and timestamp for each trial from Trial Start taken from experimental-data.csv
trial_sounds_df_TrialStart = hu.parse_trial_sounds(trials_df['TrialStart'], bin_sound_path)

# Check if trial_sounds_df_TrialStart_harp and trial_sounds_df_TrialStart are the same
are_identical = trial_sounds_df_TrialStart_harp.equals(trial_sounds_df_TrialStart)
print(are_identical)

# Pinpoint specific trials where the sound dataframes differ.
# NOTE: element-wise != treats NaN as different from NaN, while .equals() above
# treats NaNs in the same position as equal, so rows containing NaN can be listed
# here even when are_identical is True.
diff = trial_sounds_df_TrialStart_harp != trial_sounds_df_TrialStart
diff_idx = diff.any(axis=1)
print(trial_sounds_df_TrialStart_harp[diff_idx])
print(trial_sounds_df_TrialStart[diff_idx])

Check that TrialStart from experimental-data.csv always precedes TrialStart_harp:

In [None]:
# Count the trials in which TrialStart is strictly earlier than AudioCueStart.
# NOTE(review): the heading above this cell refers to TrialStart_harp, but the
# comparison is against 'AudioCueStart' — confirm which column was intended.
print(sum(trials_df['TrialStart'] < trials_df['AudioCueStart']))