In [2]:
%matplotlib ipympl
import numpy as np
from pathlib import Path
import utils as utils
import harp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import aeon.io.video as video
from ipywidgets import widgets
from IPython.display import display
import re

In [6]:
# Session directory that the loading cells below read from
SESSION_DIR = '/Volumes/harris/hypnose/rawdata/sub-020_id-072/ses-08_date-20250325/behav/2025-03-25T15-53-02'
root = Path(SESSION_DIR)

In [7]:
# Read the session settings stored under <root>/SessionSettings
metadata_reader = utils.SessionData()
session_settings = utils.load_json(metadata_reader, root / "SessionSettings")

# Show the sequence definitions held in the first settings record
first_settings_metadata = session_settings.iloc[0]['metadata']
print(first_settings_metadata.sequences)

/Volumes/harris/hypnose/rawdata/sub-020_id-072/ses-08_date-20250325/behav/2025-03-25T15-53-02/SessionSettings/SessionSettings_*.jsonl
[DotMap(rewardConditions=[[[{'command': 'OdorA', 'rewarded': True}]], [[{'command': 'OdorB', 'rewarded': True}]]], presentationTime=5.0, interCommand='Purge', interCommandTime=0.2, rewardAttempts=0, responseTime=99999.0, interTrialInterval=0.0, name='OdourDiscrimination', defaultCommand='Default', repeatCount=20, maximumTime=1.0, enableTrialIndicator=True, enableRewardLocationIndicator=True, resetOnReward=True)]


In [8]:
# Register readers built from the device schema files
behavior_reader = harp.reader.create_reader('device_schemas/behavior.yml', epoch=harp.io.REFERENCE_EPOCH)
olfactometer_reader = harp.reader.create_reader('device_schemas/olfactometer.yml', epoch=harp.io.REFERENCE_EPOCH)

# Per-device folders inside the session directory
behavior_dir = root / "Behavior"
olfactometer0_dir = root / "Olfactometer0"
olfactometer1_dir = root / "Olfactometer1"

# Data streams (same load order as before)
digital_input_data = utils.load(behavior_reader.DigitalInputState, behavior_dir)
output_set = utils.load(behavior_reader.OutputSet, behavior_dir)
output_clear = utils.load(behavior_reader.OutputClear, behavior_dir)
olfactometer_valves_0 = utils.load(olfactometer_reader.OdorValveState, olfactometer0_dir)
olfactometer_valves_1 = utils.load(olfactometer_reader.OdorValveState, olfactometer1_dir)
olfactometer_end_0 = utils.load(olfactometer_reader.EndValveState, olfactometer0_dir)
analog_data = utils.load(behavior_reader.AnalogData, behavior_dir)
flow_meter = utils.load(olfactometer_reader.Flowmeter, olfactometer0_dir)
heartbeat = utils.load(behavior_reader.TimestampSeconds, behavior_dir)

# 'other' events
pulse_supply_1 = utils.load(behavior_reader.PulseSupplyPort1, behavior_dir)  # reward A pump end time
pulse_supply_2 = utils.load(behavior_reader.PulseSupplyPort2, behavior_dir)  # reward B pump end time
pulse_enable = utils.load(behavior_reader.OutputPulseEnable, behavior_dir)  # allow for either pump to deliver reward


In [9]:
# Keep only the samples where each digital input reads True
init_poke = digital_input_data['DIPort0'].loc[lambda state: state == True]
r1_poke = digital_input_data['DIPort1'].loc[lambda state: state == True]
r2_poke = digital_input_data['DIPort2'].loc[lambda state: state == True]

# One True entry per PulseSupplyPort record, indexed by that record's timestamp
r1_reward = pd.Series(True, index=pulse_supply_1.index, name="PulseSupplyPort1")
r2_reward = pd.Series(True, index=pulse_supply_2.index, name="PulseSupplyPort2")

# Keep only the samples where olfactometer 0 reports Valve0 / Valve1 as True
r1_olf_valve = olfactometer_valves_0['Valve0'].loc[lambda state: state == True]
r2_olf_valve = olfactometer_valves_0['Valve1'].loc[lambda state: state == True]

In [12]:
# Create individual Series with unique timestamps
def create_unique_series(events, name):
    """Return an all-True Series indexed by the (de-duplicated) event timestamps.

    Parameters
    ----------
    events : pandas.Series or pandas.DataFrame
        Only ``events.index`` is read; the values are ignored.
    name : str
        Name assigned to the returned Series.

    Returns
    -------
    pandas.Series
        One ``True`` per entry of ``events.index``, in the original order.
        Repeated timestamps are shifted forward in 1-microsecond steps until
        they no longer collide with a timestamp already emitted.
    """
    timestamps = events.index
    if not timestamps.is_unique:
        # Walk the index in order, nudging each repeated timestamp forward
        # until it is distinct from everything emitted so far.
        unique_timestamps = []
        seen = set()
        for ts in timestamps:
            counter = 0
            ts_modified = ts
            while ts_modified in seen:
                counter += 1
                ts_modified = ts + pd.Timedelta(microseconds=counter)
            seen.add(ts_modified)
            unique_timestamps.append(ts_modified)
        # Keep the index name so both branches return the same index metadata
        timestamps = pd.Index(unique_timestamps, name=events.index.name)
    # Previously `name` was accepted but ignored, leaving the Series unnamed
    return pd.Series(True, index=timestamps, name=name)

# Create Series with unique timestamps
init_poke_series = create_unique_series(init_poke, 'init_poke')
r1_poke_series = create_unique_series(r1_poke, 'r1_poke')
r2_poke_series = create_unique_series(r2_poke, 'r2_poke')
r1_reward_series = create_unique_series(r1_reward, 'r1_reward')
r2_reward_series = create_unique_series(r2_reward, 'r2_reward')
r1_olf_valve_series = create_unique_series(r1_olf_valve, 'r1_olf_valve')
r2_olf_valve_series = create_unique_series(r2_olf_valve, 'r2_olf_valve')

# Align all event Series on the union of their timestamps (one column per event type)
events_df = pd.DataFrame({
    'init_poke': init_poke_series,
    'r1_poke': r1_poke_series,
    'r2_poke': r2_poke_series,
    'r1_reward': r1_reward_series,
    'r2_reward': r2_reward_series,
    'r1_olf_valve': r1_olf_valve_series,
    'r2_olf_valve': r2_olf_valve_series
})

# Every input Series holds only True, so after alignment a cell is non-null
# exactly where that event occurred. notna() therefore gives the same
# True/False table as fillna(False), but with real bool columns and without
# the pandas object-downcasting FutureWarning.
events_df = events_df.notna()

# Reset index to convert timestamps to a column
events_df = events_df.reset_index(names='timestamp')

# Sort by timestamp
events_df = events_df.sort_values('timestamp')

# The last path component is expected to look like YYYY-MM-DDThh-mm-ss
timestamp_str = root.stem
print(f"Extracted timestamp string: {timestamp_str}")

# Split into a date part and a time part, e.g. '2025-03-25' and '15-53-02'
match = re.match(r'(\d{4}-\d{2}-\d{2})T(\d{2}-\d{2}-\d{2})', timestamp_str)
if not match:
    # Report the problem, then stop execution of the cell
    error_msg = f"ERROR: Could not parse timestamp from path '{timestamp_str}'. Expected format: YYYY-MM-DDThh-mm-ss"
    print(error_msg)
    raise ValueError(error_msg)

date_part = match.group(1)
# pd.Timestamp needs colons between hours, minutes and seconds
time_part = match.group(2).replace('-', ':')
formatted_timestamp = f"{date_part} {time_part}"
start_time = pd.Timestamp(formatted_timestamp)
print(f"Automatically determined start time: {start_time}")

# Offset that maps the earliest recorded event onto the session start time
first_event_time = events_df['timestamp'].min()
time_delta = start_time - first_event_time

# Shift every event by that offset and add a start_time flag column (all False);
# assign() returns a new frame, so events_df itself is left untouched
events_df_with_timing = events_df.assign(
    timestamp=events_df['timestamp'] + time_delta,
    start_time=False,
)

# Marker row for the session start: every event flag False, start_time True
start_time_row = pd.DataFrame({
    'timestamp': [start_time],
    'init_poke': [False],
    'r1_poke': [False],
    'r2_poke': [False],
    'r1_reward': [False],
    'r2_reward': [False],
    'r1_olf_valve': [False],
    'r2_olf_valve': [False],
    'start_time': [True]  # New column indicating this is the start time
})

# Combine the start time row with the rest of the data (marker row first)
events_df_adjusted = pd.concat([start_time_row, events_df_with_timing], ignore_index=True)

# The first event now shares its timestamp with the marker row, so a tie is
# guaranteed; a stable sort keeps the marker row ahead of that event, which
# the default (unstable) sort does not promise.
events_df_adjusted = events_df_adjusted.sort_values('timestamp', kind='stable')

print(f"Session start time: {start_time}")
print(f"First event after start: {events_df_with_timing['timestamp'].min()}")
print(f"Total events: {len(events_df_adjusted)}")
display(events_df_adjusted.head())

Extracted timestamp string: 2025-03-25T15-53-02
Automatically determined start time: 2025-03-25 15:53:02
Session start time: 2025-03-25 15:53:02
First event after start: 2025-03-25 15:53:02
Total events: 4217


  events_df = events_df.fillna(False)


Unnamed: 0,timestamp,init_poke,r1_poke,r2_poke,r1_reward,r2_reward,r1_olf_valve,r2_olf_valve,start_time
0,2025-03-25 15:53:02.000000,False,False,False,False,False,False,False,True
1,2025-03-25 15:53:02.000000,False,True,False,False,False,False,False,False
2,2025-03-25 15:53:02.136384,False,True,False,False,False,False,False,False
3,2025-03-25 15:53:04.298944,False,False,True,False,False,False,False,False
4,2025-03-25 15:53:06.934592,False,True,False,False,False,False,False,False


In [None]:
## Correct decisions for the entire session for both odours

def calculate_overall_decision_accuracy(events_df):
    """
    Calculate decision accuracy for the entire session:
    - r1_olf_valve followed by r1_poke = correct
    - r2_olf_valve followed by r2_poke = correct

    Each valve opening is one trial. Its outcome is decided by the first row
    strictly after it (in timestamp order) that has r1_poke or r2_poke set.
    A valve opening with no later poke counts as a trial that was not correct;
    this now includes an opening on the very last row, which the previous
    row-by-row loop skipped.

    Parameters
    ----------
    events_df : pandas.DataFrame
        Must contain the columns 'timestamp', 'r1_poke', 'r2_poke',
        'r1_olf_valve' and 'r2_olf_valve'. It is not modified.

    Returns
    -------
    dict
        Keys 'r1_total', 'r1_correct', 'r1_accuracy', 'r2_total',
        'r2_correct', 'r2_accuracy', 'total_trials', 'total_correct' and
        'overall_accuracy'. Accuracies are percentages, 0 when there are no
        trials of that kind.
    """
    # Chronological order; the stable sort keeps tied rows in their input order
    data = events_df.sort_values('timestamp', kind='stable').reset_index(drop=True)

    r1_poke = data['r1_poke'].to_numpy(dtype=bool)
    r2_poke = data['r2_poke'].to_numpy(dtype=bool)
    # Row positions at which any response poke happened
    poke_rows = np.flatnonzero(r1_poke | r2_poke)

    r1_valve_rows = np.flatnonzero(data['r1_olf_valve'].to_numpy(dtype=bool))
    r2_valve_rows = np.flatnonzero(data['r2_olf_valve'].to_numpy(dtype=bool))

    r1_total = len(r1_valve_rows)
    r2_total = len(r2_valve_rows)
    r1_correct = _count_correct_choices(r1_valve_rows, poke_rows, r1_poke)
    r2_correct = _count_correct_choices(r2_valve_rows, poke_rows, r2_poke)

    # Calculate percentages
    r1_accuracy = (r1_correct / r1_total * 100) if r1_total > 0 else 0
    r2_accuracy = (r2_correct / r2_total * 100) if r2_total > 0 else 0
    overall_accuracy = ((r1_correct + r2_correct) / (r1_total + r2_total) * 100) if (r1_total + r2_total) > 0 else 0

    return {
        'r1_total': r1_total,
        'r1_correct': r1_correct,
        'r1_accuracy': r1_accuracy,
        'r2_total': r2_total,
        'r2_correct': r2_correct,
        'r2_accuracy': r2_accuracy,
        'total_trials': r1_total + r2_total,
        'total_correct': r1_correct + r2_correct,
        'overall_accuracy': overall_accuracy
    }


def _count_correct_choices(valve_rows, poke_rows, target_poke):
    """Count valve openings whose first later poke was at the target port."""
    # Position within poke_rows of the first poke strictly after each valve row
    next_pos = np.searchsorted(poke_rows, valve_rows, side='right')
    # Openings with no later poke have nothing to score
    answered = next_pos < len(poke_rows)
    first_poke_rows = poke_rows[next_pos[answered]]
    return int(np.count_nonzero(target_poke[first_poke_rows]))

# Score the time-adjusted event table for the whole session
session_accuracy = calculate_overall_decision_accuracy(events_df_adjusted)

# Plain-text summary: one line for R1, one for R2, one overall
print("Session Decision Accuracy Summary:")
print(f"R1 Trials: {session_accuracy['r1_total']} (Correct: {session_accuracy['r1_correct']}, Accuracy: {session_accuracy['r1_accuracy']:.2f}%)")
print(f"R2 Trials: {session_accuracy['r2_total']} (Correct: {session_accuracy['r2_correct']}, Accuracy: {session_accuracy['r2_accuracy']:.2f}%)")
print(f"Overall: {session_accuracy['total_trials']} trials, {session_accuracy['total_correct']} correct, Accuracy: {session_accuracy['overall_accuracy']:.2f}%")

# The same numbers as a one-row table for rich display in the notebook
summary_columns = [
    'r1_total', 'r1_correct', 'r1_accuracy',
    'r2_total', 'r2_correct', 'r2_accuracy',
    'overall_accuracy',
]
session_df = pd.DataFrame([session_accuracy])
display(session_df.loc[:, summary_columns])

Session Decision Accuracy Summary:
R1 Trials: 168 (Correct: 7, Accuracy: 4.17%)
R2 Trials: 168 (Correct: 161, Accuracy: 95.83%)
Overall: 336 trials, 168 correct, Accuracy: 50.00%


Unnamed: 0,r1_total,r1_correct,r1_accuracy,r2_total,r2_correct,r2_accuracy,overall_accuracy
0,168,7,4.166667,168,161,95.833333,50.0


In [None]:
## Time spent in odour port within a session

