# Pupillometry
This notebook takes in three csv files taken from the Pupil Recordings you have exported in Pupil Player. These are:
- `annotations.csv` (contains annotations that indicates important events during the recording)
- `pupil_positions.csv` (contains raw data in regards to the pupil throughout the recording)
- `info.player.json` (contains system and sync time used to format the recording timestamps)

In [1]:
# Imports
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import random
import json
import warnings

warnings.filterwarnings("ignore", category=RuntimeWarning)
matplotlib.use('TkAgg')

# File Paths
info_player_filePath = './source/info.player.json'
pupil_csv_filePath = './source/pupil_positions.csv'
annotations_filepath = './source/annotations.csv'

# Contrasting Colours
CONTRASTING_COLURS = ['#011627', '#2ec4b6', '#e71d36', '#ff9f1c']
current_colour_index = 0

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Converting timestamps into time (starting from 0)
def convert_timestamps_to_time(df, timestamp_column, offset):
    df['time'] = df[timestamp_column] + offset
    df['time'] -= df['time'].min()
    return df

In [3]:
with open(info_player_filePath, 'r') as file:
    data = json.load(file)

start_time_synced_s = data.get('start_time_synced_s')
start_time_system_s = data.get('start_time_system_s')

offset = start_time_system_s - start_time_synced_s
print(f"Offset between system and synced start time: {offset}")

pupil_df = pd.read_csv(pupil_csv_filePath)
print(f"There is a total of {len(pupil_df)} in the Pupil DataFrame")

pupil_df = convert_timestamps_to_time(pupil_df, 'pupil_timestamp', offset)
print(pupil_df['time'])

Offset between system and synced start time: 1709636904.0990736
There is a total of 15321 in the Pupil DataFrame
0         0.000000
1         0.000000
2         0.000320
3         0.000320
4         0.008521
           ...    
15316    31.570625
15317    31.582216
15318    31.582216
15319    31.582271
15320    31.582271
Name: time, Length: 15321, dtype: float64


In [4]:
left_df = pupil_df[(pupil_df['eye_id'] == 0) & (pupil_df['method'] == 'pye3d 0.3.0 real-time')]
right_df = pupil_df[(pupil_df['eye_id'] == 1) & (pupil_df['method'] == 'pye3d 0.3.0 real-time')]

print(len(left_df))
print(len(right_df))

3794
3842


In [5]:
unsmoothed_figure, axs = plt.subplots(1, 2, figsize=(15, 6))

axs[0].plot(left_df['time'], left_df['diameter_3d'], label='Left Eye')
axs[0].set_title('Left Eye')

axs[1].plot(right_df['time'], right_df['diameter_3d'], label='Right Eye')
axs[1].set_title('Right Eye')

for ax in axs:
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Diameter (mm)')
    ax.legend()

unsmoothed_figure.tight_layout()

In [6]:
combined_unsmoothed, ax = plt.subplots(1, 1, figsize=(15, 6))

ax.plot(left_df['time'], left_df['diameter_3d'], label='Left Eye')
ax.plot(right_df['time'], right_df['diameter_3d'], label='Right Eye')  # Corrected label

ax.set_title('Diameters of Left and Right Eyes')
ax.set_xlabel('Time')
ax.set_ylabel('Diameter (mm)')
ax.legend()

combined_unsmoothed.tight_layout()

In [7]:
window = 10
left_df['smoothed_diameter'] = left_df['diameter_3d'].rolling(window=window).median()
right_df['smoothed_diameter'] = right_df['diameter_3d'].rolling(window=window).median()

smoothed_figure, axs = plt.subplots(1, 2, figsize=(15, 6))

axs[0].plot(left_df[left_df['method'] == 'pye3d 0.3.0 real-time']['time'], left_df[left_df['method'] == 'pye3d 0.3.0 real-time']['smoothed_diameter'], label='Smoothed Left Eye')
axs[0].set_title('Left Eye')

axs[1].plot(right_df[right_df['method'] == 'pye3d 0.3.0 real-time']['time'], right_df[right_df['method'] == 'pye3d 0.3.0 real-time']['smoothed_diameter'], label='Smoothed Right Eye')
axs[1].set_title('Right Eye')

for ax in axs:
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Diameter (mm)')
    ax.legend()

smoothed_figure.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  left_df['smoothed_diameter'] = left_df['diameter_3d'].rolling(window=window).median()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  right_df['smoothed_diameter'] = right_df['diameter_3d'].rolling(window=window).median()


In [8]:
# Use a contrasting colour palette to easily help identify regions
def choose_colour():
    global current_colour_index
    colour = CONTRASTING_COLURS[current_colour_index]
    current_colour_index = (current_colour_index + 1) % len(CONTRASTING_COLURS)
    return colour

In [9]:
# Generate a random RGB color tuple.
def generate_random_color():
    return (random.random(), random.random(), random.random())

# Draw lines and labels representing spawned objects and interceptions on the given axis.
def draw_objects_and_interceptions(ax, spawn_timestamps, interception_timestamps, annotations_df, obstacle_ids):
    object_colors = {}

    # Handle Spawning annotations
    for timestamp, obj_id in zip(spawn_timestamps, annotations_df.loc[annotations_df['label'] == ('Spawning'), 'id']):

        # Checks if the current annotation is refering to an Obstacle objectType by filtering the annotations_df for the Object ID and retrieving the first 'objectType' (which there should only be one)
        obj_type = annotations_df.loc[(annotations_df['label'] == 'Spawning') & (annotations_df['id'] == obj_id), 'objectType'].values[0] if 'objectType' in annotations_df.columns else None
        if obj_type == 'Obstacle' and obj_id in obstacle_ids:
            draw_object_line(ax, timestamp, obj_id, object_colors, is_interception=False)

    # Handle Interception annotations
    for timestamp, obj_id in zip(interception_timestamps, annotations_df.loc[annotations_df['label'] == 'Intercepted', 'id']):
        obj_type = annotations_df.loc[(annotations_df['label'] =='Intercepted') & (annotations_df['id'] == obj_id),'objectType'].values[0] if 'objectType' in annotations_df.columns else None
        # There is a chance for an interception to be missed, so if there exists no records for the ID with label 'Intercepted' then there would be no object type to use, therefore don't need to plot
        
        if obj_type == 'Obstacle' and obj_id in obstacle_ids:
            draw_object_line(ax, timestamp, obj_id, object_colors, is_interception=True)

    return object_colors

# Draw vertical lines on the given axis to represent the start and end of an experiment and scale the x axis to those timestamps
def draw_experiment_lines(ax, start_timestamp, end_timestamp):
    ax.axvline(x=start_timestamp, color='black', linestyle='--')
    ax.axvline(x=end_timestamp, color='black', linestyle='--')
    ax.set_xlim(start_timestamp, end_timestamp)

# Draw a vertical line on the given axis to represent an object and add a text label.
def draw_object_line(ax, timestamp, obj_id, object_colors, is_interception):
    # Assign a random RGB color if ID doesn't have one
    object_colors.setdefault(obj_id, choose_colour())

    # Get the line color
    line_color = object_colors[obj_id]

    # Determine the vertical alignment and position
    vertical_alignment = 'top' if is_interception else 'bottom'
    vertical_position = ax.get_ylim()[0] + 0.02 if vertical_alignment == 'bottom' else ax.get_ylim()[1] - 0.02

    # Plot a vertical line
    ax.axvline(x=timestamp, color=line_color, linestyle='--', alpha=0.7)

    # Add text label
    label_text = f'{"Intercepted" if is_interception else "Spawned"} {int(obj_id)} at {timestamp:.2f}'
    ax.text(timestamp, vertical_position, label_text, rotation=90, va=vertical_alignment, ha='right', color='black')


# Plot regions on the given axis, using colors based on the object_colors dictionary for each object ID.
def plot_observations(ax, annotations_df, object_colours, fill_threshold=1.0):
    looking_at_df = annotations_df.loc[annotations_df['label'] == 'Looking At'].copy()

    # Create an empty dictionary to store the regions for each object ID
    id_region_dict = {}

    # Initialise variables for tracking consecutive points
    current_obj_id = None
    start_time = None

    # Iterate through each row in the 'Looking At' dataframe
    for _, row in looking_at_df.iterrows():
        obj_id = row['id']
        timestamp = row['time']

        # Check if it's the same object and within the threshold seconds
        if obj_id == current_obj_id and start_time is not None and timestamp - start_time <= fill_threshold: 
            end_time = timestamp # Update the end time for the current region
        else:
            # A new region is created when the next ID does not match the current region's ID OR if the time between two points is greater than the threshold (indicating it's a new region for the same ID)
            if current_obj_id is not None and start_time is not None:
                if current_obj_id not in id_region_dict: # If an ID has not been given regions, this will make sure that it has been intialised before we can add new regions
                    id_region_dict[current_obj_id] = [] 
                id_region_dict[current_obj_id].append((start_time, end_time)) # Adds this region to the key with the Object ID

            # Update tracking variables for the next iteration
            current_obj_id = obj_id
            start_time = timestamp
            end_time = timestamp

    # Add the last region after the loop
    if current_obj_id is not None and start_time is not None:
        if current_obj_id not in id_region_dict:
            id_region_dict[current_obj_id] = []
        id_region_dict[current_obj_id].append((start_time, end_time))

    # Iterates through each key-value pair
    for obj_id, regions in id_region_dict.items():
        color = object_colours[obj_id] # Retrieves the colour of the current object from the dictionary

        for region in regions: # Each region should have a start time, and an end time. We iterate through each one whiles also colouring the region between them
            start_time, end_time = region
            ax.axvspan(start_time, end_time, color=color, alpha=0.2, label=f'Object ID {obj_id}')

# Add annotations to a given plot.
def add_annotations(ax, offset, filepath='annotations.csv', show_observable=False):
    if filepath:
        # Creates a DataFrame containing all the annotation data, and also creating a new field called 'Time' which format timestamps into the corresponding time of the recording
        annotations_df = pd.read_csv(filepath)
        annotations_df['time'] = annotations_df['timestamp'] + offset
        annotations_df['time'] -= annotations_df['time'].min()

        # Filter annotations for 'Spawning' or 'Intercepted' labels and ObjectType 'Obstacle'
        filtered_annotations = annotations_df[
            (annotations_df['label'].isin(['Spawning', 'Intercepted'])) &
            (annotations_df['objectType'] == 'Obstacle')
        ]

        # Identifying different types of annotation to plot on the graph
        spawn_timestamps = filtered_annotations.loc[filtered_annotations['label'] == 'Spawning', 'time'].values
        interception_timestamps = filtered_annotations.loc[filtered_annotations['label'] == 'Intercepted', 'time'].values

        start_timestamp = annotations_df.loc[(annotations_df['label'] == 'Experiment Started'), 'time'].values
        end_timestamp = annotations_df.loc[(annotations_df['label'] == 'Experiment Ended'), 'time'].values

        draw_experiment_lines(ax, start_timestamp, end_timestamp) # Draws a line for when the experiment has started and ended, as well as adjusting the x-axis scale to show only between those points

        # Extract ObjectType 'Obstacle' and ID for plotting
        obstacle_ids = filtered_annotations.loc[:, 'id'].values

        # Each object should be given a colour for their specific ID, this can be used for when we want to show what object the user is currently observing
        object_colors = draw_objects_and_interceptions(ax, spawn_timestamps, interception_timestamps, annotations_df, obstacle_ids)
        
        # This will show regions of which indicates where the user looks at depending on the annotations with the label 'Looking At'
        if show_observable:
            plot_observations(ax, annotations_df, object_colors)

In [10]:
def obtain_offset(filepath):
    with open(filepath, 'r') as file:
        data = json.load(file)  
    return data.get('offset')

# Plot both smoothed diameters on the same graph
smoothed_diameters, ax2 = plt.subplots(figsize=(16, 8))

ax2.plot(left_df['time'], left_df['smoothed_diameter'], label='Left Eye')
ax2.plot(right_df['time'], right_df['smoothed_diameter'], label='Right Eye')

ax2.set_title('Diameters of Left and Right Eyes')
ax2.set_xlabel('Time')
ax2.set_ylabel('Diameter (mm)')
ax2.legend()

add_annotations(ax2, offset, annotations_filepath, show_observable=True)

smoothed_diameters.tight_layout()

smoothed_diameters.show()
# combined_unsmoothed.show()
# unsmoothed_figure.show()