In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import os
import glob

# Apply the seaborn theme globally for every figure created below:
# "notebook" context, white-grid style, and fonts scaled by 1.2.
sns.set(context="notebook", style="whitegrid", font_scale=1.2)

# IPython/Jupyter magic (not plain Python syntax): render matplotlib figures
# inline in the notebook. This line only works when run by IPython/Jupyter.
%matplotlib inline



def create_participant_folder(participant_id):
    """Create the output folder for a participant and return its path.

    The folder is named ``participant_<participant_id>`` and is created
    relative to the current working directory. Calling this repeatedly for
    the same participant is safe.

    Args:
        participant_id: Identifier interpolated into the folder name.

    Returns:
        The (relative) folder path as a string.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    folder_path = f'participant_{participant_id}'
    # exist_ok=True makes creation idempotent without a separate existence
    # check, which could race with another process creating the same folder
    # and would silently accept a regular file sitting at that path.
    os.makedirs(folder_path, exist_ok=True)
    return folder_path

def is_point_in_roi(x, y, *, x_min=0.27, x_max=0.73, y_min=0.27, y_max=0.73):
    """Return whether the point (x, y) lies inside a rectangular region of interest.

    All four bounds are inclusive. The defaults describe the square spanned
    by the image corners (0.27 to 0.73 on both axes), so existing two-argument
    calls behave exactly as before; pass the keyword bounds to test against a
    different rectangle.

    Args:
        x: Horizontal coordinate of the point.
        y: Vertical coordinate of the point.
        x_min: Left edge of the ROI (inclusive).
        x_max: Right edge of the ROI (inclusive).
        y_min: Bottom edge of the ROI (inclusive).
        y_max: Top edge of the ROI (inclusive).

    Returns:
        True if the point is inside or on the border of the ROI, else False.
        A NaN coordinate is never inside, because every comparison with NaN
        is false.
    """
    return (x_min <= x <= x_max) and (y_min <= y <= y_max)

def compute_roi_trial_durations(trial_data):
    """Compute how much of a single trial was spent looking inside the ROI.

    The ROI duration is an estimate: the share of samples that fall inside
    the ROI multiplied by the trial's total duration. It is therefore only
    exact when samples are evenly spaced in time.

    Args:
        trial_data: DataFrame with "timestamp", "norm_pos_x" and "norm_pos_y"
            columns holding the samples of one trial.

    Returns:
        A tuple ``(roi_duration, roi_percentage, total_duration)`` where
        ``total_duration`` is the span between the first and last timestamp
        (in the units of the "timestamp" column), ``roi_percentage`` is the
        percentage (0-100) of samples inside the ROI, and ``roi_duration`` is
        that share of ``total_duration``. Both ROI values are 0 when no
        sample falls inside the ROI.
    """
    total_duration = trial_data["timestamp"].max() - trial_data["timestamp"].min()

    # Only the number of hits is needed, so walk the two coordinate columns
    # once instead of using DataFrame.apply(axis=1), which builds a Series
    # object for every sample.
    samples_in_roi = sum(
        1
        for x, y in zip(trial_data["norm_pos_x"], trial_data["norm_pos_y"])
        if is_point_in_roi(x, y)
    )

    if samples_in_roi > 0:
        roi_percentage = (samples_in_roi / len(trial_data)) * 100
        # Same arithmetic as before so reported values stay bit-identical.
        roi_duration = (roi_percentage / 100) * total_duration
    else:
        roi_duration = 0
        roi_percentage = 0

    return roi_duration, roi_percentage, total_duration

def scatter_gaze_positions(data, trial_id, phase, participant_id, roi_data_list, roi_timestamps_list):
    """Plot one trial's gaze positions, record its ROI metrics, and save the figure.

    Samples outside the ROI are drawn in grey. Samples inside the ROI are
    coloured by trial progress, i.e. time elapsed since the first sample of
    the trial divided by the trial's total duration. The figure is written to
    ``participant_<participant_id>/trial_<trial_id>_plot.jpg`` and then
    closed, even if plotting or saving fails.

    Args:
        data: DataFrame with "timestamp", "norm_pos_x" and "norm_pos_y"
            columns for a single trial. It is not modified.
        trial_id: Trial identifier, used in the title and output file name.
        phase: Phase label shown in the title and stored with the results.
        participant_id: Participant identifier (title, results, folder name).
        roi_data_list: List that receives one summary dict for this trial
            (mutated in place).
        roi_timestamps_list: List that receives one dict per sample that
            falls inside the ROI (mutated in place).

    Returns:
        None.
    """
    roi_duration, roi_percentage, total_duration = compute_roi_trial_durations(data)

    roi_data_list.append({
        'participant_id': participant_id,
        'trial_id': trial_id,
        'phase': phase,
        'roi_duration': roi_duration,
        'roi_percentage': roi_percentage,
        'total_duration': total_duration
    })

    # Classify every sample exactly once; the same mask selects both the ROI
    # points and (inverted) the non-ROI points. Iterating the two columns
    # avoids the per-row Series that DataFrame.apply(axis=1) would build.
    roi_mask = np.array(
        [
            is_point_in_roi(x, y)
            for x, y in zip(data["norm_pos_x"], data["norm_pos_y"])
        ],
        dtype=bool,
    )
    roi_points = data[roi_mask]
    non_roi_points = data[~roi_mask]

    roi_timestamps_list.extend(
        {
            'participant_id': participant_id,
            'trial_id': trial_id,
            'phase': phase,
            'timestamp': timestamp
        }
        for timestamp in roi_points['timestamp']
    )

    fig, ax = plt.subplots(figsize=(16, 8))
    try:
        # Non-ROI points in grey
        ax.scatter(
            non_roi_points["norm_pos_x"],
            non_roi_points["norm_pos_y"],
            c='grey',
            alpha=0.2,
            label='Outside ROI'
        )

        # ROI points coloured by how far into the trial they occurred
        if len(roi_points) > 0:
            # Measure from the start of the trial (not from the first ROI
            # sample) so the colorbar really shows trial progress.
            trial_start = data["timestamp"].min()
            elapsed = roi_points["timestamp"] - trial_start
            if total_duration > 0:
                trial_progress = elapsed / total_duration
            else:
                # A zero-length trial has no progress; avoid dividing by zero.
                trial_progress = elapsed * 0.0

            points = ax.scatter(
                roi_points["norm_pos_x"],
                roi_points["norm_pos_y"],
                c=trial_progress,
                cmap="BuGn",
                alpha=0.5,
                label='Inside ROI'
            )

            # The colorbar only makes sense when there are ROI points
            cbar = fig.colorbar(points, ax=ax, pad=0.02)
            cbar.ax.set_ylabel("Trial Progress (0 to 1)", rotation=270, labelpad=15)

        ax.set_title(f"Participant {participant_id} - {phase} - Trial {trial_id}\n" +
                     f"Total Trial Duration: {total_duration:.2f} seconds\n" +
                     f"ROI Duration: {roi_duration:.2f} seconds\n" +
                     f"Points in ROI: {roi_percentage:.1f}%")

        # Reference lines
        ax.axhline(y=0, color='r', linestyle='-', alpha=0.3, label='Bottom/Left edge')
        ax.axhline(y=1, color='purple', linestyle='-.', alpha=0.3, label='Top/Right edge')
        ax.axhline(y=0.27, color='orange', linestyle='--', alpha=0.5, label='Image bottom')
        ax.axhline(y=0.5, color='g', linestyle=':', alpha=0.5, label='Center')
        ax.axvline(x=0.5, color='red', linestyle=':', alpha=0.5, label='Center vertical')
        ax.axhline(y=0.73, color='b', linestyle='-.', alpha=0.5, label='Image top')

        # Screen corner markers; labels name the corner each marker sits on
        # (y=1 is the top of the plot, x=1 is the right).
        ax.scatter([0.0], [1.0], color='red', s=100, label='Top corner left')
        ax.scatter([0.0], [0.0], color='blue', s=100, label='Bottom corner left')
        ax.scatter([0.5], [0.5], color='pink', s=100, label='Center')
        ax.scatter([1.0], [1.0], color='yellow', s=100, label='Top corner right')
        ax.scatter([1.0], [0.0], color='green', s=100, label='Bottom corner right')

        # Image corner markers (ROI corners)
        ax.scatter([0.27], [0.73], color='red', s=100, label='Image corner top left')
        ax.scatter([0.73], [0.73], color='blue', s=100, label='Image corner top right')
        ax.scatter([0.27], [0.27], color='yellow', s=100, label='Image corner bottom left')
        ax.scatter([0.73], [0.27], color='green', s=100, label='Image corner bottom right')

        # ROI outline
        roi_rect = plt.Rectangle((0.27, 0.27), 0.46, 0.46,
                                 fill=False, color='red',
                                 linestyle='--', label='ROI')
        ax.add_patch(roi_rect)

        # Axis labels and limits
        ax.set_xlabel("norm_pos_x", labelpad=5)
        ax.set_ylabel("norm_pos_y", labelpad=5)
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])

        # Shrink the axes to leave room for the legend on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax.legend(bbox_to_anchor=(1.15, 1), loc='upper left')

        ax.grid(True, alpha=0.3)

        fig.tight_layout()

        # Save plot in participant folder
        folder_path = create_participant_folder(participant_id)
        fig.savefig(f'{folder_path}/trial_{trial_id}_plot.jpg', bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure so a failing trial cannot leak memory
        # across the many figures created in one run.
        plt.close(fig)

def process_participant_data(file_path):
    """Run the ROI analysis for one participant's recording and save the results.

    The CSV is filtered to samples with confidence above 0.8 recorded while
    ``stimulus_state`` is "ON". Every remaining trial is plotted and
    summarised, and two CSV files are written into the participant's folder:
    ``roi_analysis_results.csv`` (one row per trial) and
    ``roi_timestamps.csv`` (one row per sample inside the ROI).

    Args:
        file_path: Path to a CSV file with at least the columns "confidence",
            "stimulus_state", "trial" and "phase", plus the columns consumed
            by ``scatter_gaze_positions``.

    Returns:
        DataFrame of per-trial ROI metrics sorted by phase and trial id. It
        is empty, but still has the expected columns, when no trial survives
        the filters.
    """
    # Participant ID: the second underscore-separated token of the file name
    # (e.g. "participant_07_pupil.csv" -> "07").
    file_name = os.path.basename(file_path)
    name_parts = file_name.split('_')
    if len(name_parts) == 1:
        # No underscore at all: fall back to the bare file name.
        participant_id = os.path.splitext(file_name)[0]
    elif name_parts[1:] == ['pupil.csv']:
        # "<id>_pupil.csv": the second token is just the fixed suffix and
        # would give every participant the same ID, so use the prefix.
        participant_id = name_parts[0]
    else:
        participant_id = name_parts[1]

    print(f"\nProcessing Participant: {participant_id}")

    recording_raw = pd.read_csv(file_path)

    # Keep confident samples recorded while the stimulus was shown
    recording_filtered = recording_raw[recording_raw["confidence"] > 0.8]
    stimulus_on_data = recording_filtered[recording_filtered["stimulus_state"] == "ON"]

    roi_data_list = []
    roi_timestamps_list = []

    # groupby(sort=False) visits trials in order of first appearance and
    # skips rows whose trial id is missing, which would otherwise produce an
    # empty selection.
    for trial_id, trial_data in stimulus_on_data.groupby("trial", sort=False):
        phase = trial_data["phase"].iloc[0]  # Phase of this trial's first sample
        # str() keeps this working when the phase column is numeric.
        phase_label = f"Phase {str(phase).replace('phase', '')}"
        scatter_gaze_positions(
            trial_data, trial_id,
            phase_label,
            participant_id,
            roi_data_list,
            roi_timestamps_list
        )

    folder_path = create_participant_folder(participant_id)

    # Explicit column lists keep sort_values() from raising KeyError when a
    # list is empty (no usable trials, or no sample ever inside the ROI).
    roi_columns = [
        'participant_id', 'trial_id', 'phase',
        'roi_duration', 'roi_percentage', 'total_duration',
    ]
    roi_df = pd.DataFrame(roi_data_list, columns=roi_columns).sort_values(['phase', 'trial_id'])
    roi_df.to_csv(f'{folder_path}/roi_analysis_results.csv', index=False)

    timestamp_columns = ['participant_id', 'trial_id', 'phase', 'timestamp']
    timestamps_df = pd.DataFrame(
        roi_timestamps_list, columns=timestamp_columns
    ).sort_values(['phase', 'trial_id', 'timestamp'])
    timestamps_df.to_csv(f'{folder_path}/roi_timestamps.csv', index=False)

    return roi_df


def main():
    """Process every ``*_pupil.csv`` file in the current directory.

    Each file is analysed with ``process_participant_data``; the per-trial
    results of all participants are concatenated and written to
    ``all_participants_roi_analysis.csv``. When no input file is found, a
    message is printed and nothing is written.
    """
    # glob returns files in arbitrary order; sorting makes the processing
    # order, and therefore the row order of the master CSV, reproducible.
    data_files = sorted(glob.glob('*_pupil.csv'))

    if not data_files:
        print("No '*_pupil.csv' files found in the current directory; nothing to process.")
        return

    all_participants_data = [
        process_participant_data(file_path) for file_path in data_files
    ]

    # Combine all participants' data and save to a master CSV
    master_df = pd.concat(all_participants_data, ignore_index=True)
    master_df.to_csv('all_participants_roi_analysis.csv', index=False)
    print("\nMaster ROI analysis file has been saved as 'all_participants_roi_analysis.csv'")


if __name__ == "__main__":
    main()