In [None]:
import os.path as op
import pyplr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  
import numpy as np
import math
from scipy.signal import savgol_filter
import scipy.io as sio
import statistics as stats
from scipy.interpolate import interp1d
import glob

# Use seaborn's 'notebook' plotting context with fonts scaled up by 1.2.
sns.set_context('notebook', font_scale=1.2)
from pyplr import graphing, utils, preproc
from pyplr.plr import PLR

# Print NumPy arrays in full instead of summarising long ones with "...".
np.set_printoptions(threshold=np.inf)

# Trial numbers of the negative trials: three runs of ten consecutive trials
# (11-20, 31-40 and 51-60).  The original Neutral list was removed.
Negative = [
    trial
    for block_start in (11, 31, 51)
    for trial in range(block_start, block_start + 10)
]

def get_condition_label(trial_duration, median_duration):
    """Classify a trial as long or short by a median split on its duration.

    A duration strictly greater than ``median_duration`` is labelled
    "LongDurationVisual"; a duration equal to or below it is labelled
    "ShortDurationVisual".
    """
    is_long = trial_duration > median_duration
    return "LongDurationVisual" if is_long else "ShortDurationVisual"

# List of participant data files - replace with actual paths.
# Each entry is loaded with scipy.io.loadmat and treated as one participant;
# the text before the first underscore is used as the participant ID.
# Entries that are commented out are not loaded, so those participants are
# left out of every statistic computed from this list.
# NOTE(review): the reason for each exclusion is not recorded here - confirm.
participant_files = [
    '001_sirisha_v.mat',
    #'002_ram_v.mat',
    #'003_deepak_v.mat',
    '004_brijesh_v.mat',
    '005_piyush_v.mat',
    #'006_hariharan_v.mat',
    '007_sapna_v.mat',
    '008_maitreyee_v.mat',
    '009_shubham_v.mat',
    #'011_anisha_v.mat'
]

# Structures to store data across all participants.
# The two *_trials lists are created here but nothing in the visible code
# appends to them.
all_participants_LongDurationVisual_trials = []
all_participants_ShortDurationVisual_trials = []
# Per-trial baseline averages pooled over participants, split by condition.
all_participants_LongDurationVisual_baselines = []
all_participants_ShortDurationVisual_baselines = []
# Per-trial baseline averages pooled over participants, both conditions together.
all_participants_baseline_avg = []

# Durations of all usable trials from all participants; their median is the
# threshold for the long/short split.
all_trial_durations = []

# First pass: pool the duration of every negative trial of every participant
# so that one group-level median can serve as the long/short threshold.
print("First pass: Collecting trial durations for median calculation...")
for participant_file in participant_files:
    try:
        participant_id = participant_file.split('_')[0]  # Text before the first underscore

        # A file that cannot be read is reported and skipped.
        try:
            mat_data = sio.loadmat(participant_file)
        except Exception as e:
            print(f"Error loading file {participant_file}: {e}")
            continue

        for trial_num in Negative:
            trial_key = f'Trial{trial_num}'
            if trial_key not in mat_data:
                continue

            trial_data = mat_data[trial_key]
            behavioral_codes = trial_data['BehavioralCodes'][0, 0]
            code_times = behavioral_codes['CodeTimes'][0, 0].flatten()
            code_numbers = behavioral_codes['CodeNumbers'][0, 0].flatten()

            # Times of behavioural codes 2 and 3.
            # NOTE(review): presumably stimulus onset and offset - confirm
            # against the task's code table.
            time_2 = code_times[code_numbers == 2]
            time_3 = code_times[code_numbers == 3]

            # Only trials that carry both codes contribute a duration, taken
            # between the first occurrence of each code.
            if len(time_2) > 0 and len(time_3) > 0:
                trial_duration = time_3[0] - time_2[0]
                all_trial_durations.append(trial_duration)

    except Exception as e:
        print(f"Error processing participant {participant_file} in first pass: {e}")

# Median of the pooled durations; falls back to 0 when nothing was collected.
if len(all_trial_durations) == 0:
    print("No valid trial durations found!")
    median_duration = 0
else:
    median_duration = np.median(all_trial_durations)
    print(f"Median trial duration: {median_duration} ms")

# Number of samples every trial trace is resampled to before averaging.
target_length = 500

# One mean trace per participant and condition; these feed the grand averages.
all_LongDurationVisual_resampled = []
all_ShortDurationVisual_resampled = []

# Per-participant statistics, keyed by participant ID.
participant_stats = {}

# Tunable constants of the per-trial preprocessing.
PRE_EVENT_WINDOW = 3000            # Length of the analysed window, in timestamp units (assumed ms)
MIN_SAMPLES_PER_TRIAL = 50         # Trials with fewer valid samples are skipped
BLINK_CONFIDENCE_THRESHOLD = 0.4   # Samples below this confidence count as blinks
BLINK_PADDING_SAMPLES = 50         # Samples blanked on either side of a blink sample
BASELINE_SAMPLES = 50              # Leading samples averaged to form the baseline


def resample_trial_data(trials, target_length=500):
    """Resample every trial trace to ``target_length`` samples for averaging.

    Each trace is treated as evenly spaced over [0, 1] and linearly
    interpolated onto ``target_length`` evenly spaced points; the stored
    timestamps are not used.  Traces with fewer than two samples are skipped.

    Args:
        trials: Iterable of dicts holding a 1-D trace under the 'data' key.
        target_length: Number of samples in each resampled trace.

    Returns:
        Array with one row of ``target_length`` samples per kept trial, or an
        empty array when no trial is kept.
    """
    resampled_data = []
    x_new = np.linspace(0, 1, target_length)

    for trial in trials:
        data = trial['data']
        if len(data) < 2:  # Interpolation needs at least two points
            continue

        x_original = np.linspace(0, 1, len(data))
        interpolator = interp1d(x_original, data, kind='linear', bounds_error=False, fill_value='extrapolate')
        resampled_data.append(interpolator(x_new))

    return np.array(resampled_data)


def _blank_blinks(pupil, confidence, threshold=0.4, padding=50):
    """Return a copy of ``pupil`` with NaN written over blink samples.

    A sample counts as a blink when its confidence is below ``threshold``.
    For a blink at index i the half-open range [i - padding, i + padding) is
    blanked, clipped to the bounds of the trace.  ``pupil`` and ``confidence``
    must be sample-aligned and of equal length.
    """
    blanked = np.array(pupil, dtype=float)
    for blink_idx in np.flatnonzero(confidence < threshold):
        blanked[max(blink_idx - padding, 0):blink_idx + padding] = np.nan
    return blanked


def _interpolate_gaps(trace, trial_num):
    """Fill NaN samples of ``trace`` in place by linear interpolation and return it.

    Gaps at either end take the nearest valid value.  A trace without any
    valid sample is filled with zeros and a warning naming ``trial_num`` is
    printed.
    """
    missing = np.isnan(trace)
    if missing.all():
        # NOTE(review): a zero-filled trial still enters the averages and the
        # baseline statistics - consider skipping such trials instead.
        print(f"Warning: All data is NaN in trial {trial_num}. Filling with zeros.")
        trace.fill(0)
    elif missing.any():
        valid_idx = np.flatnonzero(~missing)
        trace[missing] = np.interp(np.flatnonzero(missing), valid_idx, trace[valid_idx])
    return trace


# Process each participant: clean every negative trial, sort it into the
# long/short condition and store the participant's mean trace per condition.
for participant_file in participant_files:
    print(f"Processing participant file: {participant_file}")
    # Participant ID is the text before the first underscore of the file name;
    # the directory part is dropped so that underscores in a path cannot make
    # different participants share one ID.
    participant_id = op.basename(participant_file).split('_')[0]

    # Lists to store trial data by condition for this participant
    LongDurationVisual_condition_trials = []
    ShortDurationVisual_condition_trials = []
    LongDurationVisual_condition_baselines = []
    ShortDurationVisual_condition_baselines = []
    baseline_avg_array = []

    # Trial durations of this participant, keyed by trial number
    participant_trial_durations = {}

    # A file that cannot be loaded is reported and skipped.
    try:
        mat_data = sio.loadmat(participant_file)
    except Exception as e:
        print(f"Error loading file {participant_file}: {e}")
        continue

    # Any failure while processing is reported and ends this participant only.
    try:
        # Only process negative trials
        for trial_num in Negative:
            trial_key = f'Trial{trial_num}'
            if trial_key not in mat_data:
                continue
            trial_data = mat_data[trial_key]

            # Get behavioral codes
            behavioral_codes = trial_data['BehavioralCodes'][0, 0]
            code_times = behavioral_codes['CodeTimes'][0, 0].flatten()
            code_numbers = behavioral_codes['CodeNumbers'][0, 0].flatten()

            # Times of behavioural codes 2 and 3.
            # NOTE(review): presumably stimulus onset and offset - confirm
            # against the task's code table.
            time_2 = code_times[code_numbers == 2]
            time_3 = code_times[code_numbers == 3]

            # Both events are needed to measure the duration.
            if len(time_2) == 0 or len(time_3) == 0:
                continue

            trial_duration = time_3[0] - time_2[0]
            participant_trial_durations[trial_num] = trial_duration

            # Determine condition based on median split
            condition_label = get_condition_label(trial_duration, median_duration)

            # Get raw data
            analog_data = trial_data['AnalogData'][0, 0]
            lsl = analog_data['LSL'][0, 0]
            lsl_data = lsl['LSL1'][0, 0]

            # Column 0 holds timestamps, columns 19 and 20 the two diameters,
            # which are averaged sample by sample.
            timestamps = lsl_data[:, 0]
            diameter0 = lsl_data[:, 19].astype(float)
            diameter1 = lsl_data[:, 20].astype(float)
            average_diameter = (diameter0 + diameter1) / 2

            # Column 1 is used as the confidence of each sample.
            confidence = lsl_data[:, 1]

            # Keep only the window of PRE_EVENT_WINDOW that ends at the first
            # code-3 event.
            time_start = time_3[0] - PRE_EVENT_WINDOW
            time_end = time_3[0]
            mask = (timestamps >= time_start) & (timestamps <= time_end)

            masked_timestamps = timestamps[mask]
            masked_confidence = confidence[mask]
            masked_average_diameter = average_diameter[mask]

            # Shift timestamps so the window ends at 0; all values are <= 0.
            normalized_timestamps = masked_timestamps - time_end

            # Drop samples without a diameter.  The confidence values are
            # filtered with the same mask so that blink indices found below
            # still point at the right pupil samples.
            nan_mask = ~np.isnan(masked_average_diameter)
            working_pupil_data = masked_average_diameter[nan_mask]
            working_timestamps = normalized_timestamps[nan_mask]
            working_confidence = masked_confidence[nan_mask]

            # Skip trials with insufficient data
            if len(working_pupil_data) < MIN_SAMPLES_PER_TRIAL:
                print(f"Skipping trial {trial_num} - insufficient data points: {len(working_pupil_data)}")
                continue

            # Blank blinks, bridge the gaps, then smooth.
            working_pupil_data = _blank_blinks(
                working_pupil_data,
                working_confidence,
                threshold=BLINK_CONFIDENCE_THRESHOLD,
                padding=BLINK_PADDING_SAMPLES,
            )
            working_pupil_data = _interpolate_gaps(working_pupil_data, trial_num)
            filtered_pupil_data = savgol_filter(working_pupil_data, 11, 3)

            # Subtractive baseline correction against the mean of the first
            # BASELINE_SAMPLES samples, i.e. the earliest part of the window.
            baseline = filtered_pupil_data[0:BASELINE_SAMPLES]
            avg = stats.mean(baseline)
            baseline_corrected = filtered_pupil_data - avg

            baseline_avg_array.append(avg)

            # Store trial data and baseline by condition
            trial_record = {
                'trial_num': trial_num,
                'data': baseline_corrected,
                'timestamps': working_timestamps.copy(),
                'duration': trial_duration
            }
            if condition_label == "LongDurationVisual":
                LongDurationVisual_condition_trials.append(trial_record)
                LongDurationVisual_condition_baselines.append(avg)
            elif condition_label == "ShortDurationVisual":
                ShortDurationVisual_condition_trials.append(trial_record)
                ShortDurationVisual_condition_baselines.append(avg)

        # Convert baseline lists to numpy arrays
        LongDurationVisual_condition_baselines = np.array(LongDurationVisual_condition_baselines)
        ShortDurationVisual_condition_baselines = np.array(ShortDurationVisual_condition_baselines)
        baseline_avg_array = np.array(baseline_avg_array)

        # Print summary of the data for this participant
        print(f"  Number of LongDurationVisual trials: {len(LongDurationVisual_condition_trials)}")
        print(f"  Number of ShortDurationVisual trials: {len(ShortDurationVisual_condition_trials)}")

        # Reset every per-participant summary so that a participant without
        # trials in a condition never inherits the previous participant's values.
        participant_LongDurationVisual_mean = None
        participant_LongDurationVisual_std = None
        participant_LongDurationVisual_sem = None
        participant_ShortDurationVisual_mean = None
        participant_ShortDurationVisual_std = None
        participant_ShortDurationVisual_sem = None

        if LongDurationVisual_condition_trials:
            LongDurationVisual_resampled = resample_trial_data(LongDurationVisual_condition_trials, target_length)
            if len(LongDurationVisual_resampled) > 0:
                participant_LongDurationVisual_mean = np.mean(LongDurationVisual_resampled, axis=0)
                participant_LongDurationVisual_std = np.std(LongDurationVisual_resampled, axis=0)
                participant_LongDurationVisual_sem = participant_LongDurationVisual_std / np.sqrt(len(LongDurationVisual_resampled))

                all_LongDurationVisual_resampled.append(participant_LongDurationVisual_mean)
                print(f"  Resampled LongDurationVisual data shape: {LongDurationVisual_resampled.shape}")
            else:
                print("  No valid LongDurationVisual trials for resampling")

        if ShortDurationVisual_condition_trials:
            ShortDurationVisual_resampled = resample_trial_data(ShortDurationVisual_condition_trials, target_length)
            if len(ShortDurationVisual_resampled) > 0:
                participant_ShortDurationVisual_mean = np.mean(ShortDurationVisual_resampled, axis=0)
                participant_ShortDurationVisual_std = np.std(ShortDurationVisual_resampled, axis=0)
                participant_ShortDurationVisual_sem = participant_ShortDurationVisual_std / np.sqrt(len(ShortDurationVisual_resampled))

                all_ShortDurationVisual_resampled.append(participant_ShortDurationVisual_mean)
                print(f"  Resampled ShortDurationVisual data shape: {ShortDurationVisual_resampled.shape}")
            else:
                print("  No valid ShortDurationVisual trials for resampling")

        # Calculate average durations for each condition
        avg_long_duration = np.mean([t['duration'] for t in LongDurationVisual_condition_trials]) if LongDurationVisual_condition_trials else np.nan
        avg_short_duration = np.mean([t['duration'] for t in ShortDurationVisual_condition_trials]) if ShortDurationVisual_condition_trials else np.nan

        # Store participant-level statistics
        participant_stats[participant_id] = {
            "LongDurationVisual_trials_count": len(LongDurationVisual_condition_trials),
            "ShortDurationVisual_trials_count": len(ShortDurationVisual_condition_trials),
            "LongDurationVisual_baseline_mean": np.mean(LongDurationVisual_condition_baselines) if len(LongDurationVisual_condition_baselines) > 0 else np.nan,
            "LongDurationVisual_baseline_std": np.std(LongDurationVisual_condition_baselines) if len(LongDurationVisual_condition_baselines) > 0 else np.nan,
            "ShortDurationVisual_baseline_mean": np.mean(ShortDurationVisual_condition_baselines) if len(ShortDurationVisual_condition_baselines) > 0 else np.nan,
            "ShortDurationVisual_baseline_std": np.std(ShortDurationVisual_condition_baselines) if len(ShortDurationVisual_condition_baselines) > 0 else np.nan,
            "LongDurationVisual_mean": participant_LongDurationVisual_mean,
            "LongDurationVisual_std": participant_LongDurationVisual_std,
            "LongDurationVisual_sem": participant_LongDurationVisual_sem,
            "ShortDurationVisual_mean": participant_ShortDurationVisual_mean,
            "ShortDurationVisual_std": participant_ShortDurationVisual_std,
            "ShortDurationVisual_sem": participant_ShortDurationVisual_sem,
            "avg_long_duration_ms": avg_long_duration,
            "avg_short_duration_ms": avg_short_duration
        }

        # Add this participant's per-trial baselines to the pooled lists
        all_participants_LongDurationVisual_baselines.extend(LongDurationVisual_condition_baselines)
        all_participants_ShortDurationVisual_baselines.extend(ShortDurationVisual_condition_baselines)
        all_participants_baseline_avg.extend(baseline_avg_array)

    except Exception as e:
        print(f"Error processing participant {participant_file}: {e}")

# Freeze the accumulated lists into NumPy arrays.
all_LongDurationVisual_resampled = np.array(all_LongDurationVisual_resampled)
all_ShortDurationVisual_resampled = np.array(all_ShortDurationVisual_resampled)
all_participants_LongDurationVisual_baselines = np.array(all_participants_LongDurationVisual_baselines)
all_participants_ShortDurationVisual_baselines = np.array(all_participants_ShortDurationVisual_baselines)
all_participants_baseline_avg = np.array(all_participants_baseline_avg)

print(f"\nProcessed data from {len(all_LongDurationVisual_resampled)} participants for LongDurationVisual")
print(f"Processed data from {len(all_ShortDurationVisual_resampled)} participants for ShortDurationVisual")

# Grand average per condition: mean, standard deviation and standard error
# across the participant mean traces, computed sample by sample.  When a
# condition has no data its grand_* names are left undefined.
if len(all_LongDurationVisual_resampled) == 0:
    print("Warning: No valid LongDurationVisual data across participants")
else:
    LongDurationVisual_grand_mean = all_LongDurationVisual_resampled.mean(axis=0)
    LongDurationVisual_grand_std = all_LongDurationVisual_resampled.std(axis=0)
    LongDurationVisual_grand_sem = LongDurationVisual_grand_std / np.sqrt(all_LongDurationVisual_resampled.shape[0])

if len(all_ShortDurationVisual_resampled) == 0:
    print("Warning: No valid ShortDurationVisual data across participants")
else:
    ShortDurationVisual_grand_mean = all_ShortDurationVisual_resampled.mean(axis=0)
    ShortDurationVisual_grand_std = all_ShortDurationVisual_resampled.std(axis=0)
    ShortDurationVisual_grand_sem = ShortDurationVisual_grand_std / np.sqrt(all_ShortDurationVisual_resampled.shape[0])

def _trace_summary(trace):
    """Return (nanmean, nanmax, nanmin) of ``trace``, or three NaNs when it is None."""
    if trace is None:
        return np.nan, np.nan, np.nan
    return np.nanmean(trace), np.nanmax(trace), np.nanmin(trace)


# Export one summary row per participant to CSV.
participant_summary_data = []
for participant_id, stats_dict in participant_stats.items():
    # Collapse each condition's mean trace into its mean, maximum and minimum.
    (LongDurationVisual_mean_value,
     LongDurationVisual_max_value,
     LongDurationVisual_min_value) = _trace_summary(stats_dict["LongDurationVisual_mean"])

    (ShortDurationVisual_mean_value,
     ShortDurationVisual_max_value,
     ShortDurationVisual_min_value) = _trace_summary(stats_dict["ShortDurationVisual_mean"])

    # Keys are inserted in the order the CSV columns should appear.
    summary_row = {"participant_id": participant_id}
    summary_row.update({
        copied_key: stats_dict[copied_key]
        for copied_key in (
            "LongDurationVisual_trials_count",
            "ShortDurationVisual_trials_count",
            "LongDurationVisual_baseline_mean",
            "LongDurationVisual_baseline_std",
            "ShortDurationVisual_baseline_mean",
            "ShortDurationVisual_baseline_std",
        )
    })
    summary_row.update({
        "LongDurationVisual_response_mean": LongDurationVisual_mean_value,
        "LongDurationVisual_response_max": LongDurationVisual_max_value,
        "LongDurationVisual_response_min": LongDurationVisual_min_value,
        "ShortDurationVisual_response_mean": ShortDurationVisual_mean_value,
        "ShortDurationVisual_response_max": ShortDurationVisual_max_value,
        "ShortDurationVisual_response_min": ShortDurationVisual_min_value,
    })
    summary_row["avg_long_duration_ms"] = stats_dict["avg_long_duration_ms"]
    summary_row["avg_short_duration_ms"] = stats_dict["avg_short_duration_ms"]

    participant_summary_data.append(summary_row)

# Write the table to disk.
participant_summary_df = pd.DataFrame(participant_summary_data)
participant_summary_df.to_csv("pre_stimulus_participant_summary_statistics_duration_negative_only.csv", index=False)
print("Saved participant summary statistics to pre_stimulus_participant_summary_statistics_duration_negative_only.csv")

# Nominal time axis: target_length evenly spaced points from -3000 to 0,
# one per sample of the resampled traces.
# NOTE(review): resampling spreads each trial's samples evenly over this axis
# regardless of their recorded timestamps - confirm that is acceptable.
time_axis = np.linspace(-3000, 0, target_length)

# Column dictionaries for the two per-condition time-series tables; both
# start with the shared time axis.
LongDurationVisual_timeseries_data = {"time_ms": time_axis}
ShortDurationVisual_timeseries_data = {"time_ms": time_axis}

# Add grand mean, standard deviation and SEM columns.  The grand_* names only
# exist when the condition had data, hence the lookup by name.
if 'LongDurationVisual_grand_mean' in locals():
    LongDurationVisual_timeseries_data["grand_mean"] = LongDurationVisual_grand_mean
    LongDurationVisual_timeseries_data["grand_std"] = LongDurationVisual_grand_std
    LongDurationVisual_timeseries_data["grand_sem"] = LongDurationVisual_grand_sem

if 'ShortDurationVisual_grand_mean' in locals():
    ShortDurationVisual_timeseries_data["grand_mean"] = ShortDurationVisual_grand_mean
    ShortDurationVisual_timeseries_data["grand_std"] = ShortDurationVisual_grand_std
    ShortDurationVisual_timeseries_data["grand_sem"] = ShortDurationVisual_grand_sem

# Add one column per participant holding that participant's mean trace;
# participants without a mean trace for a condition get no column in it.
for participant_id, stats_dict in participant_stats.items():
    if stats_dict["LongDurationVisual_mean"] is not None:
        LongDurationVisual_timeseries_data[f"participant_{participant_id}"] = stats_dict["LongDurationVisual_mean"]
    
    if stats_dict["ShortDurationVisual_mean"] is not None:
        ShortDurationVisual_timeseries_data[f"participant_{participant_id}"] = stats_dict["ShortDurationVisual_mean"]

# Save time-series data to CSV, one file per condition
LongDurationVisual_timeseries_df = pd.DataFrame(LongDurationVisual_timeseries_data)
LongDurationVisual_timeseries_df.to_csv("pre_stimulus_LongDurationVisual_condition_timeseries_negative_only.csv", index=False)
print("Saved LongDurationVisual time-series data to pre_stimulus_LongDurationVisual_condition_timeseries_negative_only.csv")

ShortDurationVisual_timeseries_df = pd.DataFrame(ShortDurationVisual_timeseries_data)
ShortDurationVisual_timeseries_df.to_csv("pre_stimulus_ShortDurationVisual_condition_timeseries_negative_only.csv", index=False)
print("Saved ShortDurationVisual time-series data to pre_stimulus_ShortDurationVisual_condition_timeseries_negative_only.csv")

# Export group-level summary statistics.
# Baseline mean, standard deviation and SEM are taken over the pooled
# per-trial baselines of each condition, so the SEM divides by the number of
# trials, not the number of participants.
mean_LongDurationVisual_baseline = np.mean(all_participants_LongDurationVisual_baselines)
std_LongDurationVisual_baseline = np.std(all_participants_LongDurationVisual_baselines)
sem_LongDurationVisual_baseline = std_LongDurationVisual_baseline / np.sqrt(len(all_participants_LongDurationVisual_baselines))

mean_ShortDurationVisual_baseline = np.mean(all_participants_ShortDurationVisual_baselines)
std_ShortDurationVisual_baseline = np.std(all_participants_ShortDurationVisual_baselines)
sem_ShortDurationVisual_baseline = std_ShortDurationVisual_baseline / np.sqrt(len(all_participants_ShortDurationVisual_baselines))

# Average duration per condition, computed as the mean of the per-participant
# averages (participants whose average is NaN are left out).  This weights
# every participant equally rather than every trial.
avg_long_duration_all = np.mean([stats_dict["avg_long_duration_ms"] for _, stats_dict in participant_stats.items() 
                               if not np.isnan(stats_dict["avg_long_duration_ms"])])
avg_short_duration_all = np.mean([stats_dict["avg_short_duration_ms"] for _, stats_dict in participant_stats.items() 
                                if not np.isnan(stats_dict["avg_short_duration_ms"])])

# One row per statistic, one column per condition.  The values in each
# condition list follow the order of the names in "statistic".
group_summary_data = {
    "statistic": ["baseline_mean", "baseline_std", "baseline_sem", 
                 "response_mean", "response_std", "response_sem",
                 "peak_response", "peak_time_ms", "participant_count",
                 "median_split_value_ms", "avg_duration_ms"],
    "LongDurationVisual_condition": [
        mean_LongDurationVisual_baseline,
        std_LongDurationVisual_baseline,
        sem_LongDurationVisual_baseline,
        # response_* rows are the time-averages of the grand mean/std/SEM traces
        np.mean(LongDurationVisual_grand_mean) if 'LongDurationVisual_grand_mean' in locals() else np.nan,
        np.mean(LongDurationVisual_grand_std) if 'LongDurationVisual_grand_std' in locals() else np.nan,
        np.mean(LongDurationVisual_grand_sem) if 'LongDurationVisual_grand_sem' in locals() else np.nan,
        # peak_response is the maximum of the grand mean; peak_time_ms is where it occurs
        np.max(LongDurationVisual_grand_mean) if 'LongDurationVisual_grand_mean' in locals() else np.nan,
        time_axis[np.argmax(LongDurationVisual_grand_mean)] if 'LongDurationVisual_grand_mean' in locals() else np.nan,
        len(all_LongDurationVisual_resampled),
        median_duration,
        avg_long_duration_all
    ],
    "ShortDurationVisual_condition": [
        mean_ShortDurationVisual_baseline,
        std_ShortDurationVisual_baseline,
        sem_ShortDurationVisual_baseline,
        # response_* rows are the time-averages of the grand mean/std/SEM traces
        np.mean(ShortDurationVisual_grand_mean) if 'ShortDurationVisual_grand_mean' in locals() else np.nan,
        np.mean(ShortDurationVisual_grand_std) if 'ShortDurationVisual_grand_std' in locals() else np.nan,
        np.mean(ShortDurationVisual_grand_sem) if 'ShortDurationVisual_grand_sem' in locals() else np.nan,
        # peak_response is the maximum of the grand mean; peak_time_ms is where it occurs
        np.max(ShortDurationVisual_grand_mean) if 'ShortDurationVisual_grand_mean' in locals() else np.nan,
        time_axis[np.argmax(ShortDurationVisual_grand_mean)] if 'ShortDurationVisual_grand_mean' in locals() else np.nan,
        len(all_ShortDurationVisual_resampled),
        median_duration,
        avg_short_duration_all
    ]
}

# Save group summary data to CSV
group_summary_df = pd.DataFrame(group_summary_data)
group_summary_df.to_csv("pre_stimulus_group_summary_statistics_duration_negative_only.csv", index=False)
print("Saved group summary statistics to pre_stimulus_group_summary_statistics_duration_negative_only.csv")

# Percentage-change time series of the grand means.  Runs only when both
# conditions have a grand mean.
if 'LongDurationVisual_grand_mean' in locals() and 'ShortDurationVisual_grand_mean' in locals():
    # Columns of the output table, starting with the shared time axis.  Only
    # the two grand-mean traces are written; there are no per-participant columns.
    percentage_change_data = {"time_ms": time_axis}
    
    # Mean of the pooled per-trial baselines of each condition
    mean_LongDurationVisual_baseline = np.mean(all_participants_LongDurationVisual_baselines)
    mean_ShortDurationVisual_baseline = np.mean(all_participants_ShortDurationVisual_baselines)
    
    # The grand means are built from baseline-subtracted traces, so dividing
    # by the mean baseline expresses them as a percentage change from baseline.
    LongDurationVisual_percent_change = (LongDurationVisual_grand_mean / mean_LongDurationVisual_baseline) * 100
    ShortDurationVisual_percent_change = (ShortDurationVisual_grand_mean / mean_ShortDurationVisual_baseline) * 100
    
    percentage_change_data["LongDurationVisual_percent_change"] = LongDurationVisual_percent_change
    percentage_change_data["ShortDurationVisual_percent_change"] = ShortDurationVisual_percent_change
    
    # Save percentage change data to CSV
    percentage_change_df = pd.DataFrame(percentage_change_data)
    percentage_change_df.to_csv("pre_stimulus_percentage_change_timeseries_duration_negative_only.csv", index=False)
    print("Saved percentage change time-series to pre_stimulus_percentage_change_timeseries_duration_negative_only.csv")

# Create plots with grand averages and SEM
if 'LongDurationVisual_grand_mean' in locals() and 'ShortDurationVisual_grand_mean' in locals():
    # Plot 1: Average Pupil Size Comparison
    f = plt.figure(8)
    f.set_figheight(10) 
    f.set_figwidth(20)  
    
    plt.plot(time_axis, LongDurationVisual_grand_mean, 'bo', label="Long Duration")
    plt.fill_between(time_axis, 
                     LongDurationVisual_grand_mean - LongDurationVisual_grand_sem, 
                     LongDurationVisual_grand_mean + LongDurationVisual_grand_sem, 
                     color='blue', alpha=0.2)
    
    plt.plot(time_axis, ShortDurationVisual_grand_mean, 'ro', label="Short Duration")
    plt.fill_between(time_axis, 
                     ShortDurationVisual_grand_mean - ShortDurationVisual_grand_sem, 
                     ShortDurationVisual_grand_mean + ShortDurationVisual_grand_sem, 
                     color='red', alpha=0.2)
    
    plt.xlabel("Time in milliseconds (relative to stimulus onset)", fontsize="20")
    plt.ylabel("Pupil Diameter in arbitrary units", fontsize="20")
    plt.legend(loc="upper left", fontsize="16") 
    plt.title("Average Pre-Stimulus Pupil Size Comparison-ShortDurationVisual vs LongDurationVisual (Negative Trials Only)")
    plt.axvline(x=0, color='k', linestyle='--')  # Mark the stimulus onset
    plt.savefig("pre_stimulus_pupil_time_series_comparison_v_negative_only.png")

    # Plot 2: Subtractive Baseline Corrected Pupil Size Series
    f = plt.figure(11)
    f.set_figheight(10)
    f.set_figwidth(20)
    
    plt.plot(time_axis, ShortDurationVisual_grand_mean, "bo", label="ShortDurationVisual")
    plt.fill_between(time_axis, 
                     ShortDurationVisual_grand_mean - ShortDurationVisual_grand_sem, 
                     ShortDurationVisual_grand_mean + ShortDurationVisual_grand_sem, 
                     color='blue', alpha=0.2)
    
    plt.plot(time_axis, LongDurationVisual_grand_mean, "ro", label="LongDurationVisual")
    plt.fill_between(time_axis, 
                     LongDurationVisual_grand_mean - LongDurationVisual_grand_sem, 
                     LongDurationVisual_grand_mean + LongDurationVisual_grand_sem, 
                     color='red', alpha=0.2)
    
    plt.xlabel("Time in milliseconds (relative to stimulus onset)", fontsize="20")
    plt.ylabel("Change in Pupil size over time wrt Baseline size", fontsize="20")
    plt.legend(loc="upper left", fontsize="16")
    plt.title("Pre-Stimulus Subtractive Baseline Corrected Pupil Size Series")
    plt.axvline(x=0, color='k', linestyle='--')  # Mark the stimulus onset
    plt.savefig("pre_stimulus_baseline_corrected_pupil_size_v.png")

    # Plot 3: percentage change relative to the mean baseline.
    # The reference value for each condition is the mean of all baseline
    # values collected for that condition.
    mean_ShortDurationVisual_baseline = np.mean(all_participants_ShortDurationVisual_baselines)
    mean_LongDurationVisual_baseline = np.mean(all_participants_LongDurationVisual_baselines)

    # Grand-mean traces expressed as a percentage of the condition baseline.
    ShortDurationVisual_percent_change = (ShortDurationVisual_grand_mean / mean_ShortDurationVisual_baseline) * 100
    LongDurationVisual_percent_change = (LongDurationVisual_grand_mean / mean_LongDurationVisual_baseline) * 100

    # The same conversion applied to every resampled trace (one entry per
    # participant, going by the names used elsewhere in this script).
    ShortDurationVisual_percent_changes = [
        (resampled_trace / mean_ShortDurationVisual_baseline) * 100
        for resampled_trace in all_ShortDurationVisual_resampled
    ]
    LongDurationVisual_percent_changes = [
        (resampled_trace / mean_LongDurationVisual_baseline) * 100
        for resampled_trace in all_LongDurationVisual_resampled
    ]

    # SEM at each time point: std across traces (np.std default, ddof=0)
    # divided by the square root of the number of traces.
    ShortDurationVisual_sem = (
        np.std(np.array(ShortDurationVisual_percent_changes), axis=0)
        / np.sqrt(len(ShortDurationVisual_percent_changes))
    )
    LongDurationVisual_sem = (
        np.std(np.array(LongDurationVisual_percent_changes), axis=0)
        / np.sqrt(len(LongDurationVisual_percent_changes))
    )

    f = plt.figure(12)
    f.set_size_inches(20, 10)  # width 20, height 10

    for pct_trace, pct_sem, pct_marker, pct_color, pct_label in (
        (ShortDurationVisual_percent_change, ShortDurationVisual_sem, 'bo', 'blue', "ShortDurationVisual"),
        (LongDurationVisual_percent_change, LongDurationVisual_sem, 'ro', 'red', "LongDurationVisual"),
    ):
        plt.plot(time_axis, pct_trace, pct_marker, label=pct_label)
        plt.fill_between(time_axis,
                         pct_trace - pct_sem,
                         pct_trace + pct_sem,
                         color=pct_color, alpha=0.2)

    plt.xlabel("Time in milliseconds (relative to stimulus onset)", fontsize="20")
    plt.ylabel("Average Percentage Change in Pupil Size from the Baseline Size", fontsize="20")
    plt.legend(loc="upper left", fontsize="16")
    plt.title("Pre-Stimulus Average Percentage Change in Pupil Size")
    # Dashed vertical line at x = 0 marks the stimulus onset.
    plt.axvline(x=0, color='k', linestyle='--')
    plt.savefig("pre_stimulus_percentage_change_pupil_size_v.png")

    # Plot 4: z-scored data.
    # Both grand-mean traces are pooled (Short first, then Long) so that they
    # share one mean and one sample standard deviation.
    final_reward = list(ShortDurationVisual_grand_mean) + list(LongDurationVisual_grand_mean)
    mean_final_reward = stats.mean(final_reward)
    std_final = stats.stdev(final_reward)

    # z = (x - pooled mean) / pooled std for every pooled sample.
    zscored = [(sample - mean_final_reward) / std_final for sample in final_reward]

    # Undo the pooling: the first target_length values belong to Short,
    # everything after that to Long.
    ShortDurationVisual_z = np.array(zscored[:target_length])
    LongDurationVisual_z = np.array(zscored[target_length:])

    # Baseline window = first 50 samples of each z-scored trace.
    baseline_ShortDurationVisual_z = ShortDurationVisual_z[0:50]
    baseline_LongDurationVisual_z = LongDurationVisual_z[0:50]
    mean_base_ShortDurationVisual = stats.mean(baseline_ShortDurationVisual_z)
    mean_base_LongDurationVisual = stats.mean(baseline_LongDurationVisual_z)

    # Subtract the baseline mean from the whole trace.
    final_z_b_ShortDurationVisual = ShortDurationVisual_z - mean_base_ShortDurationVisual
    final_z_b_LongDurationVisual = LongDurationVisual_z - mean_base_LongDurationVisual

    # Percentage change relative to |baseline mean|; a baseline mean of
    # exactly zero yields an all-zero trace instead of a division by zero.
    if mean_base_ShortDurationVisual != 0:
        percentage_ShortDurationVisual = (final_z_b_ShortDurationVisual / abs(mean_base_ShortDurationVisual)) * 100
    else:
        percentage_ShortDurationVisual = np.zeros_like(final_z_b_ShortDurationVisual)

    if mean_base_LongDurationVisual != 0:
        percentage_LongDurationVisual = (final_z_b_LongDurationVisual / abs(mean_base_LongDurationVisual)) * 100
    else:
        percentage_LongDurationVisual = np.zeros_like(final_z_b_LongDurationVisual)
    
    # Per-participant z-scores, used to derive the SEM bands.
    # For each participant the Short and Long traces are concatenated,
    # z-scored with one shared mean / sample standard deviation, and split
    # back at target_length.
    #
    # The two lists are paired by position with zip() instead of indexing the
    # Long list with the Short list's indices: a shorter Long list used to
    # raise IndexError, and a longer one had its extra entries ignored without
    # notice. Unpaired trailing entries are now skipped and reported.
    # NOTE(review): pairing assumes entry k of both lists belongs to the same
    # participant -- confirm against the code that fills these lists.
    if len(all_ShortDurationVisual_resampled) != len(all_LongDurationVisual_resampled):
        print(
            f"Warning: {len(all_ShortDurationVisual_resampled)} ShortDurationVisual and "
            f"{len(all_LongDurationVisual_resampled)} LongDurationVisual resampled traces; "
            "z-scoring only the entries present in both lists"
        )

    all_ShortDurationVisual_z = []
    all_LongDurationVisual_z = []

    for short_trace, long_trace in zip(all_ShortDurationVisual_resampled, all_LongDurationVisual_resampled):
        # Pool both conditions so they share the same z-score reference.
        participant_data = list(short_trace) + list(long_trace)
        mean_participant = stats.mean(participant_data)
        std_participant = stats.stdev(participant_data)

        participant_z = [(val - mean_participant) / std_participant for val in participant_data]

        # Split and store: first target_length values are Short, the rest Long.
        all_ShortDurationVisual_z.append(participant_z[:target_length])
        all_LongDurationVisual_z.append(participant_z[target_length:])

    # Convert to numpy arrays (one row per paired participant).
    all_ShortDurationVisual_z = np.array(all_ShortDurationVisual_z)
    all_LongDurationVisual_z = np.array(all_LongDurationVisual_z)

    # Baseline-corrected z-scores and their percentage form per participant.
    all_final_z_b_ShortDurationVisual = []
    all_final_z_b_LongDurationVisual = []
    all_percentage_ShortDurationVisual = []
    all_percentage_LongDurationVisual = []

    for short_z, long_z in zip(all_ShortDurationVisual_z, all_LongDurationVisual_z):
        # Participant baseline = mean of the first 50 z-scored samples.
        participant_baseline_short = np.mean(short_z[0:50])
        participant_baseline_long = np.mean(long_z[0:50])

        # Subtract the baseline from the whole trace.
        z_b_short = short_z - participant_baseline_short
        z_b_long = long_z - participant_baseline_long

        all_final_z_b_ShortDurationVisual.append(z_b_short)
        all_final_z_b_LongDurationVisual.append(z_b_long)

        # Percentage change relative to |baseline|; a baseline of exactly
        # zero yields an all-zero trace instead of dividing by zero.
        pct_short = (z_b_short / abs(participant_baseline_short)) * 100 if participant_baseline_short != 0 else np.zeros_like(z_b_short)
        pct_long = (z_b_long / abs(participant_baseline_long)) * 100 if participant_baseline_long != 0 else np.zeros_like(z_b_long)

        all_percentage_ShortDurationVisual.append(pct_short)
        all_percentage_LongDurationVisual.append(pct_long)

    # Convert to numpy arrays
    all_percentage_ShortDurationVisual = np.array(all_percentage_ShortDurationVisual)
    all_percentage_LongDurationVisual = np.array(all_percentage_LongDurationVisual)

    # SEM of the percentage change at each time point
    # (np.std default ddof=0, divided by sqrt of the number of rows).
    percentage_ShortDurationVisual_sem = np.std(all_percentage_ShortDurationVisual, axis=0) / np.sqrt(len(all_percentage_ShortDurationVisual))
    percentage_LongDurationVisual_sem = np.std(all_percentage_LongDurationVisual, axis=0) / np.sqrt(len(all_percentage_LongDurationVisual))

    # SEM of the uncorrected z-scores, computed the same way.
    ShortDurationVisual_z_sem = np.std(all_ShortDurationVisual_z, axis=0) / np.sqrt(len(all_ShortDurationVisual_z))
    LongDurationVisual_z_sem = np.std(all_LongDurationVisual_z, axis=0) / np.sqrt(len(all_LongDurationVisual_z))
    
    # NEW: Save z-scored data to CSV
    # One column per series: the time axis, the baseline-corrected z-scores
    # of both conditions, and their percentage-change form.
    zscored_data = dict([
        ("time_ms", time_axis),
        ("ShortDurationVisual_zscore", final_z_b_ShortDurationVisual),
        ("LongDurationVisual_zscore", final_z_b_LongDurationVisual),
        ("ShortDurationVisual_percentage", percentage_ShortDurationVisual),
        ("LongDurationVisual_percentage", percentage_LongDurationVisual),
    ])

    zscored_df = pd.DataFrame(zscored_data)
    # index=False keeps the row index out of the file.
    zscored_df.to_csv("zscored_data_v.csv", index=False)
    print("Saved z-scored data to zscored_data_v.csv")

  
    
    # Plot: percentage change of the z-scored traces.
    # Markers show each condition's trace; the shaded band is +/- the
    # per-time-point SEM of the percentage change.
    f = plt.figure(14)
    f.set_size_inches(20, 10)  # width 20, height 10

    for z_pct_trace, z_pct_sem, z_pct_marker, z_pct_color, z_pct_label in (
        (percentage_ShortDurationVisual, percentage_ShortDurationVisual_sem, 'bo', 'blue', "ShortDurationVisual"),
        (percentage_LongDurationVisual, percentage_LongDurationVisual_sem, 'ro', 'red', "LongDurationVisual"),
    ):
        plt.plot(time_axis, z_pct_trace, z_pct_marker, label=z_pct_label)
        plt.fill_between(time_axis,
                         z_pct_trace - z_pct_sem,
                         z_pct_trace + z_pct_sem,
                         color=z_pct_color, alpha=0.2)

    plt.xlabel("Time in milliseconds", fontsize="20")
    plt.ylabel("Average Percentage Change in Pupil Size (z-scored)", fontsize="20")
    plt.legend(loc="upper left", fontsize="16")
    plt.title("Average Percentage Change in Pupil Size (z-scored)")
    # Fixed y-range with a tick every 50 units: -700, -650, ..., 450.
    plt.ylim(-700, 450)
    plt.yticks(np.arange(-700, 451, 50))

    plt.savefig("zscored_percentage_change_v.png")
    
    '''
    f = plt.figure(figsize=(3, 2.5))  # Small figure
    plt.plot(time_axis, percentage_ShortDurationVisual, 'bo', label="Short", linewidth=1)
    plt.fill_between(time_axis, 
                 percentage_ShortDurationVisual - percentage_ShortDurationVisual_sem, 
                 percentage_ShortDurationVisual + percentage_ShortDurationVisual_sem, 
                 color='blue', alpha=0.2)

    plt.plot(time_axis, percentage_LongDurationVisual, 'ro', label="Long", linewidth=1)
    plt.fill_between(time_axis, 
                 percentage_LongDurationVisual - percentage_LongDurationVisual_sem, 
                 percentage_LongDurationVisual + percentage_LongDurationVisual_sem, 
                 color='red', alpha=0.2)

    # Use smaller but readable fonts
    plt.xlabel("Time (ms)", fontsize=8)
    plt.ylabel("% Change in Pupil Size", fontsize=8)
    plt.title("% Change in Pupil Size (Visual)", fontsize=9)
    plt.legend(loc="lower left", fontsize=6, frameon=False)

    # Axis customization
    plt.ylim(-450, 150)
    plt.yticks(np.arange(-450, 151, 150), fontsize=6)
    plt.xticks(fontsize=6)

    # Tight layout for small space and clean export
    plt.tight_layout()
    plt.savefig("small_zscore_plot.png", dpi=600, bbox_inches='tight')
    plt.show()
    '''
    
  
    
    


    # Plot 5: overlaid histograms of baseline averages.
    # Three distributions share one axis; the per-condition ones are drawn
    # more transparently on top of the all-trials histogram.
    fig, ax = plt.subplots(figsize=(20, 10))
    for baseline_values, hist_alpha, hist_label in (
        (all_participants_baseline_avg, 0.7, "All trials"),
        (all_participants_ShortDurationVisual_baselines, 0.5, "ShortDurationVisual"),
        (all_participants_LongDurationVisual_baselines, 0.5, "LongDurationVisual"),
    ):
        ax.hist(baseline_values, bins=20, alpha=hist_alpha, label=hist_label)
    ax.legend()
    ax.set(
        title="Baseline Pupil Diameter Distribution by Condition",
        xlabel="Baseline Pupil Diameter (arbitrary units)",
        ylabel="Count",
    )
    plt.savefig("baseline_diameter_histogram_v.png")
    
    # Plot 6: average pupil response per condition as a line,
    # with a shaded mean +/- SEM band.
    fig, ax = plt.subplots(figsize=(20, 10))

    for response_mean, response_sem, response_label, response_color in (
        (ShortDurationVisual_grand_mean, ShortDurationVisual_grand_sem, "ShortDurationVisual", "blue"),
        (LongDurationVisual_grand_mean, LongDurationVisual_grand_sem, "LongDurationVisual", "red"),
    ):
        ax.plot(time_axis, response_mean, label=response_label, color=response_color)
        ax.fill_between(time_axis,
                        response_mean - response_sem,
                        response_mean + response_sem,
                        color=response_color, alpha=0.2)

    ax.legend()
    ax.set(
        title="Average Pupil Response by Condition with SEM",
        xlabel="Time (ms)",
        ylabel="Baseline-corrected Pupil Diameter",
    )
    plt.savefig("average_pupil_response_with_sem_v.png")

    # NEW: Export additional statistical measurements
    # Peak = maximum of each grand-mean trace; peak time = the time_axis
    # value at the index of that maximum.
    ShortDurationVisual_peak_value = np.max(ShortDurationVisual_grand_mean)
    ShortDurationVisual_peak_time = time_axis[np.argmax(ShortDurationVisual_grand_mean)]

    LongDurationVisual_peak_value = np.max(LongDurationVisual_grand_mean)
    LongDurationVisual_peak_time = time_axis[np.argmax(LongDurationVisual_grand_mean)]

    # Area under the curve via the trapezoidal rule over time_axis.
    # np.trapz was deprecated in NumPy 2.0 in favour of np.trapezoid, so use
    # the new name when it exists and fall back to np.trapz on older NumPy.
    # The `or` short-circuits, so np.trapz is never touched on new versions.
    _integrate_trapezoid = getattr(np, "trapezoid", None) or np.trapz
    ShortDurationVisual_auc = _integrate_trapezoid(ShortDurationVisual_grand_mean, time_axis)
    LongDurationVisual_auc = _integrate_trapezoid(LongDurationVisual_grand_mean, time_axis)

    # Export these additional metrics: one row per metric, one column per condition.
    additional_metrics = {
        "metric": ["peak_value", "peak_time_ms", "area_under_curve"],
        "ShortDurationVisual": [ShortDurationVisual_peak_value, ShortDurationVisual_peak_time, ShortDurationVisual_auc],
        "LongDurationVisual": [LongDurationVisual_peak_value, LongDurationVisual_peak_time, LongDurationVisual_auc],
    }

    additional_metrics_df = pd.DataFrame(additional_metrics)
    additional_metrics_df.to_csv("additional_pupil_metrics_v.csv", index=False)
    print("Saved additional pupil response metrics to additional_pupil_metrics_v.csv")
    
    # NEW: Save raw trial data counts
    # trial_count is the number of stored baseline values per condition;
    # participant_count is the number of resampled traces per condition.
    short_trial_total = len(all_participants_ShortDurationVisual_baselines)
    long_trial_total = len(all_participants_LongDurationVisual_baselines)
    short_trace_total = len(all_ShortDurationVisual_resampled)
    long_trace_total = len(all_LongDurationVisual_resampled)

    trial_counts = {
        "condition": ["ShortDurationVisual", "LongDurationVisual"],
        "trial_count": [short_trial_total, long_trial_total],
        "participant_count": [short_trace_total, long_trace_total],
    }

    trial_counts_df = pd.DataFrame(trial_counts)
    trial_counts_df.to_csv("trial_counts_v.csv", index=False)
    print("Saved trial count data to trial_counts_v.csv")

# NEW: Create a comprehensive summary report with all key statistics
# The report reads values computed by earlier sections of this script. The
# guard used to check only the two grand-mean names, so a run in which the
# grand means existed but the metrics section had not executed failed with
# NameError. Every input that is not defined unconditionally is now checked,
# and the report is skipped with a message when something is missing.
_summary_required_names = (
    'ShortDurationVisual_grand_mean',
    'LongDurationVisual_grand_mean',
    'all_ShortDurationVisual_resampled',
    'mean_ShortDurationVisual_baseline',
    'mean_LongDurationVisual_baseline',
    'ShortDurationVisual_peak_value',
    'LongDurationVisual_peak_value',
    'ShortDurationVisual_peak_time',
    'LongDurationVisual_peak_time',
    'ShortDurationVisual_auc',
    'LongDurationVisual_auc',
)
# Capture the namespace once: calling locals() inside a comprehension would
# inspect the comprehension's own scope instead of this script's.
_summary_namespace = locals()
_summary_missing_names = [
    required_name for required_name in _summary_required_names
    if required_name not in _summary_namespace
]

if not _summary_missing_names:
    # "metric" and "value" are parallel lists: entry k of one describes
    # entry k of the other.
    summary_report = {
        "metric": [
            "Total participants",
            "Total ShortDurationVisual trials",
            "Total LongDurationVisual trials",
            "Mean ShortDurationVisual baseline",
            "Mean LongDurationVisual baseline",
            "ShortDurationVisual peak amplitude",
            "LongDurationVisual peak amplitude",
            "ShortDurationVisual peak time (ms)",
            "LongDurationVisual peak time (ms)",
            "ShortDurationVisual AUC",
            "LongDurationVisual AUC",
            "ShortDurationVisual mean response",
            "LongDurationVisual mean response"
        ],
        "value": [
            len(all_ShortDurationVisual_resampled),
            len(all_participants_ShortDurationVisual_baselines),
            len(all_participants_LongDurationVisual_baselines),
            mean_ShortDurationVisual_baseline,
            mean_LongDurationVisual_baseline,
            ShortDurationVisual_peak_value,
            LongDurationVisual_peak_value,
            ShortDurationVisual_peak_time,
            LongDurationVisual_peak_time,
            ShortDurationVisual_auc,
            LongDurationVisual_auc,
            np.mean(ShortDurationVisual_grand_mean),
            np.mean(LongDurationVisual_grand_mean)
        ]
    }

    summary_report_df = pd.DataFrame(summary_report)
    summary_report_df.to_csv("pupillometry_summary_report_v.csv", index=False)
    print("Saved comprehensive summary report to pupillometry_summary_report_v.csv")
else:
    print(f"Skipping summary report; missing values: {', '.join(_summary_missing_names)}")

# Difference measures need both grand-mean traces; skip the section
# entirely when either of them was never created.
if {'ShortDurationVisual_grand_mean', 'LongDurationVisual_grand_mean'}.issubset(locals()):
    # Point-by-point difference, Long minus Short.
    condition_difference = LongDurationVisual_grand_mean - ShortDurationVisual_grand_mean

    # Export the difference trace alongside the time axis.
    difference_data = dict([
        ("time_ms", time_axis),
        ("LongDurationVisual_minus_ShortDurationVisual", condition_difference),
    ])
    difference_df = pd.DataFrame(difference_data)
    difference_df.to_csv("condition_difference_v.csv", index=False)
    print("Saved condition difference data to condition_difference_v.csv")

    # Plot the difference as a green line with a faint dashed zero reference.
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.plot(time_axis, condition_difference, 'g-', label="LongDurationVisual - ShortDurationVisual")
    ax.axhline(y=0, color='k', linestyle='--', alpha=0.3)
    ax.set_title("Difference Between Conditions (LongDurationVisual - ShortDurationVisual)")
    ax.set_xlabel("Time (ms)")
    ax.set_ylabel("Difference in Pupil Diameter")
    ax.legend()
    plt.savefig("condition_difference_v.png")

# Printed unconditionally, whether or not the guarded sections above ran.
print("All data processing and exports completed successfully!")
# Display every figure created so far.
plt.show()