In [1]:
import os
import re
import warnings

import numpy as np
import pandas as pd
import scipy.io

def reconstruct_trial_data(mat_file: dict) -> pd.DataFrame | None:
    """
    This function reconstructs trial-by-trial data, including reaction times,
    from the low-level event log stored in the 'myscreen' variable.
    """
    if 'myscreen' not in mat_file or 'task' not in mat_file or not isinstance(mat_file.get('task'), list) or len(mat_file['task']) < 2:
        return None

    myscreen = mat_file['myscreen']
    task = mat_file['task'][1]
    
    if not isinstance(myscreen.get('events'), dict):
        return None
        
    events = pd.DataFrame({
        'tracenum': myscreen['events'].get('tracenum', np.array([])).flatten(),
        'data': myscreen['events'].get('data', np.array([])).flatten(),
        'time': myscreen['events'].get('time', np.array([])).flatten()
    })

    segment_trace_num = task.get('segmentTrace')
    response_trace_num = task.get('responseTrace')
    if segment_trace_num is None or response_trace_num is None:
        return None

    reconstructed_trials = []
    current_trial_num = -1
    expt_start_time = -1
    segment_start_time = -1

    for _, event in events.iterrows():
        if event['tracenum'] == segment_trace_num:
            segment_num = int(event['data'])
            if expt_start_time == -1: expt_start_time = event['time']
            if segment_num == 1:
                current_trial_num += 1
                reconstructed_trials.append({'trial_index': current_trial_num + 1, 'trial_time': event['time'] - expt_start_time})
            segment_start_time = event['time']
        
        elif event['tracenum'] == response_trace_num:
            if 0 <= current_trial_num < len(reconstructed_trials):
                if 'reaction_time' not in reconstructed_trials[current_trial_num]:
                    reconstructed_trials[current_trial_num]['reaction_time'] = event['time'] - segment_start_time
                    reconstructed_trials[current_trial_num]['raw_response_time'] = event['time'] - expt_start_time
    
    return pd.DataFrame(reconstructed_trials)


# Filename tokens that are parsed into integer metadata columns.
_FILENAME_PATTERNS = {
    'subject_id': re.compile(r'sub(\d+)'),
    'session_id': re.compile(r'sess(\d+)'),
    'run_id': re.compile(r'run(\d+)'),
    'experiment_id': re.compile(r'exp(\d+)'),
    'prior_std': re.compile(r'Pstd(\d+)'),
    'prior_mean': re.compile(r'mean(\d+)'),
}

_COORD_COLUMNS = ['estimate_x', 'estimate_y']


def _xy_pair(value):
    """Return the first two entries of a list/array as ``(x, y)``, else a NaN pair."""
    if isinstance(value, np.ndarray) and value.ndim == 0:
        # len() is undefined for 0-d arrays.
        return (np.nan, np.nan)
    if isinstance(value, (list, np.ndarray)) and len(value) >= 2:
        # Only the first two entries are used; longer entries are tolerated.
        return (value[0], value[1])
    return (np.nan, np.nan)


def _split_estimate_coords(df):
    """Replace the ``estimate_coords`` column with ``estimate_x`` / ``estimate_y``."""
    pairs = [_xy_pair(value) for value in df['estimate_coords']]
    coords = pd.DataFrame(pairs, columns=_COORD_COLUMNS, index=df.index)
    return pd.concat([df.drop('estimate_coords', axis=1), coords], axis=1)


def _load_block(file_path, filename, exp_folder, config):
    """Build the per-trial DataFrame for one ``.mat`` file, or ``None`` if it has no trials.

    Any exception raised while reading or assembling the file propagates to the
    caller, which decides how to report it.
    """
    mat = scipy.io.loadmat(file_path, simplify_cells=True)

    time_df = reconstruct_trial_data(mat)
    if time_df is None or time_df.empty:
        return None

    rand_vars = mat['task'][1].get('randVars', {})
    n_trials = len(time_df)

    stim_vars = {}
    for column, path in config['vars'].items():
        # Configured paths look like 'randVars.<field>'; the second dotted
        # component is the field name. A dotless path is used as-is.
        parts = path.split('.')
        field = parts[1] if len(parts) > 1 else parts[0]
        values = rand_vars.get(field)
        if values is not None:
            stim_vars[column] = values[:n_trials]
    stim_df = pd.DataFrame(stim_vars)

    meta = {'experiment_name': exp_folder}
    for column, pattern in _FILENAME_PATTERNS.items():
        match = pattern.search(filename)
        if match:
            meta[column] = int(match.group(1))
    meta_df = pd.DataFrame([meta] * n_trials)

    return pd.concat(
        [
            meta_df.reset_index(drop=True),
            time_df.reset_index(drop=True),
            stim_df.reset_index(drop=True),
        ],
        axis=1,
    )


def load_experiments_from_mat(base_path: str, experiment_configs: dict) -> dict[str, pd.DataFrame]:
    """
    Load every subject ``.mat`` file of each configured experiment into one DataFrame.

    For each key of ``experiment_configs`` the folder ``base_path/<key>`` is
    walked recursively. Every file whose name ends in ``.mat`` and contains
    ``sub`` is loaded, its trials are reconstructed with
    ``reconstruct_trial_data``, and the result is joined column-wise with the
    configured ``randVars`` fields and with integer metadata parsed from the
    filename (``sub``, ``sess``, ``run``, ``exp``, ``Pstd``, ``mean`` tokens).
    If the combined table has an ``estimate_coords`` column it is replaced by
    ``estimate_x`` and ``estimate_y``.

    Directories and files are visited in sorted order so the row order of the
    result does not depend on the filesystem.

    Args:
        base_path: Directory containing one sub-folder per experiment.
        experiment_configs: Maps experiment folder name to a config dict whose
            ``'vars'`` entry maps output column names to ``'randVars.<field>'``
            paths.

    Returns:
        A dict mapping experiment folder name to its combined DataFrame.
        Experiments whose folder is missing, or that yield no usable file, are
        omitted.

    Warns:
        RuntimeWarning: Once per file that could not be loaded or assembled;
            the file is skipped and loading continues.
    """
    all_experiments_data = {}

    for exp_folder, config in experiment_configs.items():
        exp_path = os.path.join(base_path, exp_folder)
        if not os.path.isdir(exp_path):
            continue

        blocks = []
        for dirpath, dirnames, filenames in os.walk(exp_path):
            dirnames.sort()  # in-place sort fixes the traversal order of os.walk
            for filename in sorted(filenames):
                if not (filename.endswith('.mat') and 'sub' in filename):
                    continue
                file_path = os.path.join(dirpath, filename)
                try:
                    block_df = _load_block(file_path, filename, exp_folder, config)
                except Exception as exc:
                    # Best-effort loading: one bad file must not abort the run,
                    # but it should not vanish without a trace either.
                    warnings.warn(
                        f"Skipping '{file_path}': {type(exc).__name__}: {exc}",
                        RuntimeWarning,
                        stacklevel=2,
                    )
                    continue
                if block_df is not None:
                    blocks.append(block_df)

        if blocks:
            df = pd.concat(blocks, ignore_index=True)
            if 'estimate_coords' in df.columns:
                df = _split_estimate_coords(df)
            all_experiments_data[exp_folder] = df

    return all_experiments_data

# --- Main Execution ---

# 1. Root folder of the data and, per experiment folder, the randVars fields to
#    extract (output column name -> 'randVars.<field>').
# NOTE(review): on Windows a bare drive spec such as 'D:' makes os.path.join
# produce drive-relative paths (relative to the current directory on that
# drive) — confirm this is the intended root.
BASE_DATA_PATH = r'D:'
EXPERIMENT_CONFIGS = {
    'data01_direction4priors': {
        'vars': {
            'motion_direction': 'randVars.myRandomDir',
            'motion_coherence': 'randVars.myRandomCoh',
            'response_arrow_start_angle': 'randVars.initAngledeg',
            'estimate_coords': 'randVars.prodcoor',
        },
    },
    'data02_direction1prior': {
        'vars': {
            'motion_direction': 'randVars.myRandomDir',
            'motion_coherence': 'randVars.myRandomCoh',
            'response_arrow_start_angle': 'randVars.initAngledeg',
            'estimate_coords': 'randVars.prodcoor',
        },
    },
    'data03_orientation': {
        'vars': {
            'stimulus_orientation': 'randVars.myRandomloc',
            'stimulus_contrast': 'randVars.myRandomCon',
            'response_arrow_start_angle': 'randVars.initAngledeg',
            'estimate_coords': 'randVars.prodcoor',
        },
    },
}

# 2. Build one DataFrame per experiment folder.
dataframes = load_experiments_from_mat(BASE_DATA_PATH, EXPERIMENT_CONFIGS)

# 3. Sanity check: report the shape and column names of everything that loaded.
if not dataframes:
    print("\nNo dataframes were created. Check paths and file contents.")
else:
    for exp_name, df in dataframes.items():
        print(
            f"\nExperiment: '{exp_name}'",
            f"  - Data Shape (rows, columns): {df.shape}",
            f"  - Columns: {list(df.columns)}",
            sep="\n",
        )


Experiment: 'data01_direction4priors'
  - Data Shape (rows, columns): (83213, 16)
  - Columns: ['experiment_name', 'subject_id', 'session_id', 'run_id', 'experiment_id', 'prior_std', 'prior_mean', 'trial_index', 'trial_time', 'motion_direction', 'motion_coherence', 'reaction_time', 'raw_response_time', 'response_arrow_start_angle', 'estimate_x', 'estimate_y']

Experiment: 'data02_direction1prior'
  - Data Shape (rows, columns): (53345, 15)
  - Columns: ['experiment_name', 'subject_id', 'session_id', 'run_id', 'prior_std', 'prior_mean', 'trial_index', 'trial_time', 'reaction_time', 'raw_response_time', 'motion_direction', 'motion_coherence', 'response_arrow_start_angle', 'estimate_x', 'estimate_y']

Experiment: 'data03_orientation'
  - Data Shape (rows, columns): (60062, 16)
  - Columns: ['experiment_name', 'subject_id', 'session_id', 'run_id', 'experiment_id', 'prior_std', 'prior_mean', 'trial_index', 'trial_time', 'reaction_time', 'raw_response_time', 'stimulus_orientation', 'stimulu