In [None]:
import scipy.io
import pandas as pd
import numpy as np
import os

# Directory containing the .mat files
files_dir = 'data_matlab/thina'
# List all .mat files in the directory. The directory is scanned once so the
# count printed below always describes the exact list that gets processed.
mat_files = [f for f in os.listdir(files_dir) if f.endswith('.mat')]
print(f"Found {len(mat_files)} .mat files in the directory.")
# Order the files by the integer formed from every digit in the file name
# (e.g. 't010123_v2.mat' -> 101232). A name without any digit raises
# ValueError, because int('') is invalid.
# NOTE(review): recording_day is later derived from this order; if the digits
# encode a date, plain numeric order is not necessarily chronological - confirm.
sorted_files = sorted(
    mat_files,
    key=lambda x: int(''.join(filter(str.isdigit, x))))
# Initialize an empty list to store one DataFrame per processed file
all_trials_df = []

# Trial-type labels, chosen by the number of type rows found in a file
types_1 = ['Control', 'HFS', 'Washout']
types_2 = ['Control', 'HFS', 'Washout', 'Birst']
# Names of the per-sample columns of each trial
columns = ['time_milisecond', 'x', 'y', 'z', 'velocity']
# Names of the per-trial values stored next to each trial's samples
additional_columns = ['move_time', 'end_time', 'update_time', 'start_turning', 'end_turning', 'id_target', 'id_update']

def _per_trial_value(source, type_ind, i):
    """Return ``source[type_ind][0][0][i]``, or NaN when ``source`` is None.

    ``None`` marks a measure that does not apply to the kind of trial being
    built (for example ``MT`` for update trials).
    """
    return np.nan if source is None else source[type_ind][0][0][i]


def _build_trial_frames(label, hand_cells, trial_types, cue_column, cue_source,
                        measures, file_name, total_trials, trials_in_this_file):
    """Build one DataFrame per trial for every trial type in ``hand_cells``.

    Parameters
    ----------
    label : str
        Text used in the progress message ('Update' or 'Non-Update').
    hand_cells : array
        Indexed as ``hand_cells[type_ind, 0][0][i]`` for the samples of trial
        ``i`` and ``hand_cells[type_ind, 0][1][i][0]`` for its per-trial values.
    trial_types : list of str
        Type label for each row of ``hand_cells``.
    cue_column : str
        Name of the column receiving the cue-to-go value ('Cue2GoU' or 'Cue2Go').
    cue_source : array
        Per-trial cue-to-go values, indexed ``[type_ind][0][0][i]``.
    measures : dict
        Maps 'Go2U', 'MT', 'MTu', 'RT', 'RTu', 'updTime' and 'updPause' to an
        array indexed like ``cue_source``, or to None to fill the column with NaN.
    file_name : str
        Value written to the 'file_name' column.
    total_trials : int
        Number of trials already processed in previous files (offset for 'id').
    trials_in_this_file : int
        Number of trials already built for the current file.

    Returns
    -------
    list of pandas.DataFrame
        One frame per trial, in type order and then trial order.
    """
    frames = []
    for type_ind, type_name in enumerate(trial_types):
        type_cell = hand_cells[type_ind, 0]
        num_trials_type = type_cell.shape[1]
        print(f"Processing {label} {type_name} trials: {num_trials_type} trials found.")
        for i in range(num_trials_type):
            trial_df = pd.DataFrame(type_cell[0][i][:], columns=columns)

            # Per-trial values are repeated on every sample row of the trial.
            additional_values = type_cell[1][i][0]
            additional_data = {
                col_name: [additional_values[col_index]] * len(trial_df)
                for col_index, col_name in enumerate(additional_columns)
            }
            trial_df = trial_df.assign(**additional_data)

            # Numbering continues across both trial kinds within a file, and
            # 'id' continues across files.
            trial_number = trials_in_this_file + len(frames) + 1
            trial_df['id'] = total_trials + trial_number
            # Label each trial with its own type; update trials must not all
            # be tagged 'Control'.
            trial_df['type'] = type_name
            trial_df['file_name'] = file_name
            trial_df['trial_number_in_file'] = trial_number

            # The first sample's timestamp is used as the cue time.
            first_sample_time = trial_df['time_milisecond'].iloc[0]
            trial_df[cue_column] = cue_source[type_ind][0][0][i]
            trial_df['Go2U'] = _per_trial_value(measures['Go2U'], type_ind, i)
            trial_df['go_time'] = first_sample_time + trial_df[cue_column]
            trial_df['cue_time'] = first_sample_time
            for col_name in ('MT', 'MTu', 'RT', 'RTu', 'updTime', 'updPause'):
                trial_df[col_name] = _per_trial_value(measures[col_name], type_ind, i)
            frames.append(trial_df)
    return frames


def process_mat_file(file_path, total_trials):
    """Load one .mat file and return all of its trials as a single DataFrame.

    Update trials (``handposU`` and the ``*U``/``*u``/``upd*`` variables) are
    processed first, then non-update trials (``handpos``, ``Cue2Go``, ``RT``,
    ``MT``). Measures that do not apply to a trial kind are filled with NaN.

    Parameters
    ----------
    file_path : str
        Path of the .mat file to load.
    total_trials : int
        Number of trials already processed in earlier files; trial ids in this
        file start at ``total_trials + 1``.

    Returns
    -------
    (pandas.DataFrame, int)
        The concatenated per-sample rows of every trial (with an added
        'turning_point' column, the row-wise mean of 'start_turning' and
        'end_turning'), and the updated running trial count.

    Raises
    ------
    KeyError
        If an expected variable is missing from the file.
    ValueError
        If ``handposU`` has neither 3 nor 4 type rows, or the file holds no trials.
    """
    # Load the .mat file
    mat_data = scipy.io.loadmat(file_path)
    print(f"Processing file: {file_path}")
    file_name = os.path.basename(file_path).replace('.mat', '')

    handposU = mat_data['handposU']
    handpos = mat_data['handpos']

    # The number of type rows decides which label set applies; fail loudly on
    # anything else instead of hitting an unbound local further down.
    num_types = handposU.shape[0]
    if num_types == 3:
        types = types_1
    elif num_types == 4:
        types = types_2
    else:
        raise ValueError(
            f"{file_path}: expected 3 or 4 trial types in 'handposU', found {num_types}")

    update_measures = {
        'Go2U': mat_data['Go2U'],
        'MT': None,
        'MTu': mat_data['MTu'],
        'RT': None,
        'RTu': mat_data['RTu'],
        'updTime': mat_data['updTime'],
        'updPause': mat_data['updPause'],
    }
    non_update_measures = {
        'Go2U': None,
        'MT': mat_data['MT'],
        'MTu': None,
        'RT': mat_data['RT'],
        'RTu': None,
        'updTime': None,
        'updPause': None,
    }

    trial_dfs = _build_trial_frames(
        'Update', handposU, types, 'Cue2GoU', mat_data['Cue2GoU'],
        update_measures, file_name, total_trials, 0)
    trial_dfs += _build_trial_frames(
        'Non-Update', handpos, types, 'Cue2Go', mat_data['Cue2Go'],
        non_update_measures, file_name, total_trials, len(trial_dfs))

    trials_in_this_file = len(trial_dfs)
    print(f"Total trials in this file: {trials_in_this_file}")
    if not trial_dfs:
        raise ValueError(f"No trials found in {file_path}")

    # Concatenate all trial DataFrames for this file into one DataFrame
    concatenated_df = pd.concat(trial_dfs, ignore_index=True)
    concatenated_df['turning_point'] = concatenated_df[['start_turning', 'end_turning']].mean(axis=1)

    return concatenated_df, total_trials + trials_in_this_file


# Running count of trials across all files; process_mat_file uses it as the
# offset for trial ids and returns the updated count.
total_trials = 0

# Process each file in sorted order and collect the per-file DataFrames
for day_index, file in enumerate(sorted_files):
    print(f"Processing file {day_index+1}/{len(sorted_files)}: {file}")
    file_path = os.path.join(files_dir, file)
    file_df, total_trials = process_mat_file(file_path, total_trials)
    # recording_day is the 1-based position of the file in sorted_files
    file_df['recording_day'] = day_index+1
    print(f"Processed {file}: {file_df['id'].nunique()} trials")
    all_trials_df.append(file_df)

# pd.concat rejects an empty list with a generic message; say what actually went wrong
if not all_trials_df:
    raise ValueError(f"No .mat files were processed from {files_dir!r}")

# Concatenate all files' DataFrames into one big DataFrame
final_df = pd.concat(all_trials_df, ignore_index=True)

Found 20 .mat files in the directory.
Processing file 1/20: t010123_v2.mat
Processing file: /Users/avitalvasiliev/Documents/GitHub/TaggingV2Nirvik/ThinaData2/t010123_v2.mat
Processing Update Control trials: 57 trials found.
Processing Update HFS trials: 27 trials found.
Processing Update Washout trials: 36 trials found.
Processing Update Birst trials: 0 trials found.
Processing Non-Update Control trials: 244 trials found.
Processing Non-Update HFS trials: 148 trials found.
Processing Non-Update Washout trials: 134 trials found.
Processing Non-Update Birst trials: 0 trials found.
Total trials in this file: 646
Processed t010123_v2.mat: 646 trials
Processing file 2/20: t011222_v2.mat
Processing file: /Users/avitalvasiliev/Documents/GitHub/TaggingV2Nirvik/ThinaData2/t011222_v2.mat
Processing Update Control trials: 78 trials found.
Processing Update HFS trials: 47 trials found.
Processing Update Washout trials: 35 trials found.
Processing Update Birst trials: 0 trials found.
Processing Non

In [None]:
# Write the combined trial table to CSV without the DataFrame index.
# NOTE(review): `RawData` is not defined in the visible cells; the `/` join
# presumably expects a pathlib.Path set elsewhere - confirm it exists on a
# fresh kernel (Restart & Run All).
output_csv_path = RawData/'FinalDfThina.csv'
final_df.to_csv(output_csv_path, index=False)