In [424]:
import scipy.io
import os
import pandas as pd
import numpy as np

# Root folder of one recording day; every input folder used below is a direct child of it.
day_dir = '/Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222'
# In order: tracking CSVs, the day-level '*param.mat' folder, the ED MAT files,
# and the ED-trial -> video map MAT files.
csv_dir, Info_dir, EDfiles_dir, ED2videomap_dir = (
    os.path.join(day_dir, sub_folder)
    for sub_folder in ('DLC_color', 'Info', 'EDfiles', 'ED2videomap')
)

def extract_data_from_info(info_dir=None): #info of entire day
    """Read the day-level parameters from the '*param.mat' file of the Info folder.

    Parameters
    ----------
    info_dir : str, optional
        Folder to search. Defaults to the module-level ``Info_dir``.

    Returns
    -------
    recording_day_id
        The scalar stored in ``DDFparam.ID``.
    type_list : numpy.ndarray
        One integer per entry of ``SESSparam.fileConfig``, read from its ``HFS`` field.
    session_list : list
        ``SESSparam.SubSess.Files`` converted with ``tolist()``.
        Presumably one ``[first_file, last_file]`` pair per session -- that is how
        ``extract_data_from_edfiles_ed2videomap`` consumes it; confirm against the data.

    Raises
    ------
    FileNotFoundError
        If the folder contains no (non-hidden) file whose name ends with 'param.mat'.
    """
    if info_dir is None:
        info_dir = Info_dir

    # os.listdir returns entries in arbitrary order and also lists hidden files
    # (e.g. macOS '._*' companions), so filter those out and sort to make the
    # choice reproducible when more than one file matches.
    candidates = sorted(
        name for name in os.listdir(info_dir)
        if name.endswith('param.mat') and not name.startswith('.')
    )
    if not candidates:
        raise FileNotFoundError(
            "No '*param.mat' file found in {!r}".format(info_dir)
        )

    info_data = scipy.io.loadmat(os.path.join(info_dir, candidates[0]))

    # loadmat wraps MATLAB structs/scalars in nested arrays, hence the index chains.
    recording_day_id = info_data['DDFparam']['ID'][0][0][0][0]

    sess_param = info_data['SESSparam']
    file_config = sess_param['fileConfig'][0, 0]
    type_list = np.array([int(entry[0][0]) for entry in file_config['HFS'][0]])

    sub_sess = sess_param['SubSess'][0, 0]
    session_list = sub_sess['Files'][0][0].tolist()

    return recording_day_id, type_list, session_list

def extract_data_from_edfiles_ed2videomap(recording_day_id, type_list, session_list,
                                          edfiles_dir=None, ed2videomap_dir=None):
    """Build one row per non-failed trial from the ED files and their ED->video maps.

    For every MAT file in the ED folder, the file of the same name in the map
    folder is loaded as well. The file number is the last dot-separated token
    of the file name once the extension is removed.

    Parameters
    ----------
    recording_day_id
        Value written to the 'recording_day' column of every row.
    type_list
        Indexable by ``file_number - 1``; a value of 1 labels the file 'HFS',
        anything else 'Control'.
    session_list
        Iterable of ``(start, end)`` file-number pairs; the 1-based position of
        the first pair containing the file number becomes 'session_number'.
        Files matched by no pair get no session number.
    edfiles_dir, ed2videomap_dir : str, optional
        Folders to read. Default to the module-level ``EDfiles_dir`` and
        ``ED2videomap_dir``.

    Returns
    -------
    pandas.DataFrame
        Sorted by ('file_number', 'trial_number_in_file'); rows without a
        'DLC_color_id' are dropped. Still contains 'failed_non_failed'.

    Raises
    ------
    ValueError
        If the ED folder holds no MAT file, or (raised by pandas) if
        'TrialTimes' / 'bhvStat' do not have exactly one row per non-failed
        trial -- they are assigned positionally to the filtered rows.
    """
    if edfiles_dir is None:
        edfiles_dir = EDfiles_dir
    if ed2videomap_dir is None:
        ed2videomap_dir = ED2videomap_dir

    # Only real MAT files: os.listdir also returns hidden/system entries such as
    # macOS '.DS_Store', which would make loadmat fail.
    mat_file_names = sorted(
        name for name in os.listdir(edfiles_dir)
        if name.lower().endswith('.mat') and not name.startswith('.')
    )

    per_file_frames = []  # one DataFrame per ED file
    for file_name in mat_file_names:
        # '<prefix>.<N>.mat' -> N
        file_number = int(os.path.splitext(file_name)[0].split('.')[-1])

        # Load the ED file
        ed_data = scipy.io.loadmat(os.path.join(edfiles_dir, file_name))
        failed_non_failed = ed_data['trials'][:, 2]
        trial_events = ed_data['TrialTimes']
        id_target = ed_data['bhvStat'][:, 2]
        id_update = ed_data['bhvStat'][:, 3]

        # Load the map file (same file name, different folder)
        ed2video = scipy.io.loadmat(os.path.join(ed2videomap_dir, file_name))['ed2video']

        df = pd.DataFrame(np.round(ed2video), columns=['trial_number_in_file', 'DLC_color_id'])
        df['failed_non_failed'] = failed_non_failed
        df_non_failed = df[df['failed_non_failed'] == 1].copy()
        df_non_failed['file_number'] = file_number
        df_non_failed['recording_day'] = recording_day_id
        # The label depends only on the file, so compute it once instead of per row.
        df_non_failed['type'] = 'HFS' if type_list[file_number - 1] == 1 else 'Control'
        for session_number, (start, end) in enumerate(session_list, start=1):
            if start <= file_number <= end:
                df_non_failed['session_number'] = session_number
                break

        # Positional assignment: array lengths must equal the number of non-failed rows.
        df_non_failed['start_time'] = trial_events[:, 0]
        df_non_failed['end_time'] = trial_events[:, 10]
        df_non_failed['que_time'] = trial_events[:, 3]
        df_non_failed['go_time'] = trial_events[:, 4]
        df_non_failed['update_time'] = trial_events[:, 5]
        df_non_failed['move_time'] = trial_events[:, 6]
        df_non_failed['id_target'] = id_target
        df_non_failed['id_update'] = id_update
        per_file_frames.append(df_non_failed)

    if not per_file_frames:
        raise ValueError('No .mat files found in {!r}'.format(edfiles_dir))

    # Concatenate, order by file then trial, drop trials without a video id,
    # and store the identifier columns as integers.
    final_df_sorted = (
        pd.concat(per_file_frames, ignore_index=True)
        .sort_values(by=['file_number', 'trial_number_in_file'], ascending=[True, True])
        .reset_index(drop=True)
        .dropna(subset=['DLC_color_id'])
        .astype({
            'DLC_color_id': int,
            'trial_number_in_file': int,
            'file_number': int,
            'id_target': int,
        })
    )

    return final_df_sorted

def extract_data_from_csv(final_df_sorted, csv_folder=None):
    """Collect median fingertip coordinates for every trial id in the trial table.

    For each distinct value of ``final_df_sorted['DLC_color_id']`` the CSV files
    whose name contains ``"trial<id>-"`` are read. The second and third lines of
    such a file are joined into column names of the form ``<row2>_<row3>``
    (e.g. 'tip2_x'); the data start on the fourth line.

    Parameters
    ----------
    final_df_sorted : pandas.DataFrame
        Must contain a 'DLC_color_id' column convertible to int.
    csv_folder : str, optional
        Folder holding the CSV files. Defaults to the module-level ``csv_dir``.

    Returns
    -------
    pandas.DataFrame
        Columns 'DLC_color_id', 'x', 'y' with one row per CSV data row, where
        x / y are the row-wise medians of tip2..tip5. Empty (but correctly
        typed) when no file matches any id.
    """
    if csv_folder is None:
        csv_folder = csv_dir

    # List the folder once (it was re-listed for every id) and keep CSV files
    # only, so same-stem non-CSV files or hidden entries never reach read_csv.
    csv_file_names = sorted(
        name for name in os.listdir(csv_folder)
        if name.lower().endswith('.csv') and not name.startswith('.')
    )

    x_columns = ['tip2_x', 'tip3_x', 'tip4_x', 'tip5_x']
    y_columns = ['tip2_y', 'tip3_y', 'tip4_y', 'tip5_y']

    dfs = []
    # Iterate distinct ids: a repeated id would append the same coordinates
    # twice and multiply rows in the later merge on 'DLC_color_id'.
    for index in final_df_sorted['DLC_color_id'].astype(int).unique():
        pattern = "trial" + str(index) + "-"
        for file_name in csv_file_names:
            if pattern not in file_name:
                continue
            csv_file_path = os.path.join(csv_folder, file_name)
            # Line 1 is consumed as the header; the next two lines are the
            # two name rows that get joined.
            headers = pd.read_csv(csv_file_path, nrows=2)
            combined_header = ['_'.join(col) for col in zip(headers.iloc[0], headers.iloc[1])]
            df = pd.read_csv(csv_file_path, skiprows=3, names=combined_header)
            # Row-wise median across the four tip markers
            x_median = np.median(df[x_columns], axis=1)
            y_median = np.median(df[y_columns], axis=1)
            dfs.append(pd.DataFrame({'DLC_color_id': index, 'x': x_median, 'y': y_median}))

    if not dfs:
        # pd.concat([]) raises; return a typed empty frame so a merge still works.
        return pd.DataFrame({
            'DLC_color_id': pd.Series(dtype=int),
            'x': pd.Series(dtype=float),
            'y': pd.Series(dtype=float),
        })

    coord_df = pd.concat(dfs, ignore_index=True)

    return coord_df

def merge_events_coordinates(final_df_sorted, coord_df):
    """Left-join the coordinate rows onto the trial table via 'DLC_color_id'.

    Every trial row is kept (repeated once per matching coordinate row; x / y
    are NaN when nothing matches) and the 'failed_non_failed' column is left
    out of the result. Neither input frame is modified.
    """
    joined = pd.merge(final_df_sorted, coord_df, how='left', on='DLC_color_id')
    return joined.drop(columns=['failed_non_failed'])

In [421]:
# Day-level metadata read from the Info folder: recording-day id, per-file
# type flags and the session list.
recording_day_id, type_list, session_list = extract_data_from_info()
# One row per non-failed trial, built from the ED files and their ED->video maps.
events_df_sorted = extract_data_from_edfiles_ed2videomap(recording_day_id, type_list, session_list)
# Row-wise median x/y of tip2..tip5 from the CSV of every DLC_color_id in the trial table.
coord_df = extract_data_from_csv(events_df_sorted)
# Left-join the coordinates onto the trial rows via DLC_color_id.
merged_df = merge_events_coordinates(events_df_sorted, coord_df)


Unnamed: 0,DLC_color_id,x,y,trial_number_in_file,file_number,recording_day,type,session_number,start_time,end_time,que_time,go_time,update_time,move_time,id_target,id_update
0,1,5.085012,1.148422,1,1,32,Control,1,67.1168,72.226233,68.323667,68.6578,,68.809167,4,
1,1,5.799622,1.531673,1,1,32,Control,1,67.1168,72.226233,68.323667,68.6578,,68.809167,4,
2,1,163.021950,2.770684,1,1,32,Control,1,67.1168,72.226233,68.323667,68.6578,,68.809167,4,
3,1,162.626259,2.479106,1,1,32,Control,1,67.1168,72.226233,68.323667,68.6578,,68.809167,4,
4,1,162.626259,2.273755,1,1,32,Control,1,67.1168,72.226233,68.323667,68.6578,,68.809167,4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263963,1341,563.167084,165.665161,121,15,32,Control,1,442.3766,443.881933,442.940533,443.2911,,443.358233,4,
263964,1341,563.153900,166.022758,121,15,32,Control,1,442.3766,443.881933,442.940533,443.2911,,443.358233,4,
263965,1341,563.391266,166.023102,121,15,32,Control,1,442.3766,443.881933,442.940533,443.2911,,443.358233,4,
263966,1341,562.923828,166.023102,121,15,32,Control,1,442.3766,443.881933,442.940533,443.2911,,443.358233,4,


In [68]:

import os
import pandas as pd
import numpy as np

# Define the folder containing your CSV files
csv_dir = '/Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color'

# Initialize an empty list to hold DataFrames
dfs = []

# Columns whose row-wise median gives the x / y coordinate
x_columns = ['tip2_x', 'tip3_x', 'tip4_x', 'tip5_x']
y_columns = ['tip2_y', 'tip3_y', 'tip4_y', 'tip5_y']

# os.listdir returns entries in arbitrary order and includes non-CSV/hidden
# entries (e.g. '.DS_Store'); keep CSV files only and sort them so the ids
# assigned below are reproducible from run to run.
csv_file_names = sorted(
    name for name in os.listdir(csv_dir)
    if name.lower().endswith('.csv') and not name.startswith('.')
)

# Loop through all CSV files in the folder
# NOTE(review): 'id' is the 1-based position in the sorted listing, not the
# trial number embedded in the file name -- confirm this is what is wanted.
for i, file_name in enumerate(csv_file_names):
    # Construct full file path
    file_path = os.path.join(csv_dir, file_name)
    # Line 1 is consumed as the header; the next two lines are the name rows
    headers = pd.read_csv(file_path, nrows=3)
    # Combine the two rows to form a single header
    combined_header = ['_'.join(col) for col in zip(headers.iloc[0], headers.iloc[1])]
    # Load the data, skipping the first three rows, and use the unique header
    df = pd.read_csv(file_path, skiprows=3, names=combined_header)
    # Calculate the median for the relevant 'x' and 'y' columns
    x_median = np.median(df[x_columns], axis=1)
    y_median = np.median(df[y_columns], axis=1)
    # Create a new DataFrame with 'id', 'x_median', and 'y_median'
    temp_df = pd.DataFrame({'id': i+1, 'x': x_median,'y': y_median})
    # Append the DataFrame to the list
    dfs.append(temp_df)

# Concatenate all DataFrames into one
coord_data = pd.concat(dfs, ignore_index=True)






def load_data_from_files(search_path:str, indices:list):
    """Report every file under ``search_path`` whose name contains 'trial<index>-'.

    The tree is walked once per index and each match is printed as
    "Found pattern '<pattern>' in file: <full path>". Nothing is returned.

    NOTE(review): despite the name, no file content is loaded. The original
    ``if`` had no body; the print below was reconstructed from the cell output
    saved in the notebook -- confirm nothing else was intended here.
    """
    for index in indices:
        pattern = "trial" + str(index) + "-"
        for root, dirs, files in os.walk(search_path):
            for file in files:
                if pattern in file:
                    print("Found pattern '" + pattern + "' in file: " + os.path.join(root, file))


# NOTE(review): `indices` is not assigned at top level in any cell shown here
# (it only exists as a local inside extract_data_from_csv), so on a fresh
# kernel this call presumably raises NameError -- define it before this cell.
load_data_from_files("/Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color", indices)

Found pattern 'trial1-' in file: /Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color/tkhina-trial1-camera-6DLC_resnet152_t-rest-colorJul19shuffle1_750000_filtered.csv
Found pattern 'trial2-' in file: /Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color/tkhina-trial2-camera-6DLC_resnet152_t-rest-colorJul19shuffle1_750000_filtered.csv
Found pattern 'trial3-' in file: /Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color/tkhina-trial3-camera-6DLC_resnet152_t-rest-colorJul19shuffle1_750000_filtered.csv
Found pattern 'trial4-' in file: /Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color/tkhina-trial4-camera-6DLC_resnet152_t-rest-colorJul19shuffle1_750000_filtered.csv
Found pattern 'trial5-' in file: /Users/avitalvasiliev/Documents/GitHub/ExtractingTurningPointGUI/t011222/DLC_color/tkhina-trial5-camera-6DLC_resnet152_t-rest-colorJul19shuffle1_750000_filtered.csv
Found patt