In [267]:
import os
import shutil
import re
import pandas as pd

In [268]:
# Define source and destination directories
src_dir = "./Data/Preprocessed"
dst_dir = "./Data/Processed"

# Ensure the destination directory exists
os.makedirs(dst_dir, exist_ok=True)

# Loop through all files in the source directory

for folder in os.listdir(src_dir):
    folder_path = os.path.join(src_dir, folder)
    folder_dst_dir = os.path.join(dst_dir, folder)

    os.makedirs(folder_dst_dir, exist_ok=True)

    if not os.path.isdir(folder_path):
        continue

    for filename in os.listdir(folder_path):
        if "filtered" in filename:
            continue

        if 'Ai213' in filename and ('.CSV' in filename or '.csv' in filename):

            start_index = filename.find('Ai213')
            # Remove all characters before 'Ai213' and change .CSV to .csv
            new_filename = filename[start_index:].replace('.CSV', '.csv')

            # Extract the grouping pattern Ai213_x=x_#x using regex
            match = re.search(r'Ai213_\d+-\d+_#\d+', new_filename)
            if match:
                if "Side_viewDLC_Resnet50" in filename:
                    new_filename = match.group(0) + "_Pose_Data.csv"
                
                # Create a subdirectory based on the matched pattern
                subfolder_name = match.group(0)
                subfolder_path = os.path.join(folder_dst_dir, subfolder_name)
                os.makedirs(subfolder_path, exist_ok=True)

                # Construct full file paths
                old_filepath = os.path.join(folder_path, filename)
                new_filepath = os.path.join(subfolder_path, new_filename)

                # Move and rename the file
                shutil.copy(old_filepath, new_filepath)

    print("Files have been renamed, grouped into folders, and moved to the new directory.")


Files have been renamed, grouped into folders, and moved to the new directory.
Files have been renamed, grouped into folders, and moved to the new directory.
Files have been renamed, grouped into folders, and moved to the new directory.
Files have been renamed, grouped into folders, and moved to the new directory.


In [269]:
def reduce_df(df_path):
    df = pd.read_csv(df_path, index_col=0)
    likelihood_cols = [col for col in df.columns if 'likelihood' in col]
    df['total_likelihood'] = df[likelihood_cols].sum(axis=1)

    # reduced_df = df.groupby(df.index // 8).first().reset_index(drop=True)
    # Group rows into groups of 8 and select the one with the highest total likelihood in each group
    grouped = df.groupby(df.index // 8)

    reduced_df = grouped.apply(lambda group: group.loc[group['total_likelihood'].idxmax()])

    bool_columns = ['Is_Voiding', 'Shock_Start', 'Shock_End', 'Tone_Start', 'Tone_End']
    for col in bool_columns:
        reduced_df[col] = grouped[col].any()

    # Drop the 'total_likelihood' column used for selection
    reduced_df = reduced_df.drop(columns=['total_likelihood'])
    reduced_df = reduced_df.reset_index(drop=True)
    reduced_df.to_csv(df_path)

In [270]:
def fix_void_timing(df_path):
    df = pd.read_csv(df_path)
    df['seconds'] = df['Var4'].str.extract(r'(\d+\.\d+)')
    df = df[['seconds']]
    df.to_csv(df_path)

In [None]:
def fix_pose_data(df_path):
    df = pd.read_csv(df_path)

    body_parts = df.iloc[0, 1:] 
    coords = df.iloc[1, 1:] 

    new_columns = []
    new_columns.append(f'Image')
    for part, coord in zip(body_parts, coords):
        new_columns.append(f'{part}_{coord}')
    df.columns = new_columns
    df = df[2:]
    df = df.reset_index(drop=True)
    df = df.drop(['Image'], axis=1)

    df = df.apply(pd.to_numeric, errors='coerce')


    likelihood_threshold = 0.75
    for column_group in df.columns[::3]:
        base_name = column_group[:-2]
        x_col = f"{base_name}_x"
        y_col = f"{base_name}_y"
        likelihood_col = f"{base_name}_likelihood"
        mask = df[likelihood_col] < likelihood_threshold
        df.loc[mask, x_col] = pd.NA
        df.loc[mask, y_col] = pd.NA

    df.interpolate(method='linear', inplace=True)
    df.fillna(method='ffill', inplace=True)
    df.fillna(method='bfill', inplace=True)
    # df.to_csv(df_path)
    reference_part = 'Nose'
    x_cols = [col for col in df.columns if col.endswith('_x')]
    y_cols = [col for col in df.columns if col.endswith('_y')]
    x_df = df[x_cols]
    y_df = df[y_cols]
    reference_x = x_df[f'{reference_part}_x']
    reference_y = y_df[f'{reference_part}_y']
    relative_x_df = x_df.subtract(reference_x, axis=0)
    relative_y_df = y_df.subtract(reference_y, axis=0)
    relative_coordinates = pd.concat([relative_x_df, relative_y_df], axis=1)
    relative_coordinates.to_csv(df_path)

    # I Cannot Get this to work, I dont know if it is even needed honestly, this code seems really strange?
    tail_root_y = f'TailRoot_y'
    body_center_y = f'BodyCenter_y'

    if tail_root_y in relative_coordinates.columns and body_center_y in relative_coordinates.columns:
        if relative_coordinates.loc[0, tail_root_y] < relative_coordinates.loc[0, body_center_y]:
            relative_coordinates.loc[0, tail_root_y] = relative_coordinates.loc[0, body_center_y]

        for k in range(1, len(relative_coordinates)):
            if relative_coordinates.loc[k, tail_root_y] < relative_coordinates.loc[k, body_center_y]:
                relative_coordinates.loc[k, tail_root_y] = relative_coordinates.loc[k - 1, tail_root_y]

    # Save the updated data
    relative_coordinates.to_csv(df_path)


In [272]:
def fix_time_df(df_path):
    df = pd.read_csv(df_path, header=None, names=['DateTime', 'Seconds'])
    df = df.replace(r'[()]', '', regex=True)
    df.to_csv(df_path)

In [273]:
def combine_dfs(pose_path, side_path, void_path, shock_on_path, shock_off_path, tone_on_path, tone_off_path, new_path):
    pose_data_df = pd.read_csv(pose_path, index_col=0)
    side_view_df = pd.read_csv(side_path , index_col=0)
    void_data_df = pd.read_csv(void_path , index_col=0)
    shock_on_df = pd.read_csv(shock_on_path, index_col=0)
    shock_off_df = pd.read_csv(shock_off_path, index_col=0)
    tone_on_df = pd.read_csv(tone_on_path, index_col=0)
    tone_off_df = pd.read_csv(tone_off_path, index_col=0)

    pose_time_df = pd.merge(pose_data_df, side_view_df, left_index=True, right_index=True)
    pose_time_df['Is_Voiding'] = False 

    for voidtime in void_data_df["seconds"]:
        pose_time_df['difference'] = (pose_time_df['Seconds'] - voidtime).abs()

        closest_index = pose_time_df['difference'].idxmin()

        pose_time_df.loc[closest_index, 'Is_Voiding'] = True
        pose_time_df = pose_time_df.drop(columns=["difference"])

    pose_time_df['Shock_Start'] = False 
    for shock_on in shock_on_df["side_Shock_frame"]:

        pose_time_df.loc[shock_on, 'Shock_Start'] = True
        
    pose_time_df['Shock_End'] = False 
    for shock_off in shock_off_df["side_Shock_frame"]:

        pose_time_df.loc[shock_off, 'Shock_End'] = True

    pose_time_df['Tone_Start'] = False 
    for tone_on in tone_on_df["side_Tone_frame"]:

        pose_time_df.loc[tone_on, 'Tone_Start'] = True

    pose_time_df['Tone_End'] = False 
    for tone_off in tone_off_df["side_Tone_frame"]:

        pose_time_df.loc[tone_off, 'Tone_End'] = True

    pose_time_df.to_csv(new_path)

In [274]:
for trial in os.listdir(dst_dir):
    trail_path = os.path.join(dst_dir, trial)
    
    folders = [os.path.join(trail_path, d) for d in os.listdir(trail_path) if os.path.isdir(os.path.join(trail_path, d))]
    for folder in folders:
        
        for filename in os.listdir(folder):
            if "Bottom_camera" in filename:
                bottom_path = os.path.join(folder,filename)

            if "Pose_Data" in filename:
                pose_path = os.path.join(folder,filename)

            if "ShockOffset" in filename:
                shock_off_path = os.path.join(folder,filename)

            if "ShockONset" in filename:
                shock_on_path = os.path.join(folder,filename)

            if "Side_view" in filename:
                side_path = os.path.join(folder,filename)

            if "ToneOffset" in filename:
                tone_off_path = os.path.join(folder,filename)

            if "ToneONset" in filename:
                tone_on_path = os.path.join(folder,filename)

            if "VoidTiming" in filename:
                void_path = os.path.join(folder,filename)


        fix_pose_data(pose_path)
        fix_void_timing(void_path)
        fix_time_df(bottom_path)
        fix_time_df(side_path)

        new_path = os.path.join(folder, "pose_void_tone_shock_combined.csv")
        combine_dfs(pose_path, side_path, void_path, shock_on_path, shock_off_path, tone_on_path, tone_off_path, new_path)
        reduce_df(new_path)


  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
  df = pd.read_csv(df_path)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplac

In [275]:
pose_data_df = pd.read_csv(pose_path, index_col=0)
side_view_df = pd.read_csv(side_path , index_col=0)
void_data_df = pd.read_csv(void_path , index_col=0)
shock_on_df = pd.read_csv(shock_on_path, index_col=0)
shock_off_df = pd.read_csv(shock_off_path, index_col=0)
tone_on_df = pd.read_csv(tone_on_path, index_col=0)
tone_off_df = pd.read_csv(tone_off_path, index_col=0)


In [276]:
pose_data_df

Unnamed: 0,RightEar_x,LeftEar_x,forehead_x,Nose_x,shoulder_x,Spine1_x,Spine2_x,Spine3_x,Hipbone_x,TailBase_x,...,Tail2_y,Tail3_y,Tail4_y,TailEnd_y,R_forepaw_y,L_forepaw_y,R_heel_y,R_hindpaw_y,L_heel_y,L_hindpaw_y
0,-75.63553,-102.18380,-66.44910,0.0,-113.785600,-154.32190,-201.09320,-238.192600,-261.07410,-291.67694,...,186.20025,164.68725,143.70625,130.59601,215.787410,214.974790,239.22145,244.62565,16.06615,16.32755
1,-74.17737,-102.24614,-64.99094,0.0,-114.119690,-155.25910,-202.07764,-236.734440,-260.48444,-290.00114,...,198.77448,178.00754,156.08344,141.24494,220.352845,222.013980,247.81884,252.11701,23.10534,23.36674
2,-78.23714,-104.27240,-68.85620,0.0,-117.299470,-159.96350,-209.06940,-240.599700,-265.44403,-292.99316,...,214.29000,193.39010,172.52855,157.97954,227.282151,233.001900,258.94480,262.25505,32.50840,32.76980
3,-80.53615,-103.50940,-70.96070,0.0,-119.932456,-161.88990,-211.39590,-240.648951,-268.88843,-295.58243,...,227.62950,207.30425,187.06610,171.69030,232.835196,228.384800,268.23516,270.51130,40.53520,40.79660
4,-82.13676,-105.98500,-72.36680,0.0,-121.867042,-164.37870,-213.04560,-239.999803,-269.49580,-296.10172,...,236.67145,215.58365,194.01835,178.75549,233.494991,218.874450,271.64471,273.54529,43.66875,43.93015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46695,35.71399,2.97986,15.92003,0.0,41.634175,69.52904,105.94942,146.900560,177.21450,209.51036,...,-10.53374,-22.66114,-25.63004,-35.35084,28.435460,31.706783,-64.22044,-59.03514,44.72821,64.12596
46696,36.72301,6.39281,16.66737,0.0,44.620610,69.83738,107.56364,147.657610,178.61791,211.19996,...,-9.65150,-21.43590,-25.36450,-31.67340,29.692000,32.863415,-62.96390,-57.77860,44.28260,66.06910
46697,40.73613,9.68516,18.72438,0.0,47.449680,72.07046,109.66526,149.808230,182.00022,213.41013,...,-7.81466,-19.64836,-24.30151,-30.49786,32.993940,36.065448,-59.66196,-54.47666,46.47924,69.91204
46698,44.67285,14.10076,23.14005,0.0,51.587680,75.98422,112.96878,153.163540,186.54590,218.17508,...,-4.81488,-17.71838,-21.93908,-25.39663,37.205670,40.177270,-55.45023,-50.26493,49.75047,73.96533
