In [1]:
from scipy import stats
import pandas as pd
import numpy as np
from math import atan2, degrees
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter, MaxNLocator
from scipy.interpolate import make_interp_spline
from scipy.interpolate import interp1d
from scipy import signal
from glob import glob
import re
from pathlib import Path
from itertools import product
import os
# Raise pandas' display limits so frames with up to 500 columns / 500 rows
# are rendered in full instead of being truncated.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Data Input, Parsing, and Cleanup

In [2]:

def combine_levels(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten multi-level column labels into single underscore-joined strings.

    Each column label (a tuple of level values) is joined with ``"_"``, e.g.
    ``("Mouse", "Snout", "x")`` becomes ``"Mouse_Snout_x"``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose column labels are tuples of strings.

    Returns
    -------
    pd.DataFrame
        A new frame with the flattened labels. The input frame is left
        untouched (the previous version overwrote ``df.columns`` in place,
        which silently changed the caller's frame).
    """
    flat_labels = df.columns.map("_".join)
    # set_axis returns a relabelled frame instead of mutating the argument.
    return df.set_axis(flat_labels, axis=1)


def read_raw_data(path: str) -> pd.DataFrame:
    """Load one tracking CSV into a frame with three-level column labels.

    The first line of the file is skipped, the next three lines are read as a
    three-level column header, and the first column is used as the index.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    pd.DataFrame
        The parsed table, exactly as returned by ``pd.read_csv``.
    """
    read_options = {
        "skiprows": [0],
        "header": [0, 1, 2],
        "index_col": [0],
    }
    return pd.read_csv(path, **read_options)


def drop_likelihood(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without any column whose label contains "likelihood".

    Parameters
    ----------
    df : pd.DataFrame
        Frame with string column labels.

    Returns
    -------
    pd.DataFrame
        A new frame holding only the remaining columns.
    """
    is_likelihood = df.columns.str.contains("likelihood")
    return df.drop(columns=df.columns[is_likelihood])

def clean_data(raw_df):
    """Prepare a freshly loaded tracking table for analysis.

    Steps, in order: name the index "frame", fill missing values by linear
    interpolation, flatten the multi-level column labels with
    ``combine_levels`` and remove the likelihood columns with
    ``drop_likelihood``.

    Parameters
    ----------
    raw_df : pd.DataFrame
        Table as returned by ``read_raw_data``.

    Returns
    -------
    pd.DataFrame
        The cleaned table.
    """
    framed = raw_df.rename_axis(index="frame")
    filled = framed.interpolate(method="linear")
    flattened = combine_levels(filled)
    return drop_likelihood(flattened)

# Body parts checked by body_outliers (snout, limbs and trunk markers).
top_body_parts = [
    "Mouse_Snout",
    "Mouse_Left_forelimb",
    "Mouse_Right_forelimb",
    "Mouse_Top_body",
    "Mouse_Left_hindlimb",
    "Mouse_Right_hindlimb",
    "Mouse_Bottom_body",
]

# Tail markers checked by tail_outliers.
tail_body_parts = [
    "Mouse_Tail_2base",
    "Mouse_Tail_15",
    "Mouse_Tail_1",
    "Mouse_Tail_05",
    "Mouse_Tail_0",
]
def dist(x1, y1, x2, y2, pixels_per_cm=51):
    """Euclidean distance between (x1, y1) and (x2, y2), converted to cm.

    Parameters
    ----------
    x1, y1, x2, y2 : scalar, array or pd.Series
        Point coordinates in pixels.
    pixels_per_cm : float, default 51
        Pixel-to-centimetre conversion factor.

    Returns
    -------
    Same kind as the inputs
        The distance divided by ``pixels_per_cm``.
    """
    pixel_distance = np.sqrt(np.square(x2 - x1) + np.square(y2 - y1))
    return pixel_distance / pixels_per_cm


def _drop_jump_outliers(df, body_parts, threshold):
    """Drop rows where any listed body part moved more than ``threshold`` cm
    relative to the previous row."""
    is_outlier = np.zeros(len(df), dtype=bool)
    for bp in body_parts:
        x, y = f"{bp}_x", f"{bp}_y"
        # Step length from the previous row to this one; the first row has no
        # predecessor, so its step is NaN and it is never flagged.
        step = dist(df[x], df[y], df[x].shift(1), df[y].shift(1))
        is_outlier |= (step > threshold).to_numpy()
    return df.loc[~is_outlier].copy()


def body_outliers(df, body_parts=None, threshold=10):
    """Remove rows in which a body marker jumps more than ``threshold`` cm.

    Parameters
    ----------
    df : pd.DataFrame
        Table with ``<part>_x`` / ``<part>_y`` columns.
    body_parts : list of str, optional
        Markers to check; defaults to ``top_body_parts``.
    threshold : float, default 10
        Largest allowed row-to-row displacement in cm.

    Returns
    -------
    pd.DataFrame
        A new frame without the flagged rows. Unlike the previous version,
        ``df`` itself is not modified (no scratch ``Distance`` column is
        written to it or removed from it).
    """
    parts = top_body_parts if body_parts is None else body_parts
    return _drop_jump_outliers(df, parts, threshold)


def tail_outliers(df, body_parts=None, threshold=7):
    """Remove rows in which a tail marker jumps more than ``threshold`` cm.

    Parameters
    ----------
    df : pd.DataFrame
        Table with ``<part>_x`` / ``<part>_y`` columns.
    body_parts : list of str, optional
        Markers to check; defaults to ``tail_body_parts``.
    threshold : float, default 7
        Largest allowed row-to-row displacement in cm.

    Returns
    -------
    pd.DataFrame
        A new frame without the flagged rows; ``df`` is not modified.
    """
    parts = tail_body_parts if body_parts is None else body_parts
    return _drop_jump_outliers(df, parts, threshold)


def outlier_clean(df):
    """Apply ``body_outliers`` and then ``tail_outliers`` to ``df``.

    The tail pass runs on the rows that survived the body pass.
    """
    return df.pipe(body_outliers).pipe(tail_outliers)

def walking_fig(df, name=None):
    """Plot and save the tail-base and snout trajectories of one recording.

    Parameters
    ----------
    df : pd.DataFrame
        Table containing ``Mouse_Tail_2base_x/_y`` and ``Mouse_Snout_x/_y``.
    name : str, optional
        Label used in the title and in the output file name. When omitted the
        notebook-level variable ``column_name`` is used, as before; passing it
        explicitly lets the function be called outside the file loop.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was saved, so the caller can display or close it.

    Side effects
    ------------
    Sets ``plt.rcParams["axes.linewidth"]`` to 5 and writes
    ``"<name> Walking Trajectory.jpeg"`` to the working directory.
    """
    if name is None:
        # Backward-compatible fallback to the global set by the file loop.
        name = column_name

    # Set before the axes exist so the frame is drawn with this line width.
    plt.rcParams["axes.linewidth"] = 5
    fig, ax = plt.subplots(figsize=(10, 10))

    ax.tick_params(axis='x', labelsize=40, length=9, width=4)
    ax.tick_params(axis='y', labelsize=40, length=9, width=4)
    ax.set_xticks([])
    ax.set_yticks([])

    # Labels
    ax.set_title(f"{name} Walking Trajectory", fontsize=20)

    # Trajectories: tail base in black, snout in pale turquoise
    ax.plot(df['Mouse_Tail_2base_x'], df['Mouse_Tail_2base_y'], color='black', marker='o', markersize=18, alpha=0.1)
    ax.plot(df['Mouse_Snout_x'], df['Mouse_Snout_y'], color='paleturquoise', marker='o', markersize=18, alpha=0.1)

    # Save
    fig.savefig(f"{name} Walking Trajectory.jpeg")
    return fig

# Combination Lists for All Possible Combinations

In [3]:
# Build every possible combination of body parts. Any of these lists can be
# passed to get_dist / get_angles in place of the hand-picked lists used in
# the final cell.
import itertools

# Everything except the tail markers
body_parts_no_tail = [
    "Mouse_Snout",
    "Mouse_Left_forelimb",
    "Mouse_Right_forelimb",
    "Mouse_Top_body",
    "Mouse_Left_hindlimb",
    "Mouse_Right_hindlimb",
    "Mouse_Bottom_body",
]

# All body parts: the non-tail markers followed by the tail markers
All_body_parts = body_parts_no_tail + [
    "Mouse_Tail_2base",
    "Mouse_Tail_15",
    "Mouse_Tail_1",
    "Mouse_Tail_05",
    "Mouse_Tail_0",
]

# Consecutive tail segments, from the base outwards
tail_parts = [
    ("Mouse_Tail_2base", "Mouse_Tail_15"),
    ("Mouse_Tail_15", "Mouse_Tail_1"),
    ("Mouse_Tail_1", "Mouse_Tail_05"),
    ("Mouse_Tail_05", "Mouse_Tail_0"),
]

# Pairs are used for distances, ordered triples for angles
Most_dists = list(itertools.combinations(body_parts_no_tail, 2)) + tail_parts
All_Angles = list(itertools.permutations(All_body_parts, 3))
All_dists = list(itertools.combinations(All_body_parts, 2))
print(len(All_Angles), len(All_dists), sep="\n")


1320
66


# Calculate Distances of Interest

In [4]:
# Body-part pairs whose distance is measured for every frame.
relevant_dist = [
    # limb to limb
    ("Mouse_Left_forelimb", "Mouse_Left_hindlimb"),
    ("Mouse_Right_forelimb", "Mouse_Right_hindlimb"),
    ("Mouse_Left_forelimb", "Mouse_Right_forelimb"),
    ("Mouse_Left_hindlimb", "Mouse_Right_hindlimb"),
    ("Mouse_Left_forelimb", "Mouse_Right_hindlimb"),
    ("Mouse_Right_forelimb", "Mouse_Left_hindlimb"),
    # limb to trunk / tail base
    ("Mouse_Left_forelimb", "Mouse_Top_body"),
    ("Mouse_Right_forelimb", "Mouse_Top_body"),
    ("Mouse_Left_hindlimb", "Mouse_Tail_2base"),
    ("Mouse_Right_hindlimb", "Mouse_Tail_2base"),
    # trunk
    ("Mouse_Top_body", "Mouse_Bottom_body"),
    # consecutive tail segments
    ("Mouse_Tail_2base", "Mouse_Tail_15"),
    ("Mouse_Tail_15", "Mouse_Tail_1"),
    ("Mouse_Tail_1", "Mouse_Tail_05"),
    ("Mouse_Tail_05", "Mouse_Tail_0"),
    # head
    ("Mouse_Snout", "Mouse_Top_body"),
]

def distance(x1, y1, x2, y2, pixels_per_cm=51):
    """Euclidean distance between (x1, y1) and (x2, y2), converted to cm.

    Parameters
    ----------
    x1, y1, x2, y2 : scalar, array or pd.Series
        Point coordinates in pixels.
    pixels_per_cm : float, default 51
        Pixel-to-centimetre conversion factor. The default keeps the value
        that was previously hard-coded.

    Returns
    -------
    Same kind as the inputs
        The pixel distance divided by ``pixels_per_cm``.
    """
    pixel_distance = np.sqrt(
        np.square(x2 - x1) + np.square(y2 - y1)
    )
    return pixel_distance / pixels_per_cm

def get_dist(df, bp_map):
    """Compute the per-row distance for every body-part pair in ``bp_map``.

    Parameters
    ----------
    df : pd.DataFrame
        Table with ``<part>_x`` and ``<part>_y`` columns.
    bp_map : iterable of (str, str)
        Pairs of body-part names.

    Returns
    -------
    pd.DataFrame
        One column per pair, named ``"<part1>_<part2>_dist"``, holding the
        result of ``distance`` for that pair.
    """
    dist_columns = {
        f"{first}_{second}_dist": distance(
            df[f"{first}_x"],
            df[f"{first}_y"],
            df[f"{second}_x"],
            df[f"{second}_y"],
        )
        for first, second in bp_map
    }
    return pd.DataFrame().assign(**dist_columns)

def sum_tails(df, max_length=8):
    """Collapse the tail-segment distances into one total tail length.

    Columns whose label contains "Tail" exactly twice (segment-to-segment
    distances) are summed row by row into ``Total_Tail_Length_dist`` and then
    removed from the result.

    Parameters
    ----------
    df : pd.DataFrame
        Distance table, e.g. the output of ``get_dist``.
    max_length : float, default 8
        Rows whose total tail length exceeds this value have every column set
        to NaN. The default keeps the previously hard-coded limit.

    Returns
    -------
    pd.DataFrame
        A new frame with the remaining columns followed by
        ``Total_Tail_Length_dist``; ``df`` is not modified.
    """
    # Columns with "Tail" twice in the label are tail-segment distances.
    tail_columns = df.columns[df.columns.str.count('Tail') == 2]

    # Start from the non-segment columns and append the per-row segment sum.
    new_df = df.drop(columns=tail_columns)
    new_df['Total_Tail_Length_dist'] = df[tail_columns].sum(axis=1)

    # Blank out the whole row when the total exceeds the limit.
    new_df.loc[new_df['Total_Tail_Length_dist'] > max_length] = np.nan

    return new_df

def merge_distances(df1, df2):
    """Outer-merge two frames on the columns they have in common.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Frames to combine.

    Returns
    -------
    pd.DataFrame
        Result of ``df1.merge(df2, how='outer')``.
    """
    return pd.merge(df1, df2, how='outer')

# Calculate Angles of Interest

In [5]:
# Angle triples as (first point, vertex, last point): the angle is measured at
# the middle body part.
Hindpaw_angle = [
    ("Mouse_Left_hindlimb", "Mouse_Tail_2base", "Mouse_Right_hindlimb"),
    ("Mouse_Snout", "Mouse_Tail_2base", "Mouse_Tail_0"),
]

def angle_between(row: pd.Series):
    """Angle in degrees (0-180) at the middle point of three points.

    Parameters
    ----------
    row : pd.Series
        Six values read by position: ``x1, y1, x2, y2, x3, y3``. The angle is
        measured at ``(x2, y2)`` between the directions towards ``(x1, y1)``
        and ``(x3, y3)``.

    Returns
    -------
    float
        The unsigned angle between the two directions.
    """
    x1, y1, vx, vy, x3, y3 = (row.iloc[pos] for pos in range(6))

    def bearing(px, py):
        # Direction from the vertex to (px, py), folded into [0, 360).
        return (360 + degrees(atan2(px - vx, py - vy))) % 360

    toward_last = bearing(x3, y3)
    toward_first = bearing(x1, y1)

    if toward_last <= toward_first:
        sweep = toward_first - toward_last
    else:
        sweep = 360 - (toward_last - toward_first)

    # Report the smaller of the two angles between the directions.
    return 360 - sweep if sweep > 180 else sweep


def get_angles(df, Hindpaw_angle):
    """Compute the per-row angle for every body-part triple.

    Parameters
    ----------
    df : pd.DataFrame
        Table with ``<part>_x`` and ``<part>_y`` columns.
    Hindpaw_angle : iterable of (str, str, str)
        Triples of body-part names; the angle is taken at the middle one.

    Returns
    -------
    pd.DataFrame
        One column per triple, named ``"<p1> to <p2> to <p3>_angle"``, filled
        by applying ``angle_between`` to each row.
    """
    angle_columns = {}
    for first, vertex, last in Hindpaw_angle:
        # Column order must be x1, y1, x2, y2, x3, y3: angle_between reads
        # its values by position.
        coord_columns = [
            f"{part}_{axis}"
            for part in (first, vertex, last)
            for axis in ("x", "y")
        ]
        label = f"{first} to {vertex} to {last}_angle"
        angle_columns[label] = df[coord_columns].apply(angle_between, axis=1)
    return pd.DataFrame().assign(**angle_columns)

def hindlimb_angle_graph(df, name=None):
    """Save a half-pie chart of the mean left-hindlimb / tail-base /
    right-hindlimb angle.

    Parameters
    ----------
    df : pd.DataFrame
        Table with the ``_x`` / ``_y`` columns of ``Mouse_Left_hindlimb``,
        ``Mouse_Tail_2base`` and ``Mouse_Right_hindlimb``.
    name : str, optional
        Label used in the output file name. When omitted the notebook-level
        variable ``column_name`` is used, as before.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was saved, so the caller can display or close it.

    Notes
    -----
    The chart is drawn on its own figure. Previously ``plt.pie`` drew on
    whichever figure was current; in the file loop that was the trajectory
    figure, so the saved pie kept the "Walking Trajectory" title. The 10x10
    inch size matches the figure the pie used to be drawn on in that loop.
    """
    if name is None:
        # Backward-compatible fallback to the global set by the file loop.
        name = column_name

    # Only this triple is plotted, so only this one is computed.
    hind_triple = ("Mouse_Left_hindlimb", "Mouse_Tail_2base", "Mouse_Right_hindlimb")
    data = get_angles(df, [hind_triple])
    mean = data['Mouse_Left_hindlimb to Mouse_Tail_2base to Mouse_Right_hindlimb_angle'].mean()

    # Upper half: the mean angle centred between two equal filler wedges.
    # Lower half: a single white 180-degree wedge.
    Big_pie = 180
    piece_of_Pie = (180 - mean) / 2
    whole_pie = [piece_of_Pie, mean, piece_of_Pie, Big_pie]

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.pie(whole_pie, colors=["#D3C5E5", "#735DA5", "#D3C5E5", 'white'], wedgeprops={"linewidth": 2, "edgecolor": "white"}, startangle=0)
    ax.legend(title=f'Mean Angle: {mean:.4}')

    fig.savefig(f'{name} Hindlimb Angle.jpeg')
    return fig

# Calculate Speed and Acceleration For Each Limb

In [6]:
# Body parts for which speed, velocity and acceleration are computed.
relevant_bp = [
    "Mouse_Snout",
    "Mouse_Left_forelimb",
    "Mouse_Right_forelimb",
    "Mouse_Left_hindlimb",
    "Mouse_Right_hindlimb",
    "Mouse_Tail_2base",
]

def distance(x1, y1, x2, y2, pixels_per_cm=51):
    """Euclidean distance between (x1, y1) and (x2, y2), converted to cm.

    NOTE: this re-defines the ``distance`` helper already defined in the
    "Calculate Distances of Interest" cell; the two must be kept in sync.

    Parameters
    ----------
    x1, y1, x2, y2 : scalar, array or pd.Series
        Point coordinates in pixels.
    pixels_per_cm : float, default 51
        Pixel-to-centimetre conversion factor.

    Returns
    -------
    Same kind as the inputs
        The pixel distance divided by ``pixels_per_cm``.
    """
    pixel_distance = np.sqrt(
        np.square(x2 - x1) + np.square(y2 - y1)
    )
    return pixel_distance / pixels_per_cm


def calc_speed_acc(df, body_parts, fps=60, Remove_0s=True):
    """Per-frame distance, speed, x-velocity and x-acceleration per body part.

    For each body part the position in the next row is compared with the
    position in the current row. Rows containing any NaN (this always includes
    the last row, which has no successor) are dropped before computing.

    Parameters
    ----------
    df : pd.DataFrame
        Table with ``<part>_x`` and ``<part>_y`` columns in pixels.
    body_parts : list of str
        Body parts to process.
    fps : float, default 60
        Frames per second; one row step lasts ``1 / fps`` seconds.
    Remove_0s : bool, default True
        If True, ``Mouse_Tail_2base`` rows with speed below 0.5 are set to NaN
        and, when that body part was processed, a ``"% of Removed 0s"`` column
        with the percentage of NaN tail-base speeds is added and printed.

    Returns
    -------
    pd.DataFrame
        For every body part the columns ``<part>_Cumulative_Distance``,
        ``<part>_Velocity``, ``<part>_Speed`` and ``<part>_Acceleration``.
        Rows with speed above 80 are set to NaN.

    Notes
    -----
    * Acceleration is ``(next velocity - current velocity) / dt``. The
      previous version subtracted in the opposite order, which produced the
      acceleration with its sign inverted.
    * ``Remove_0s=True`` no longer raises ``KeyError`` when
      ``Mouse_Tail_2base`` is not among ``body_parts``.
    """
    seconds_per_frame = 1 / fps
    output_columns = ["Cumulative_Distance", "Velocity", "Speed", "Acceleration"]

    results = {}
    for bp in body_parts:
        x, y = f"{bp}_x", f"{bp}_y"
        rows = (
            df.assign(
                new_frame_x=df[x].shift(-1),
                new_frame_y=df[y].shift(-1),
            )
            .dropna()
            .assign(
                Distance=lambda d: distance(
                    d[x], d[y], d["new_frame_x"], d["new_frame_y"]
                ),
                Cumulative_Distance=lambda d: d["Distance"].cumsum(),
                Speed=lambda d: d["Distance"] / seconds_per_frame,
                # Velocity uses the x displacement only (signed), with the same
                # 51 px/cm conversion as distance().
                Velocity=lambda d: ((d["new_frame_x"] - d[x]) / 51) / seconds_per_frame,
                # Change in velocity from this step to the next one.
                Acceleration=lambda d: (d["Velocity"].shift(-1) - d["Velocity"]) / seconds_per_frame,
            )
        )

        # Only the reported columns are blanked; they are all float columns,
        # so assigning NaN never has to change a column's dtype.
        # Remove speeds below 0.5 only for 'Mouse_Tail_2base' if Remove_0s is True
        if bp == 'Mouse_Tail_2base' and Remove_0s:
            rows.loc[rows['Speed'] < 0.5, output_columns] = np.nan
        # Set boundary for max values
        rows.loc[rows['Speed'] > 80, output_columns] = np.nan

        results[bp] = rows[output_columns].add_prefix(f"{bp}_")

    s_a_df = pd.concat(list(results.values()), axis=1)

    if Remove_0s and 'Mouse_Tail_2base_Speed' in s_a_df.columns:
        tail_speed = s_a_df['Mouse_Tail_2base_Speed']
        check = (tail_speed.isna().sum() / len(tail_speed)) * 100
        s_a_df["% of Removed 0s"] = check
        print(check)
    return s_a_df

# Collect Data Files

In [7]:
import re
import os
from glob import glob

def get_files():
    """Find the tracking CSVs in the working directory, in analysis order.

    Files are picked up with the glob ``*DLC*.csv`` and kept only when their
    name follows the convention ``AnimalID_Condition_<n>dpi_Ex<n>``.

    Returns
    -------
    list of str
        Absolute paths sorted by dpi, then condition, then experiment number.
    """
    # Make sure your data files follow this naming convention
    # AnimalID_Condition_dpi_ExperimentNumber
    name_pattern = re.compile(
        r"(\d*\w+)_(Baseline|Ctl|Sham|SCI|CCI|EAE|Cuprizone|mtPst1)_(-?\d+)dpi_Ex(\d+)"
    )

    keyed_paths = []
    for path in glob("*DLC*.csv"):
        found = name_pattern.search(path)
        if found is None:
            continue
        _animal_id, condition, dpi, exp_num = found.groups()
        sort_key = (int(dpi), condition, int(exp_num))
        keyed_paths.append((sort_key, os.path.abspath(path)))

    # Stable sort on the key only, so ties keep the order glob returned.
    keyed_paths.sort(key=lambda item: item[0])
    return [abs_path for _key, abs_path in keyed_paths]

# Consolidate and Output

In [None]:
DataFs = []
File_list = get_files()

Percents = []


def _describe_columns(frame, columns):
    """Return {"<col>_<stat>": value} with mean, median, min, max and std of
    each listed column."""
    return {
        f"{col}_{stat}": getattr(frame[col], stat)()
        for col in columns
        for stat in ("mean", "median", "min", "max", "std")
    }


per_file_stats = []

# Iterate over the files
for file in File_list:
    # Label for this file: the part of the file name before "cropped".
    # The previous regex only matched Windows "\" separators; when it did not
    # match, column_name was undefined or kept the previous file's value.
    # Falling back to the bare file name (without extension) keeps every file
    # uniquely labelled.
    file_name = os.path.basename(file)
    label, found_marker, _ = file_name.rpartition("cropped")
    column_name = label if found_marker and label else os.path.splitext(file_name)[0]

    df = read_raw_data(file)
    ccdf = clean_data(df)
    cdf = outlier_clean(ccdf)
    walking_fig(cdf)
    hindlimb_angle_graph(cdf)
    middf = get_dist(cdf, relevant_dist)  #can change to Most_dists or All_dists
    df1 = middf                           # same table; no need to compute it twice
    ddf = sum_tails(middf)
    adf = get_angles(cdf, Hindpaw_angle)  #can change to All_Angles
    # Change Remove_0s to True if you want to remove speeds below 0.5
    sav_df = calc_speed_acc(cdf, relevant_bp, fps=60, Remove_0s=False)

    ddf_cols = [col for col in ddf.columns if 'dist' in col]
    adf_cols = [col for col in adf.columns if 'angle' in col]
    sav_df_cols = [col for col in sav_df.columns if 'Speed' in col or 'Acceleration' in col or 'Velocity' in col]

    # Descriptive statistics for distances, angles and speed/velocity/acceleration
    stats_dict = {
        **_describe_columns(ddf, ddf_cols),
        **_describe_columns(adf, adf_cols),
        **_describe_columns(sav_df, sav_df_cols),
    }

    # One row of statistics per file, labelled with the file's name
    stats_df = pd.DataFrame(stats_dict, index=[column_name])
    per_file_stats.append(stats_df)

# Combine all files once (instead of growing the frame inside the loop)
last_df = pd.concat(per_file_stats) if per_file_stats else pd.DataFrame()

# Transpose so that each file becomes a column and each statistic a row
last_df = last_df.T
last_df.to_excel('Data_File_Name.xlsx', index=True)