In [1]:
# imports 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
import seaborn as sns
import ruptures as rpt

In [2]:
def convertDF(path): 
    '''converting a dataframe to a new datafrom with float, 5 decimal places and '''

    # reading in file, and skipping top row
    df = pd.read_csv(path, skiprows=1)

    print(df)

    # defining the titles of the rows
    body_loc = df.columns.tolist()

    # creating new empty dataframe
    df_new = pd.DataFrame()


    for part in body_loc:
        if part == 'bodyparts' in part:
            continue
        else:
            df_new[part] = pd.to_numeric(df[part].iloc[1:], errors='coerce').dropna().round(5).astype(float)


    return df_new

In [3]:
df = convertDF(r'X:\RadcliffeE\for_MorganHampton_BoettcherScholar\Subject Case Data\IO_03_09_2023_RSTN_subject1\Processed DLC\csv folder\20230309_b1_d0p4_session001_frontCam-0000DLC_resnet50_IO_2023-03-09_v1Jul26shuffle1_100000.csv')

     bodyparts            PalmBase         PalmBase.1           PalmBase.2  \
0       coords                   x                  y           likelihood   
1            0   427.6842041015625  525.0423583984375  0.14588609337806702   
2            1   427.5551452636719  525.5269775390625   0.1911419928073883   
3            2   428.8374938964844  525.0931396484375  0.17686539888381958   
4            3   427.6429138183594  525.9227905273438   0.1331915706396103   
...        ...                 ...                ...                  ...   
4254      4253   409.2789306640625  529.5111694335938  0.20932647585868835   
4255      4254        410.21484375  528.4324951171875   0.1871751844882965   
4256      4255  409.49407958984375   529.206298828125  0.22666633129119873   
4257      4256   409.3077697753906  529.0945434570312  0.23516501486301422   
4258      4257   409.7013854980469  529.1215209960938   0.1964298039674759   

                    MCP1             MCP1.1               MCP1.

In [4]:
import pandas as pd
import numpy as np

# Copy your DataFrame
cleaned_df = df.copy()

# Get base marker names by stripping '.1' and '.2'
marker_names = set(col.split('.')[0] for col in df.columns if '.2' in col)

for marker in marker_names:
    x_col = marker
    y_col = marker + '.1'
    l_col = marker + '.2'

    # Phase 1: Find first row where likelihood >= 0.8
    first_valid_idx = cleaned_df[cleaned_df[l_col] >= 0.8].index.min()

    # Remove all rows before first high-confidence point
    cleaned_df.loc[:first_valid_idx - 1, [x_col, y_col, l_col]] = np.nan

    # Phase 2: Set x/y to NaN where likelihood < 0.95
    low_conf_mask = cleaned_df[l_col] < 0.95
    cleaned_df.loc[low_conf_mask, [x_col, y_col]] = np.nan

    # Interpolate x and y
    cleaned_df[[x_col, y_col]] = cleaned_df[[x_col, y_col]].interpolate(method='linear', limit_direction='both')

# (Optional) Reset index or drop NaNs if needed
# cleaned_df = cleaned_df.dropna(subset=marker_names)  # if you want to drop completely bad rows

# cleaned_df now has cleaned x/y data based on per-marker likelihoods

cleaned_df


Unnamed: 0,PalmBase,PalmBase.1,PalmBase.2,MCP1,MCP1.1,MCP1.2,MCP2,MCP2.1,MCP2.2,MCP3,...,fTip5.2,MidForeArm,MidForeArm.1,MidForeArm.2,Elbow,Elbow.1,Elbow.2,MidUpperArm,MidUpperArm.1,MidUpperArm.2
1,400.61963,462.06732,,372.84390,448.05020,,418.41714,499.69974,,416.17508,...,,403.50226,498.96652,0.96144,411.58377,520.14423,,,,
2,400.61963,462.06732,,372.84390,448.05020,,418.41714,499.69974,,416.17508,...,,403.92142,498.57404,0.97414,411.58377,520.14423,,,,
3,400.61963,462.06732,,372.84390,448.05020,,418.41714,499.69974,,416.17508,...,,403.67477,499.32172,0.97494,411.58377,520.14423,,,,
4,400.61963,462.06732,,372.84390,448.05020,,418.41714,499.69974,,416.17508,...,,403.82156,498.84067,0.97444,411.58377,520.14423,,,,
5,400.61963,462.06732,,372.84390,448.05020,,418.41714,499.69974,,416.17508,...,,403.57819,499.19968,0.98218,411.58377,520.14423,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4254,421.39304,517.78931,0.20933,401.88693,512.87585,0.94712,419.35519,505.83731,0.99490,433.84900,...,0.04167,399.36777,498.08112,0.95745,412.41174,509.74567,0.05781,,,
4255,421.39304,517.78931,0.18718,401.88693,512.87585,0.93889,419.33099,505.77332,0.99508,434.20978,...,0.03273,399.50476,497.88647,0.95297,412.41174,509.74567,0.06670,,,
4256,421.39304,517.78931,0.22667,401.88693,512.87585,0.94178,419.25885,505.88922,0.99413,433.54303,...,0.03111,399.65521,498.39529,0.95158,412.41174,509.74567,0.06914,,,
4257,421.39304,517.78931,0.23517,401.88693,512.87585,0.94368,418.84924,506.24756,0.99355,433.41946,...,0.02979,399.34567,498.01099,0.96211,412.41174,509.74567,0.06746,,,


In [5]:
# defining my path to pt one
IO_03_09_2023_RSTN = Path(r'X:\RadcliffeE\for_MorganHampton_BoettcherScholar\Subject Case Data\IO_03_09_2023_RSTN_subject1\Processed DLC\csv folder')

# list of all the videos for this specific patient (c1s is missing for this patient)
dbs_loc = ['b1f', 'b1s', 'b2f', 'b2s', 'b3f', 'b3s', 'c1f', 'c1s', 'c2s', 'c3f', 'c3s', 't1f', 't1s', 't2f', 't2s', 't3f', 't3s']

# place to store all the df
dataframes = {}

files = list(IO_03_09_2023_RSTN.iterdir())

from pathlib import Path

# Assuming:
# - pt is a Path object to your directory of .csv or .h5 files
# - dbs_loc is a list of labels (e.g., subject names or conditions)
# - convertDF(file) loads and converts the DataFrame (e.g., to float)

dataframes = {}       # store original data
cleaned_dfs = {}      # store cleaned versions

# Loop over each file and its corresponding label
for file, label in zip(pt.iterdir(), dbs_loc):
    df = convertDF(file)                         # Load + convert
    dataframes[label] = df                       # Save original
    cleaned_dfs[label] = clean_and_interpolate_likelihood(df)  # Clean
    print(f"{label} loaded and cleaned from {file.name}")


NameError: name 'pt' is not defined

In [7]:
b1f = dataframes['b1f']

In [8]:
b1f

Unnamed: 0,PalmBase,PalmBase.1,PalmBase.2,MCP1,MCP1.1,MCP1.2,MCP2,MCP2.1,MCP2.2,MCP3,...,fTip5.2,MidForeArm,MidForeArm.1,MidForeArm.2,Elbow,Elbow.1,Elbow.2,MidUpperArm,MidUpperArm.1,MidUpperArm.2
1,427.68420,525.04236,0.14589,410.07465,512.46381,0.30188,422.62207,512.34229,0.65524,431.66183,...,0.08080,403.50226,498.96652,0.96144,428.74121,521.85260,0.04421,413.70947,508.22092,0.01721
2,427.55515,525.52698,0.19114,416.32242,519.02728,0.33058,422.38318,512.05750,0.62262,432.13925,...,0.04027,403.92142,498.57404,0.97414,428.34988,521.21143,0.06473,413.91519,508.15848,0.01832
3,428.83749,525.09314,0.17687,416.42743,514.66742,0.28389,426.05865,513.35712,0.49559,433.23996,...,0.02557,403.67477,499.32172,0.97494,429.20349,521.05377,0.04931,421.22336,511.42886,0.01794
4,427.64291,525.92279,0.13319,416.71466,518.57947,0.37307,425.62653,513.52405,0.61227,433.05511,...,0.02045,403.82156,498.84067,0.97444,427.55899,520.01587,0.06915,421.07886,510.67938,0.02062
5,426.92892,527.05646,0.13632,416.20743,519.60193,0.46074,425.37756,513.96991,0.67439,433.56567,...,0.02762,403.57819,499.19968,0.98218,426.80386,521.10657,0.07529,427.26505,519.75195,0.01845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4254,409.27893,529.51117,0.20933,402.01764,512.64917,0.94712,419.35519,505.83731,0.99490,433.84900,...,0.04167,399.36777,498.08112,0.95745,404.47647,510.55844,0.05781,404.61240,507.07516,0.00635
4255,410.21484,528.43250,0.18718,402.41074,512.63715,0.93889,419.33099,505.77332,0.99508,434.20978,...,0.03273,399.50476,497.88647,0.95297,404.35373,510.64697,0.06670,404.78781,507.02295,0.00576
4256,409.49408,529.20630,0.22667,402.52710,512.88135,0.94178,419.25885,505.88922,0.99413,433.54303,...,0.03111,399.65521,498.39529,0.95158,404.72327,510.47168,0.06914,413.33014,512.70062,0.00767
4257,409.30777,529.09454,0.23517,402.19760,512.87628,0.94368,418.84924,506.24756,0.99355,433.41946,...,0.02979,399.34567,498.01099,0.96211,404.98578,510.48285,0.06746,411.86682,519.94336,0.00756


In [35]:
import pandas as pd
import numpy as np

def clean_and_interpolate_likelihood(df, threshold_start=0.8, threshold_interp=0.95):
    cleaned_df = df.copy()

    # Find all base markers from likelihood columns
    marker_names = set(col.split('.')[0] for col in df.columns if col.endswith('.2'))

    for marker in marker_names:
        x_col = marker
        y_col = f"{marker}.1"
        l_col = f"{marker}.2"

        if not all(c in df.columns for c in [x_col, y_col, l_col]):
            continue  # skip incomplete markers

        first_valid_idx = cleaned_df[cleaned_df[l_col] >= threshold_start].index.min()

        if pd.isna(first_valid_idx):
            # no good data — set all x and y to 0
            cleaned_df[[x_col, y_col]] = 0
            continue

        # Set x and y to 0 before first high likelihood frame and likelihood < threshold_start
        early_rows = cleaned_df.index < first_valid_idx
        low_conf_mask = cleaned_df[l_col] < threshold_start
        cleaned_df.loc[early_rows & low_conf_mask, [x_col, y_col]] = 0

        # Set x and y to NaN where likelihood < threshold_interp for interpolation
        interp_mask = cleaned_df[l_col] < threshold_interp
        cleaned_df.loc[interp_mask, [x_col, y_col]] = np.nan

        # Interpolate linearly
        cleaned_df[[x_col, y_col]] = cleaned_df[[x_col, y_col]].interpolate(limit_direction='both')

    return cleaned_df


In [36]:
# defining my path to pt one
pt = Path(r'X:\RadcliffeE\for_MorganHampton_BoettcherScholar\Subject Case Data\IO_03_09_2023_RSTN_subject1\Processed DLC\csv folder')

# list of all the videos for this specific patient (c1s is missing for this patient)
dbs_loc = ['b1f', 'b1s', 'b2f', 'b2s', 'b3f', 'b3s', 'c1f', 'c1s', 'c2s', 'c3f', 'c3s', 't1f', 't1s', 't2f', 't2s', 't3f', 't3s']

files = list(pt.iterdir())

from pathlib import Path

# Assuming:
# - pt is a Path object to your directory of .csv or .h5 files
# - dbs_loc is a list of labels (e.g., subject names or conditions)
# - convertDF(file) loads and converts the DataFrame (e.g., to float)

dataframes = {}       # store original data
cleaned_dfs = {}      # store cleaned versions

# Loop over each file and its corresponding label
for file, label in zip(pt.iterdir(), dbs_loc):
    df = convertDF(file)                         # Load + convert
    dataframes[label] = df                       # Save original
    cleaned_dfs[label] = clean_and_interpolate_likelihood(df)  # Clean
    print(f"{label} loaded and cleaned from {file.name}")


     bodyparts            PalmBase         PalmBase.1           PalmBase.2  \
0       coords                   x                  y           likelihood   
1            0   427.6842041015625  525.0423583984375  0.14588609337806702   
2            1   427.5551452636719  525.5269775390625   0.1911419928073883   
3            2   428.8374938964844  525.0931396484375  0.17686539888381958   
4            3   427.6429138183594  525.9227905273438   0.1331915706396103   
...        ...                 ...                ...                  ...   
4254      4253   409.2789306640625  529.5111694335938  0.20932647585868835   
4255      4254        410.21484375  528.4324951171875   0.1871751844882965   
4256      4255  409.49407958984375   529.206298828125  0.22666633129119873   
4257      4256   409.3077697753906  529.0945434570312  0.23516501486301422   
4258      4257   409.7013854980469  529.1215209960938   0.1964298039674759   

                    MCP1             MCP1.1               MCP1.

In [37]:
cbf1 = cleaned_dfs['b1f']
bf1 = dataframes['b1f']

In [38]:
cbf1

Unnamed: 0,PalmBase,PalmBase.1,PalmBase.2,MCP1,MCP1.1,MCP1.2,MCP2,MCP2.1,MCP2.2,MCP3,...,fTip5.2,MidForeArm,MidForeArm.1,MidForeArm.2,Elbow,Elbow.1,Elbow.2,MidUpperArm,MidUpperArm.1,MidUpperArm.2
1,400.61963,462.06732,0.14589,372.84390,448.05020,0.30188,418.41714,499.69974,0.65524,416.17508,...,0.08080,403.50226,498.96652,0.96144,411.58377,520.14423,0.04421,0,0,0.01721
2,400.61963,462.06732,0.19114,372.84390,448.05020,0.33058,418.41714,499.69974,0.62262,416.17508,...,0.04027,403.92142,498.57404,0.97414,411.58377,520.14423,0.06473,0,0,0.01832
3,400.61963,462.06732,0.17687,372.84390,448.05020,0.28389,418.41714,499.69974,0.49559,416.17508,...,0.02557,403.67477,499.32172,0.97494,411.58377,520.14423,0.04931,0,0,0.01794
4,400.61963,462.06732,0.13319,372.84390,448.05020,0.37307,418.41714,499.69974,0.61227,416.17508,...,0.02045,403.82156,498.84067,0.97444,411.58377,520.14423,0.06915,0,0,0.02062
5,400.61963,462.06732,0.13632,372.84390,448.05020,0.46074,418.41714,499.69974,0.67439,416.17508,...,0.02762,403.57819,499.19968,0.98218,411.58377,520.14423,0.07529,0,0,0.01845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4254,421.39304,517.78931,0.20933,401.88693,512.87585,0.94712,419.35519,505.83731,0.99490,433.84900,...,0.04167,399.36777,498.08112,0.95745,412.41174,509.74567,0.05781,0,0,0.00635
4255,421.39304,517.78931,0.18718,401.88693,512.87585,0.93889,419.33099,505.77332,0.99508,434.20978,...,0.03273,399.50476,497.88647,0.95297,412.41174,509.74567,0.06670,0,0,0.00576
4256,421.39304,517.78931,0.22667,401.88693,512.87585,0.94178,419.25885,505.88922,0.99413,433.54303,...,0.03111,399.65521,498.39529,0.95158,412.41174,509.74567,0.06914,0,0,0.00767
4257,421.39304,517.78931,0.23517,401.88693,512.87585,0.94368,418.84924,506.24756,0.99355,433.41946,...,0.02979,399.34567,498.01099,0.96211,412.41174,509.74567,0.06746,0,0,0.00756


In [26]:
bf1

Unnamed: 0,PalmBase,PalmBase.1,PalmBase.2,MCP1,MCP1.1,MCP1.2,MCP2,MCP2.1,MCP2.2,MCP3,...,fTip5.2,MidForeArm,MidForeArm.1,MidForeArm.2,Elbow,Elbow.1,Elbow.2,MidUpperArm,MidUpperArm.1,MidUpperArm.2
1,427.68420,525.04236,0.14589,410.07465,512.46381,0.30188,422.62207,512.34229,0.65524,431.66183,...,0.08080,403.50226,498.96652,0.96144,428.74121,521.85260,0.04421,413.70947,508.22092,0.01721
2,427.55515,525.52698,0.19114,416.32242,519.02728,0.33058,422.38318,512.05750,0.62262,432.13925,...,0.04027,403.92142,498.57404,0.97414,428.34988,521.21143,0.06473,413.91519,508.15848,0.01832
3,428.83749,525.09314,0.17687,416.42743,514.66742,0.28389,426.05865,513.35712,0.49559,433.23996,...,0.02557,403.67477,499.32172,0.97494,429.20349,521.05377,0.04931,421.22336,511.42886,0.01794
4,427.64291,525.92279,0.13319,416.71466,518.57947,0.37307,425.62653,513.52405,0.61227,433.05511,...,0.02045,403.82156,498.84067,0.97444,427.55899,520.01587,0.06915,421.07886,510.67938,0.02062
5,426.92892,527.05646,0.13632,416.20743,519.60193,0.46074,425.37756,513.96991,0.67439,433.56567,...,0.02762,403.57819,499.19968,0.98218,426.80386,521.10657,0.07529,427.26505,519.75195,0.01845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4254,409.27893,529.51117,0.20933,402.01764,512.64917,0.94712,419.35519,505.83731,0.99490,433.84900,...,0.04167,399.36777,498.08112,0.95745,404.47647,510.55844,0.05781,404.61240,507.07516,0.00635
4255,410.21484,528.43250,0.18718,402.41074,512.63715,0.93889,419.33099,505.77332,0.99508,434.20978,...,0.03273,399.50476,497.88647,0.95297,404.35373,510.64697,0.06670,404.78781,507.02295,0.00576
4256,409.49408,529.20630,0.22667,402.52710,512.88135,0.94178,419.25885,505.88922,0.99413,433.54303,...,0.03111,399.65521,498.39529,0.95158,404.72327,510.47168,0.06914,413.33014,512.70062,0.00767
4257,409.30777,529.09454,0.23517,402.19760,512.87628,0.94368,418.84924,506.24756,0.99355,433.41946,...,0.02979,399.34567,498.01099,0.96211,404.98578,510.48285,0.06746,411.86682,519.94336,0.00756
