# Setup

In [1]:
import os
import sys
import re

import pickle

import scipy


import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm

import kineticstoolkit.lab as ktk

# importing functions from our projects py files
module_path = os.path.abspath(os.path.join('..')) # or the path to your source code
sys.path.insert(0, module_path)

from markers_analysis import markers
from markers_analysis import constants as consts
from markers_analysis import subject as subj



In [2]:
mpl.rcParams.update({'font.size': 14})

# %matplotlib qt
%matplotlib widget

In [3]:
import importlib

import markers_analysis

importlib.reload(markers_analysis)
importlib.reload(consts)
importlib.reload(subj)
importlib.reload(markers)
importlib.reload(markers_analysis.constants)
importlib.reload(markers_analysis.subject)
importlib.reload(markers_analysis.markers)


<module 'markers_analysis.markers' from 'C:\\Users\\noamg\\OneDrive - post.bgu.ac.il\\Documents\\motor learning lab\\GitHub\\Noam-markers-analysis\\markers_analysis\\markers.py'>

## Subject list and paths

In [4]:
# Extracting the path to the needed results and figures (?)
results_path = os.path.join('.', 'pkl')
fig_path = os.path.join('.', 'img')

# delete later: 
print(results_path)
print(fig_path)

.\pkl
.\img


In [5]:
# Entering the data for this figure (subjects and data dates)
basename = 'white_ball_hit '
subject_id_list = ['007', '008', '009', '010', '011', '012', '013', '014']
date_list = ['2023_11_20', '2023_11_20', '2023_11_20', '2023_12_03', '2023_12_03', '2023_12_03', '2023_12_14', '2023_12_14']

# subject_id_list = ['007', '008', '009', '010', '011']
# date_list = ['2023_11_20', '2023_11_20', '2023_11_20', '2023_12_03', '2023_12_03']


In [6]:
interconnections = markers.get_interconnections()
print('interconnections = ', interconnections) # delete later

all_data = []

for subject_id,date in zip(subject_id_list,date_list):
    marker_file_name = os.path.join(results_path, f'id{subject_id} markers.pkl')

    if os.path.exists(marker_file_name):
        # File exists, unpickle the data
        with open(marker_file_name, 'rb') as file:
            subject_data = pickle.load(file)
    else:
        # Process the file
        subject_data = {}

        info = {}
        info['id'] = subject_id
        info['date'] = date
        info['basename'] = basename
        subject_data['info'] = info
        subject_data['data'] = []

        for filenum,filename in subj.find_marker_data_files(date, subject_id):
            data_dict = subj.load_marker_data_file(filename, interconnections)

            data_dict['filenum'] = filenum
            data_dict['filename'] = filename
            subject_data['data'].append(data_dict)
            print(f"************* Done with subject f{subject_data['info']['id']} file number {subject_data['data'][-1]['filenum']}: {subject_data['data'][-1]['filename']}")

        with open(marker_file_name, 'wb') as file:
            pickle.dump(subject_data, file)

    all_data.append(subject_data)

print('\n\nall_data = ','\n', all_data) # delete later


interconnections =  {'Back': {'Color': [0, 0.5, 1], 'Links': [['R_SAE', 'L5/S1', 'R_GR'], ['L_SAE', 'L5/S1', 'L_GR'], ['R_SAE', 'L_SAE'], ['R_GR', 'L_GR']]}, 'LUpperLimb': {'Color': [0, 0.5, 1], 'Links': [['L_SAE', 'L_ELB', 'L_W', 'L_H_M'], ['L_W', 'L_H_L', 'L_H_M']]}, 'RUpperLimb': {'Color': [1, 0.5, 0], 'Links': [['R_SAE', 'R_ELB', 'R_W', 'R_H_M'], ['R_W', 'R_H_L', 'R_H_M']]}, 'Cue': {'Color': [0.5, 1, 0.5], 'Links': [['FQ', 'BQ']]}}


all_data =  
 [{'info': {'id': '007', 'date': '2023_11_20', 'basename': 'white_ball_hit '}, 'data': [{'markers': TimeSeries with attributes:
         time: array([ 0. , 0.01, 0.02, ..., 163.49, 163.5 , 163.51])
         data: <dict with 23 entries>
    time_info: {'Unit': 's'}
    data_info: <dict with 19 entries>
       events: []
, 'interconnections': {'Back': {'Color': [0, 0.5, 1], 'Links': [['R_SAE', 'L5/S1', 'R_GR'], ['L_SAE', 'L5/S1', 'L_GR'], ['R_SAE', 'L_SAE'], ['R_GR', 'L_GR']]}, 'LUpperLimb': {'Color': [0, 0.5, 1], 'Links': [['L_SAE', 'L_ELB'

## Cut data files

This is slow so it is disabled and the data is loaded from the pickled file.

In [7]:
all_cut_data = []

for subject_data in all_data:
    subject_id = subject_data['info']['id']
    date = subject_data['info']['date']

    table_cut_filename = os.path.join(results_path, f"id{subject_id} cut.pkl")
    marker_cut_filename = os.path.join(results_path, f"id{subject_id} table markers cut.pkl")

    with open(table_cut_filename, 'rb') as file:
        table_data = pickle.load(file)
    removed_table_shots = False

    if os.path.exists(marker_cut_filename):
        # File exists, unpickle the data
        with open(marker_cut_filename, 'rb') as file:
            shots_data = pickle.load(file)
    else:
        shots_data = {}
        shots_data['info'] = subject_data['info']
        shots_data['shots'] = []

        for data_dict in subject_data['data']:
            file_data = data_dict["markers"]

            filenum = data_dict["filenum"]
            frames = data_dict["frames"]
            euler = data_dict["euler"]
            euler_vels = data_dict["euler_vels"]

            ts_rename = [frames, euler, euler_vels]
            ts_names = ['frames', 'angles', 'vels']
            for ts, name in zip(ts_rename, ts_names):
                old_names = list(ts.data.keys())
                new_names = [f'{name}_{on}' for on in old_names]
                for o,n in zip(old_names, new_names):
                    ts.rename_data(o, n, in_place=True)
                file_data.merge(ts, in_place=True)

            file_shots = [s for s in table_data['shots'] if s['filenum'] == filenum]

            indices_to_remove = []
            time = np.arange(start=-1, stop=1, step=0.01)
            for s in file_shots:
                # If we've run out of marker file (why??)
                if file_data.time[-1] < s['zero_time']-1:
                    indices_to_remove.append(table_data['shots'].index(s))
                    continue

                start_time = np.max([file_data.time[0], s['zero_time']-1])
                end_time = np.min([file_data.time[-1], s['zero_time']+1])

                shot_ts = file_data.get_ts_between_times(start_time, end_time, inclusive=True)
                shot_ts.shift(-s['zero_time'], in_place=True)

                shot_ts.resample(time, in_place=True)

                shot_data = {}
                shot_data['filenum'] = filenum
                shot_data['shotnum'] = s['shotnum']
                shot_data['start_time'] = start_time
                shot_data['zero_time'] = s['zero_time']
                shot_data['end_time'] = end_time
                
                shot_data['interconnections'] = data_dict["interconnections"]
                shot_data['global_transform'] = data_dict["global_transform"]

                shot_data['ts'] = shot_ts

                shots_data['shots'].append(shot_data)
            
            for i in reversed(indices_to_remove):
                del table_data['shots'][i]
            if indices_to_remove:
                removed_table_shots = True

        with open(marker_cut_filename, 'wb') as file:
            pickle.dump(shots_data, file)

    if removed_table_shots:
        with open(table_cut_filename, 'wb') as file:
            pickle.dump(table_data, file)

    all_cut_data.append({'marker': shots_data, 'table': table_data})

## Plot position and velocity "learning curves"

# Get successes for each participant

In [8]:
mean_success_list = []
for subject_data in all_cut_data:
    #extracting data fo a single subject at a time (?)
    marker_data = subject_data['marker']
    table_data = subject_data['table']
    
    # taking only the "good" shots
    hits = subj.is_hit(table_data['shots'])
    shots_good = [m for i,m in enumerate(marker_data['shots']) if hits[i]]
    
    # df = data frame?
    df_list = []
    # Extracting the second before each successful shot (?)
    for shot in shots_good:
        ts = shot['ts']
        success_test_index = ts.get_index_before_time(0)
        shot_df = ts.to_dataframe().iloc[[success_test_index],:]

        df_list.append(shot_df)

    success_df = pd.concat(df_list, axis=0, ignore_index = True)
    # Take robust mean of this subject's movements
    success_df.fillna(np.inf)
    mean_success = success_df.apply(scipy.stats.trim_mean, proportiontocut=0.1)
    success_df.replace(np.inf, np.nan)

    mean_success_list.append(mean_success)
print('mean_success_list = \n\n', mean_success_list) #delete later

mean_success_df = pd.concat(mean_success_list, axis=1).T


mean_success_list = 

 [R_SAE[0]          3.151211e-01
R_SAE[1]         -9.274250e-01
R_SAE[2]          3.779596e-01
R_SAE[3]          1.000000e+00
L_SAE[0]          3.206746e-01
                      ...     
vels_R_Hand[1]    2.691372e+01
vels_R_Hand[2]    3.971713e-14
vels_L_Hand[0]    1.592871e+01
vels_L_Hand[1]   -9.063265e-01
vels_L_Hand[2]    1.743126e-14
Length: 268, dtype: float64, R_SAE[0]          4.274861e-01
R_SAE[1]         -6.490156e-01
R_SAE[2]          3.941518e-01
R_SAE[3]          1.000000e+00
L_SAE[0]                   NaN
                      ...     
vels_R_Hand[1]   -5.372448e+01
vels_R_Hand[2]    5.580036e-14
vels_L_Hand[0]    1.644047e+00
vels_L_Hand[1]   -1.433044e+01
vels_L_Hand[2]   -3.674891e-14
Length: 268, dtype: float64, R_SAE[0]          4.310046e-01
R_SAE[1]         -6.875490e-01
R_SAE[2]          4.726558e-01
R_SAE[3]          1.000000e+00
L_SAE[0]          2.318620e-01
                      ...     
vels_R_Hand[1]   -2.610764e+01
vels_R_Hand[2]   -1

In [9]:
# import statsmodels.api as sem_dfs
mean_success_df

Unnamed: 0,R_SAE[0],R_SAE[1],R_SAE[2],R_SAE[3],L_SAE[0],L_SAE[1],L_SAE[2],L_SAE[3],L_H_L[0],L_H_L[1],...,vels_R_Forearm[2],vels_L_Forearm[0],vels_L_Forearm[1],vels_L_Forearm[2],vels_R_Hand[0],vels_R_Hand[1],vels_R_Hand[2],vels_L_Hand[0],vels_L_Hand[1],vels_L_Hand[2]
0,0.315121,-0.927425,0.37796,1.0,0.320675,-0.633386,0.291432,1.0,0.38411,-0.101915,...,141.724914,55.103929,15.068928,38.878619,3763.085292,26.913721,3.971713e-14,15.928707,-0.906326,1.743126e-14
1,0.427486,-0.649016,0.394152,1.0,,,,,0.360678,-0.096608,...,-49.326876,,,,4.116424,-53.724481,5.580036e-14,1.644047,-14.330435,-3.674891e-14
2,0.431005,-0.687549,0.472656,1.0,0.231862,-0.484029,0.353171,1.0,0.356749,0.012737,...,74.822682,-19.544047,-1.551147,-11.511102,1378.212325,-26.107645,-1.927164e-14,-0.442436,-15.418102,4.791454e-15
3,0.464638,-0.696692,0.421281,1.0,0.252353,-0.560158,0.306762,1.0,0.38335,-0.018622,...,-12.757884,-9.251618,11.183655,-3.44741,1968.776651,278.787137,5.534828e-14,14.474297,-5.356126,-2.498582e-15
4,0.363578,-0.58914,0.364884,1.0,0.198114,-0.463033,0.299626,1.0,0.37512,0.011951,...,-23.996865,-20.157387,27.001289,-11.771026,-10.548856,68.969451,1.677357e-14,-1.704285,-8.486765,9.286647e-15
5,0.368402,-0.669238,0.479847,1.0,0.250675,-0.445067,0.417,1.0,0.385638,-0.066673,...,-49.877555,-15.153222,20.680124,-11.406568,-45.570388,28.656316,-6.443258e-15,3.87839,0.225089,6.48651e-15
6,0.368495,-0.66595,0.350206,1.0,0.207131,-0.527947,0.257809,1.0,0.368243,-0.085338,...,5.114029,-27.131417,40.492436,-15.25431,-18.948147,-9.013179,-1.266696e-13,-2.143814,-14.592604,-1.130728e-14
7,0.40438,-0.586681,0.522536,1.0,0.250058,-0.393779,0.451592,1.0,0.360557,-0.046127,...,12.904834,-2.007261,9.47636,0.086986,-23.932886,32.561836,8.131744e-15,-0.444195,-6.409033,-5.959844e-14


## Outlier removal

### Get a dataframe of the time zero locations and velocities

In [10]:
# Creating a list of the 0 time of each hit
time_0_data_list = []
for subject_data in all_cut_data:
    marker_data = subject_data['marker']
    table_data = subject_data['table']
    
    plot_list = []
    for shot in marker_data['shots']:
        ts = shot['ts']
        plot_ts = ts.get_subset(['R_SAE', 'R_ELB', 'vels_R_Arm', 'vels_R_Forearm'])
        plot_index = plot_ts.get_index_before_time(0)

        shot_df = plot_ts.to_dataframe().iloc[[plot_index], :]
        plot_list.append(shot_df)

    plot_df = pd.concat(plot_list, ignore_index=True)

    time_0_data_list.append(plot_df)
print('time_0_data_list\n\n', time_0_data_list) # delete later


time_0_data_list

 [     R_SAE[0]  R_SAE[1]  R_SAE[2]  R_SAE[3]  R_ELB[0]  R_ELB[1]  R_ELB[2]  \
0    0.428135 -0.802446  0.529808       1.0  0.284419 -1.065923  0.463060   
1    0.384406 -0.783440  0.521096       1.0  0.257290 -1.046121  0.440524   
2    0.346966 -0.874794  0.424547       1.0  0.189664 -1.098600  0.322913   
3    0.323300 -0.900121  0.399443       1.0  0.138660 -1.105875  0.296787   
4    0.305348 -0.899220  0.400612       1.0  0.169630 -1.127115  0.288702   
..        ...       ...       ...       ...       ...       ...       ...   
166  0.292773 -0.944836  0.549231       1.0  0.110805 -1.169356  0.446749   
167  0.326767 -0.896925  0.427944       1.0  0.131529 -1.107886  0.387586   
168  0.278206 -0.910558  0.440879       1.0  0.086836 -1.126531  0.378438   
169  0.325536 -0.922016  0.411016       1.0  0.141192 -1.135497  0.336815   
170  0.236461 -0.949823  0.340369       1.0  0.037980 -1.145677  0.247772   

     R_ELB[3]  vels_R_Arm[0]  vels_R_Arm[1]  vels_R_Arm

### Now remove outliers from it

In [11]:
for plot_df in time_0_data_list:

    # Loop over the columns of the original data frame
    for col_name, col_series in plot_df.items():
        # Remove NaNs from the column
        col_series = col_series.dropna()
        
        col_median = col_series.median()
        col_mad = (col_series - col_median).abs().median()


        # Define the lower and upper bounds for outliers
        lower_bound = col_median - 3 * 1.5 * col_mad
        upper_bound = col_median + 3 * 1.5 * col_mad
        # Replace outliers with NaNs

        col_series = col_series.mask((col_series < lower_bound) | (col_series > upper_bound), np.nan)
        # Add the column to the new data frame
        plot_df[col_name] = col_series

print('plot_df\n', plot_df) #delete later

plot_df
     R_SAE[0]  R_SAE[1]  R_SAE[2]  R_SAE[3]  R_ELB[0]  R_ELB[1]  R_ELB[2]  \
0   0.185126 -0.559589  0.636112       1.0  0.197614 -0.871575  0.497740   
1   0.345346 -0.578788  0.550667       1.0  0.314995 -0.904968  0.425478   
2   0.308534 -0.565875  0.594102       1.0  0.282254 -0.882186  0.457293   
3   0.317859 -0.516249  0.572560       1.0  0.353273 -0.809401  0.383827   
4   0.308629 -0.561970  0.577876       1.0  0.262142 -0.877237  0.442245   
..       ...       ...       ...       ...       ...       ...       ...   
66  0.447062 -0.685083  0.433073       1.0  0.396415 -1.022057  0.374215   
67  0.435779 -0.693309  0.423609       1.0  0.381608 -1.031428  0.422269   
68  0.407846 -0.661950  0.487458       1.0  0.391447 -1.000935  0.434660   
69  0.456318 -0.702519  0.395646       1.0  0.413630 -1.042223  0.373983   
70  0.445956 -0.704275  0.404772       1.0  0.377370 -1.040287  0.378527   

    R_ELB[3]  vels_R_Arm[0]  vels_R_Arm[1]  vels_R_Arm[2]  vels_R_Forearm[0]  

# Now get the distance from the time 0 to the average of success

In [12]:
# fig = plt.figure()
# ax = fig.add_subplot(111)

# Function to interpolate NaN values in a column
def interpolate_column(column):
    indices = np.arange(len(column))
    mask = np.isnan(column)
    column[mask] = np.interp(indices[mask], indices[~mask], column[~mask])
    return column

def dist_to_success(df, mean_success, name, dims=[0,1,2]):
    names = [f'{name}[{i}]' for i in dims]
    delta = np.array(
        [df[n] - mean_success[n] for n in names]
    ).T
    np.apply_along_axis(interpolate_column, axis=0, arr=delta)

    return np.linalg.norm(delta, axis=1)


In [13]:
primary_joint_velocity_angles_dict = {
    'Pelvis': 2, 'Thorax': 1, 'Arm': 0, 'Forearm': 0, 'Hand': 1
}

In [15]:
time_0_dist_list = []

for plot_df, (i,mean_success) in zip(time_0_data_list, mean_success_df.iterrows()):
    
    dist_df = pd.DataFrame()

    for j in ['R_SAE', 'R_ELB']:
        dist_df[j] = dist_to_success(plot_df, mean_success, j)

    for j in ['vels_R_Arm', 'vels_R_Forearm']:
        dist_df[j] = dist_to_success(plot_df, mean_success, j, dims=[0])
    
    time_0_dist_list.append(dist_df)


ValueError: array of sample points is empty

In [16]:
# testing what array is missing data - only for right now, delete later
print("mean_success")
print(mean_success)
print("\nplot_df")
print(plot_df)
# 

mean_success
R_SAE[0]          4.274861e-01
R_SAE[1]         -6.490156e-01
R_SAE[2]          3.941518e-01
R_SAE[3]          1.000000e+00
L_SAE[0]                   NaN
                      ...     
vels_R_Hand[1]   -5.372448e+01
vels_R_Hand[2]    5.580036e-14
vels_L_Hand[0]    1.644047e+00
vels_L_Hand[1]   -1.433044e+01
vels_L_Hand[2]   -3.674891e-14
Name: 1, Length: 268, dtype: float64

plot_df
     R_SAE[0]  R_SAE[1]  R_SAE[2]  R_SAE[3]  R_ELB[0]  R_ELB[1]  R_ELB[2]  \
0    0.453381 -0.604005       NaN       1.0  0.494423 -0.898387  0.452567   
1    0.454554 -0.640597  0.422131       1.0  0.473617 -0.933662  0.405643   
2    0.452519 -0.650793  0.441686       1.0  0.469159 -0.944922  0.416845   
3    0.455022 -0.656639       NaN       1.0  0.474828 -0.949423  0.440528   
4         NaN -0.677841       NaN       1.0       NaN -0.963140  0.446518   
..        ...       ...       ...       ...       ...       ...       ...   
194  0.431633 -0.637830  0.378365       1.0  0.440127 -0.9218

In [17]:
upper_arm = vel_dfs['Upper arm'][zero_index]
upper_arm_success = upper_arm[hits].mean()

forearm = vel_dfs['Forearm'][zero_index]
forearm_success = forearm[hits].mean()

dist_df['vels_R_Arm'] = np.abs(upper_arm - upper_arm_success)
dist_df['vels_R_Forearm'] = np.abs(forearm - forearm_success)

NameError: name 'vel_dfs' is not defined

In [None]:
dist_df

In [None]:

fig, axes = plt.subplots(1, 2, figsize=(15, 4))

# Flatten the axes array if needed
axes = axes.flatten()

plot_in_axes = {'R_SAE': 0, 'R_ELB': 0, 'vels_R_Arm': 1, 'vels_R_Forearm': 1}
labels = {'R_SAE': 'Shoulder', 'R_ELB': 'Elbow', 'vels_R_Arm': 'Upper arm', 'vels_R_Forearm': 'Forearm'}
scale = {'R_SAE': 100, 'R_ELB': 100, 'vels_R_Arm': 1, 'vels_R_Forearm': 1}


# Iterate through columns and plot
for col in dist_df.columns:
    ax = axes[plot_in_axes[col]]
    ax.plot(dist_df.index, dist_df[col]*scale[col], label=labels[col], linewidth=2)

axes[0].axvspan(xmin=0, xmax=15, color='#D2B48C', alpha=0.3, label='Baseline')
axes[1].axvspan(xmin=0, xmax=15, color='#D2B48C', alpha=0.3, label='Baseline')

axes[0].set_xlim(0, 200)
axes[0].legend(loc='upper right', fontsize=12)
axes[0].set_title('Joint position')
axes[0].set_xlabel('Shot')
axes[0].set_ylabel('Error (cm)')

axes[1].set_xlim(0, 200)
axes[1].legend(loc='upper right', fontsize=12)
axes[1].set_title('Angular velocity')
axes[1].set_xlabel('Shot')
axes[1].set_ylabel('Error (deg/sec)')


# Adjust layout for better spacing
plt.tight_layout()

fig_savename = os.path.join(fig_path, f"joint learning {subject_id}.png")
plt.savefig(fig_savename, bbox_inches='tight')