### Notebook for preprocessing and analysis for One-handed Task
- preprocess data
- run SVD
- Output overview graphs as PNG and results as CSV

In [None]:
import errno  # handy system and path functions
import glob
import locale
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import os

import pandas as pd
import sys  # to get file system encoding
import scipy.linalg
from scipy import signal
from scipy import stats
import seaborn as sns
from sklearn import preprocessing


# Global Matplotlib defaults applied to every figure created in this notebook.
params = {
    # legend
    'legend.fontsize': 12,
    'legend.handlelength': 2,
    # layout
    'figure.autolayout': True,
    # fonts
    'font.serif': 'Ubuntu',
    'font.family': 'serif',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    # axis labels, titles and tick labels
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    # figure-level title size and resolution
    'figure.titlesize': 12,
    'figure.dpi': 300,
}
rcParams.update(params)

# Global Seaborn theme: context, grid style and colour palette.
# NOTE(review): these Seaborn calls also write to Matplotlib's rcParams, so
# some of the values set above (e.g. font sizes) may be overridden -- confirm
# that the resulting look is the intended one.
sns.set_context("paper")
sns.set_style('whitegrid')
sns.set_palette('colorblind')

In [None]:
# Run the script containing the utility functions used to preprocess and
# analyse the data (IPython %run magic, executed inside this notebook).
# The cells below call names that are not defined in this notebook (e.g.
# extract_and_resample, get_corrected, reshape, extract_mean_factor_loading,
# group_by_position, marker_dict and the marker lists) -- presumably they are
# provided by this script; verify against ba_analysis_1h_functions.py.
%run ba_analysis_1h_functions.py

In [None]:
# Show the values of marker_dict as the cell output.
# marker_dict is not defined in this notebook -- presumably it is provided by
# ba_analysis_1h_functions.py (TODO confirm).
marker_dict.values()

In [None]:
def get_model(dimensions, event_file, data_file):
    '''Build a model of corrected marker trajectories for an SVD.

    For every marker id in ``dimensions`` the x, y and z columns are extracted
    and resampled (``extract_and_resample``), passed axis by axis through
    ``get_corrected`` and combined with ``reshape``. The resulting columns are
    stored in the model under the names 'marker_<id>_x', 'marker_<id>_y' and
    'marker_<id>_z'. The three helpers are defined outside this notebook cell.

    input:
        dimensions = marker ids that make up the model, e.g. the full scale
                    model for the right arm:
                    right_hand_marker[:1] + right_wrist + right_lower_arm
                    + right_upper_arm + right_shoulder
        event_file = DataFrame describing which trials to include
        data_file = DataFrame containing all recorded movement data

    output:
        DataFrame holding the corrected (zero-centered, according to the
        helper naming) trajectories for the requested markers
    '''
    model = pd.DataFrame()

    for marker_id in map(str, dimensions):
        marker_columns = ['marker_' + marker_id + suffix
                          for suffix in ('_x', '_y', '_z')]

        df_x, df_y, df_z = extract_and_resample(event_file, data_file,
                                                marker_columns)

        # get_corrected returns (mean, variance, corrected frame); only the
        # corrected frame is used for the model
        _, _, corrected_x = get_corrected('x', df_x)
        _, _, corrected_y = get_corrected('y', df_y)
        _, _, corrected_z = get_corrected('z', df_z)

        stacked = reshape(corrected_x, corrected_y, corrected_z)

        # rename the reshaped columns, by position, to the marker column names
        for position, source_column in enumerate(stacked.columns):
            model[marker_columns[position]] = stacked[source_column]

    return model

In [None]:
def specify_model(model, parameters):
    '''
    Select the columns of a full scale model (see get_model) that should be
    used for further analysis.

    input:
        model = object with a ``columns`` sequence, e.g. the DataFrame
                returned by get_model
        parameters = list of ones and zeros; a 1 at position k selects the
                     k-th column of the model

    output:
        list of the selected column names, in model order; it can be used to
        truncate the DataFrame holding the full scale model
    '''
    return [model.columns[position]
            for position, flag in enumerate(parameters)
            if flag == 1]
    

In [None]:
def _finish_figure(fig, out_file):
    '''Save ``fig`` to ``out_file``, display it and release it.'''
    fig.savefig(out_file)
    plt.show()
    # analysis_and_plotting runs once per subject and target position, so
    # figures that stay open would pile up in memory
    plt.close(fig)


def _plot_velocity(velocity, title, out_file):
    '''Plot one velocity profile (per-sample position difference) and save it.'''
    fig = plt.figure(figsize=(10, 5))
    plt.plot(range(len(velocity)), velocity, color='black',
             label='Mittlere Geschwindigkeit')
    plt.ylim(-0.05, 0.05)
    plt.xlabel('Zeit/Samples')
    plt.ylabel('Geschwindigkeit(m/sample)')
    plt.title(title)
    plt.legend()
    _finish_figure(fig, out_file)


def _plot_dimension(dim, mean, lower, upper, trials, target, ylim,
                    bound_alpha, out_file):
    '''Plot mean trajectory, single trials, spread band and target box for one
    cartesian dimension (``dim`` is 'x', 'y' or 'z') and save the figure.'''
    n_samples = len(mean)
    samples = range(n_samples)

    fig = plt.figure(figsize=(14, 7))
    plt.plot(samples, mean, color='crimson', label='Handtrajektorie')

    # single trials as grey shadows
    for column in trials.columns:
        plt.plot(range(len(trials[column])), trials[column],
                 alpha=0.2, color='grey')

    # outline and fill of the band between the lower and upper bound
    plt.plot(samples, lower, color='black', alpha=bound_alpha)
    plt.plot(samples, upper, color='black', alpha=bound_alpha)
    plt.fill_between(samples, mean, lower, color='grey', alpha=.2)
    plt.fill_between(samples, mean, upper, color='grey', alpha=.2)

    plt.title(dim.upper() + '-Dimension: Targetposition: ' + str(target))
    plt.xlabel('Zeit/Samples')
    plt.ylabel(dim + '-Koordinaten(Meter)')

    # target box: +/- 0.1 around the target coordinate over the whole time axis
    box_low, box_high = target - .1, target + .1
    plt.plot((0, 0), (box_low, box_high), color='k', marker='x')
    plt.plot((0, n_samples), (box_high, box_high), color='k', marker='x')
    plt.plot((0, n_samples), (box_low, box_low), color='k', marker='x')
    plt.plot((n_samples, n_samples), (box_low, box_high), color='k', marker='x')
    plt.ylim(*ylim)

    _finish_figure(fig, out_file)


def analysis_and_plotting(marker_, pos, pos_name, subject_id, path, data_file):
    '''
    Output trajectories, variance and velocity of the specified hand marker as
    csv and plot them. Perform an SVD and plot the mean loadings on the first
    vector of the U matrix and the first vector of the Vt matrix.

    Files are written below <path>/graphs/1H/<subject_id>/<pos_name>/ into the
    sub-directories 'velocity' and 'trajectories' (created if missing).

    Input:
        marker_ = identifier (integer or string) of the marker whose
                  trajectory should be extracted
        pos = DataFrame containing all events for a specific target location;
              the columns 'box_pos_x', 'box_pos_y' and 'box_pos_z' are read
        pos_name = identifier of the target position
        subject_id = subject identifier
        path = output root directory
        data_file = DataFrame containing all recorded movement data of one subject

    The helpers extract_and_resample, get_corrected, reshape and
    extract_mean_factor_loading as well as the marker lists (left_/right_...)
    are defined outside this notebook cell.
    '''
    # output directories
    directory = path+'/graphs/1H/'+subject_id+'/'+pos_name
    velocity_dir = directory+'/velocity/'
    trajectory_dir = directory+'/trajectories/'
    for out_dir in (directory, velocity_dir, trajectory_dir):
        os.makedirs(out_dir, exist_ok=True)

    # column names of the marker in each cartesian dimension; str() allows
    # the marker to be passed as an integer as well as a string
    marker_id = str(marker_)
    marker_columns = ['marker_'+marker_id+'_x', 'marker_'+marker_id+'_y',
                      'marker_'+marker_id+'_z']

    # target position, taken from the first event row (positional access, so
    # it does not depend on the row labels of ``pos``); x is negated
    box_pos_x = -1*(pos['box_pos_x'].iloc[0])
    box_pos_z_r = pos['box_pos_z'].iloc[0]
    box_pos_y_r = pos['box_pos_y'].iloc[0]
    print('!!!!! Traget Pos: z = '+ str(box_pos_z_r)+ ' ; y = '+ str(box_pos_y_r) )

    # extract and resample the movement data of all trials with this target
    df_x_r, df_y_r, df_z_r = extract_and_resample(pos, data_file, marker_columns)
    # mean trajectory, variance and corrected trajectories per dimension
    x_mean_dimension_r, x_variance_dimension_r, x_df_corrected_r = get_corrected('x', df_x_r)
    y_mean_dimension_r, y_variance_dimension_r, y_df_corrected_r = get_corrected('y', df_y_r)
    z_mean_dimension_r, z_variance_dimension_r, z_df_corrected_r = get_corrected('z', df_z_r)

    # write the extracted data to csv
    df_trajectories = pd.DataFrame()
    df_trajectories['x_dim_'+subject_id+'_'+pos_name] = x_mean_dimension_r
    df_trajectories['y_dim_'+subject_id+'_'+pos_name] = y_mean_dimension_r
    df_trajectories['z_dim_'+subject_id+'_'+pos_name] = z_mean_dimension_r
    df_trajectories['x_dim_var_'+subject_id+'_'+pos_name] = x_variance_dimension_r
    df_trajectories['y_dim_var_'+subject_id+'_'+pos_name] = y_variance_dimension_r
    df_trajectories['z_dim_var_'+subject_id+'_'+pos_name] = z_variance_dimension_r
    df_trajectories['box_pos_x'] = box_pos_x
    df_trajectories['box_pos_z'] = box_pos_z_r
    df_trajectories['box_pos_y'] = box_pos_y_r
    df_trajectories.to_csv(trajectory_dir+pos_name+'_trajectories.csv',
                           index=False, encoding='utf-8')
    reshaped_df_r = reshape(x_df_corrected_r, y_df_corrected_r, z_df_corrected_r)
    reshaped_df_r.to_csv(trajectory_dir+pos_name+'_corrected_trajectories.csv',
                         index=False, encoding='utf-8')

    # arrays for plotting
    x = np.array(x_mean_dimension_r)
    z = np.array(z_mean_dimension_r)
    y = np.array(y_mean_dimension_r)

    # lower / upper bound of the band drawn around the mean trajectory
    # (mean -/+ the second return value of get_corrected)
    x_l = x - np.array(x_variance_dimension_r)
    z_l = z - np.array(z_variance_dimension_r)
    y_l = y - np.array(y_variance_dimension_r)
    x_u = x + np.array(x_variance_dimension_r)
    z_u = z + np.array(z_variance_dimension_r)
    y_u = y + np.array(y_variance_dimension_r)

    ## velocity of the mean trajectory in each cartesian dimension
    print(pos_name)
    print(subject_id)
    df_velocities = pd.DataFrame()
    velocity_plots = (
        ('x', x_mean_dimension_r,
         'Mittlere Geschwindigkeit in X-Dimension; Targetposition: '+ str(box_pos_x)),
        ('y', y_mean_dimension_r,
         'Mittlere Geschwindigkeit in Y-Dimension; Targetposition:'+ str(box_pos_y_r)),
        ('z', z_mean_dimension_r,
         'Mittlere Geschwindigkeit in Z-Dimension; Targetposition:'+ str(box_pos_z_r)),
    )
    for dim, mean_trajectory, title in velocity_plots:
        # the sample spacing is 1, so the position difference between
        # consecutive samples already is the velocity in m/sample
        velocity = np.diff(np.asarray(mean_trajectory, dtype=float))
        df_velocities[dim+'_dim_velocity_'+str(subject_id)+'_'+str(pos_name)] = velocity
        _plot_velocity(velocity, title,
                       velocity_dir+pos_name+'_velocity_'+dim+'.png')

    # save velocity data as csv
    df_velocities.to_csv(velocity_dir+pos_name+'_velocities.csv',
                         index=False, encoding='utf-8')

    ## 2D trajectories, one figure per cartesian dimension
    _plot_dimension('x', x_mean_dimension_r, x_l, x_u, df_x_r, box_pos_x,
                    (-.1, 1.1), 0.75,
                    trajectory_dir+pos_name+'_x_coordinates.png')
    _plot_dimension('y', y_mean_dimension_r, y_l, y_u, df_y_r, box_pos_y_r,
                    (0.8, 2), 0.75,
                    trajectory_dir+pos_name+'_y_coordinates.png')
    _plot_dimension('z', z_mean_dimension_r, z_l, z_u, df_z_r, box_pos_z_r,
                    (-.6, .6), 0.4,
                    trajectory_dir+pos_name+'_z_coordinates.png')

    ### 3D trajectory; the plot axes are (x, z, y)
    fig = plt.figure(figsize=(16,9))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs=x, ys=z, zs=y, color='crimson', label='Handtrajektorie')
    ax.plot(xs=x_u, ys=z_u, zs=y_u, color='black', alpha=0.6, label='+/- stdm')
    ax.plot(xs=x_l, ys=z_l, zs=y_l, color='black', alpha=0.6)

    # single trials; the three frames are matched column by column
    for x_col, y_col, z_col in zip(df_x_r.columns, df_y_r.columns, df_z_r.columns):
        ax.plot(xs=np.array(df_x_r[x_col]), ys=np.array(df_z_r[z_col]),
                zs=np.array(df_y_r[y_col]), color='grey', alpha=0.2)

    ax.set_ylim(.5,-.6)
    ax.set_xlim(-0.2,1)
    ax.set_zlim(0.8,2)
    ax.view_init(azim=235)

    ax.set_xlabel('x', fontsize=23)
    ax.set_ylabel('z', fontsize=23)
    ax.set_zlabel('y', fontsize=23)

    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
          fancybox=True, shadow=True)
    _finish_figure(fig, trajectory_dir+pos_name+'_3D_coordinates.png')

    ### SVD
    # marker space of the dominant arm; sub-05 is handled as the left-handed
    # subject, the marker lists come from the helper script
    if subject_id == 'sub-05':
        dimensions = left_hand_marker[:1] + left_wrist + left_lower_arm  + left_upper_arm + left_shoulder
    else:
        dimensions = right_hand_marker[:1] + right_wrist + right_lower_arm  + right_upper_arm + right_shoulder

    model_r = get_model(dimensions, pos, data_file)
    # keep only the first three columns (hand marker in x, y, z)
    parameters = [1, 1, 1] + [0] * 12
    model_parameters = specify_model(model_r, parameters)
    model_r = model_r[model_parameters]

    # of the eight return values only Vt, the mean loadings and their spread
    # are used below
    _, _, _, VT_r, mean_loadings_r, stdm_r, _, _ = extract_mean_factor_loading(model_r)

    # mean loadings on the first singular vector of the U matrix
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(14, 8))
    loading_samples = range(len(mean_loadings_r))
    spread_samples = range(len(stdm_r))
    # labels are required for the legend below to contain any entries
    ax.plot(loading_samples, mean_loadings_r, color='crimson',
            label='Mittlere Faktorladung')
    ax.plot(spread_samples, mean_loadings_r+stdm_r, color='black', alpha=0.6,
            label='+/- stdm')
    ax.plot(spread_samples, mean_loadings_r-stdm_r, color='black', alpha=0.6)
    ax.fill_between(loading_samples, mean_loadings_r, (mean_loadings_r+stdm_r),
                    color='grey', alpha=.2)
    ax.fill_between(loading_samples, mean_loadings_r, mean_loadings_r-stdm_r,
                    color='grey', alpha=.2)
    ax.set_xlabel('Zeit')
    ax.set_ylabel('Mittlere Faktorladung')
    ax.set_ylim(0,.05)
    ax.legend()
    ax.set_title('Mittlere Faktorladung auf Vektor Singulärvektor u1')
    _finish_figure(fig, trajectory_dir+pos_name+'_u1_mean_loadings.png')

    ## first singular vector of the Vt matrix in 3D (absolute values)
    sv_r = np.abs(VT_r[0,:])

    fig = plt.figure(figsize=(20,12))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs=(0,sv_r[0]), ys=(0,sv_r[2]), zs=(0,sv_r[1]), color='black',
           label = ('[x]='+str(sv_r[0])+' ; [y]='+str(sv_r[1])+' ; [z]='+str(sv_r[2])))
    ax.quiver(0,0, 0,sv_r[0],sv_r[2], sv_r[1], length = 1, arrow_length_ratio=0.1,
              normalize = True, color='black')

    ax.set_xlim(0,1)
    ax.set_ylim(0,1)
    ax.set_zlim(0,1)

    ax.set_xlabel('x', fontsize=23)
    ax.set_ylabel('z', fontsize=23)
    ax.set_zlabel('y', fontsize=23)
    ax.view_init(azim=230)

    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
              fancybox=True, shadow=True)

    plt.title('Erste Singulärvektor v1', y=-0.03)
    _finish_figure(fig, trajectory_dir+pos_name+'_v1.png')


In [None]:
dir_ = '/home/michael/bachelorarbeit/subjects/*' # input directory; 1H/ -> Sub-01, Sub-02, ..
path = '/home/michael/bachelorarbeit' # output directory

subjects_info = pd.DataFrame()  # will hold the content of all subject parameter files
info_frames = []  # parameter files are collected here and concatenated once at the end

# sorted() makes the processing order independent of the file system
for subject in sorted(glob.glob(dir_)):  # iterate through subject level folders
    # subject identifier = name of the subject folder, whatever the depth of dir_
    subject_id = os.path.basename(subject)
    print(subject_id)

    if subject_id == 'sub-05':  # left handed subject
        marker = '5'  # left hand marker
    else:
        marker = '7'  # right hand marker

    # reset per subject so that a missing file can never be replaced silently
    # by the files of the previously processed subject
    event_file_1H = None
    data_file_1H = None

    for filename in sorted(glob.glob(subject+'/*')):
        # iterate through the files of one subject.
        # NOTE: the full path is split, so the field positions below rely on
        # the directory names containing no additional '_' or '-'
        underscore_fields = filename.split('_')
        dash_fields = filename.split('-')
        is_1h_file = len(dash_fields) > 4 and dash_fields[3] == '1H'

        if len(underscore_fields) > 2 and underscore_fields[2] == 'parameters':
            # extract subject info
            print('info')
            info = pd.read_csv(filename)
            info_frames.append(info)

        elif is_1h_file and dash_fields[4] == 'events':
            # event file of the one-handed task
            print('events')
            print(filename)
            event_file_1H = pd.read_csv(filename)
        elif is_1h_file and dash_fields[4] == 'data':
            # all recorded movement data for the subject; reduced later to the
            # data of specific events
            print('data')
            print(filename)
            data_file_1H = pd.read_csv(filename)

    if event_file_1H is None or data_file_1H is None:
        print('skipping '+subject_id+': 1H events or data file not found')
        continue

    # group events by target positions
    r1,r2,r3,m1,m2,m3,l1,l2,l3 = group_by_position(event_file_1H)
    positions_list = [r1,r2,r3,m1,m2,m3,l1,l2,l3]  # event dataframes, one per target position
    pos_names = ['r1','r2','r3','m1','m2','m3','l1','l2','l3']

    # run analysis and plot results for every target position of this subject
    for pos_name, position_events in zip(pos_names, positions_list):
        analysis_and_plotting(marker, position_events, pos_name, subject_id, path, data_file_1H)

# DataFrame.append was removed in pandas 2.0; concatenate the collected frames
if info_frames:
    subjects_info = pd.concat(info_frames)