In [154]:
import kineticstoolkit.lab as ktk
import pandas as pd
# Set an interactive backend, not required if already enabled in Spyder
%matplotlib qt5
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import roc_curve, roc_auc_score
from pyopls import OPLS
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_predict, LeaveOneOut
from sklearn.metrics import r2_score, accuracy_score

In [155]:
# Directory that holds the per-subject CSV exports read by this notebook.
# The "Subejct" spelling below is part of the file names passed to read_csv, so it is kept as-is.
exampleplots_dir = 'C:/Users/86153/Desktop/MCSRsystem/SetupV2/exampleplots/'

# Subject 01: all-joints table and angle-error table.
Subejct01_alljoints_path = os.path.join(exampleplots_dir, "Subejct01_all_joints_df.csv")
Subejct01_rotation_path = os.path.join(exampleplots_dir, "Subejct01_angerror_df.csv")
Subject01_alljoints_data = pd.read_csv(Subejct01_alljoints_path)
Subject01_rotation_angles = pd.read_csv(Subejct01_rotation_path)

# Subject 02: all-joints table and angle-error table.
Subejct02_alljoints_path = os.path.join(exampleplots_dir, "Subejct02_all_joints_df.csv")
Subejct02_rotation_path = os.path.join(exampleplots_dir, "Subejct02_angerror_df.csv")
Subject02_alljoints_data = pd.read_csv(Subejct02_alljoints_path)
Subject02_rotation_angles = pd.read_csv(Subejct02_rotation_path)

# Subject 03: all-joints table and angle-error table.
Subejct03_alljoints_path = os.path.join(exampleplots_dir, "Subejct03_all_joints_df.csv")
Subejct03_rotation_path = os.path.join(exampleplots_dir, "Subejct03_angerror_df.csv")
Subject03_alljoints_data = pd.read_csv(Subejct03_alljoints_path)
Subject03_rotation_angles = pd.read_csv(Subejct03_rotation_path)



# OPLS input

In [156]:
# Subject whose tables are analysed in the rest of the notebook.
subject_id = "Subject03"

# Number of components passed to OPLS(...) in the single-fit analysis cells.
OPLS_NoP = 25

# The loading cell stored each subject's tables in module-level variables named
# "<subject_id>_alljoints_data" / "<subject_id>_rotation_angles"; look them up by name.
notebook_namespace = globals()
alljoints_data = notebook_namespace[f"{subject_id}_alljoints_data"]
rotation_angles = notebook_namespace[f"{subject_id}_rotation_angles"]

# Joint rows for the early (Trial ID 11..110) and late (Trial ID 211..310)
# windows; both bounds are inclusive.
trial_ids = alljoints_data['Trial ID']
Rotation_data_early = alljoints_data[trial_ids.between(11, 110)]
Rotation_data_late = alljoints_data[trial_ids.between(211, 310)]

# Angle-error rows for the same windows, selected by row position
# (rows 10..109 and 210..309).
# NOTE(review): assumes row i of rotation_angles belongs to Trial ID i + 1 — confirm.
early_angerror = rotation_angles[10:110]
late_angerror = rotation_angles[210:310]

In [157]:
# Reshape the long-format early rows into wide tables: one row per 'Trial ID',
# one column per 'Angle Name', built separately for the 'Value' and 'Euler'
# measurement columns.
early_value_data, early_euler_data = (
    Rotation_data_early.pivot_table(index='Trial ID', columns='Angle Name', values=measure)
    for measure in ('Value', 'Euler')
)

# Tag each angle name with the measurement it came from so the two sets of
# columns remain distinguishable once they sit side by side.
value_columns = list(map("{}_Value".format, early_value_data.columns))
euler_columns = list(map("{}_Euler".format, early_euler_data.columns))

# Place the two wide tables side by side and apply the suffixed names.
early_data = pd.concat([early_value_data, early_euler_data], axis=1)
early_data.columns = value_columns + euler_columns

In [158]:
# Reshape the long-format late rows into wide tables: one row per 'Trial ID',
# one column per 'Angle Name', built separately for the 'Value' and 'Euler'
# measurement columns.
late_value_data, late_euler_data = (
    Rotation_data_late.pivot_table(index='Trial ID', columns='Angle Name', values=measure)
    for measure in ('Value', 'Euler')
)

# Tag each angle name with the measurement it came from so the two sets of
# columns remain distinguishable once they sit side by side.
value_columns = list(map("{}_Value".format, late_value_data.columns))
euler_columns = list(map("{}_Euler".format, late_euler_data.columns))

# Place the two wide tables side by side and apply the suffixed names.
late_data = pd.concat([late_value_data, late_euler_data], axis=1)
late_data.columns = value_columns + euler_columns

In [159]:

from scipy.linalg import svd

def effective_pseudo_determinant(matrix, threshold=1e-10):
    """
    Calculate the pseudo-determinant of a matrix based on its effective rank.

    The result is the product of the singular values that are strictly
    greater than ``threshold``; singular values at or below the threshold are
    treated as zero and left out of the product.

    Parameters:
    matrix (array-like): The 2-D input matrix.
    threshold (float): Singular values less than or equal to this are ignored.

    Returns:
    float: Product of the retained singular values. If no singular value
        exceeds ``threshold`` the empty product, 1.0, is returned.
    """
    # Only the singular values are used, so skip computing U and Vh.
    singular_values = svd(matrix, compute_uv=False)

    # Keep the singular values that define the effective rank.
    effective_singular_values = singular_values[singular_values > threshold]

    return np.prod(effective_singular_values)


## early

In [160]:
# Mark every trial (row) of early_data that has at least one missing entry.
nan_row_mask = early_data.isna().any(axis=1)
has_nan = nan_row_mask.any()
nan_rows = early_data[nan_row_mask]

print("Does early_data contain NaN values?", has_nan)
print("Rows containing NaN values:")
print(nan_rows)

Does early_data contain NaN values? True
Rows containing NaN values:
          Pelvis[0] max vel_Value  Pelvis[0] move onset_Value  \
Trial ID                                                        
52                       3.031959                   -0.202995   
53                       3.833283                    0.166815   
56                            NaN                   -1.870524   
97                            NaN                   -0.421638   
104                           NaN                   -4.298876   

          Pelvis[1] max vel_Value  Pelvis[1] move onset_Value  \
Trial ID                                                        
52                      10.656412                    3.463677   
53                      -2.765145                    0.670077   
56                            NaN                   -0.825992   
97                            NaN                   -0.041723   
104                           NaN                   -1.857588   

          Pelvis[2]

In [161]:
# Impute missing entries column by column. The forward-fill copies each gap
# from the closest earlier trial; the back-fill that follows covers gaps at the
# very top of the table, which have no earlier trial to copy from.
# Compared with patching only row 0 from row 1, this also fills leading gaps
# that span more than one row and does not fail on a single-row table.
early_data = early_data.ffill(axis=0).bfill(axis=0)

In [162]:
# Pairwise covariance between the columns of early_data, and its determinant.
early_cov_matrix = early_data.cov()
early_det_value = np.linalg.det(early_cov_matrix.to_numpy())

In [163]:
# Fit OPLS with OPLS_NoP components: early joint data as predictors, early
# angle errors as the response. `self` holds whatever fit() returns
# (presumably the fitted `opls` object itself — confirm against pyopls).
opls = OPLS(OPLS_NoP)
self = opls.fit(early_data, early_angerror)

# TP: orthogonal scores times the transposed orthogonal loadings.
# Z: the early data after opls.transform().
TP = self.T_ortho_ @ self.P_ortho_.T
Z = opls.transform(early_data)

# NOTE(review): np.cov treats each ROW as a variable by default. If TP and Z
# have one row per trial, these are trial-by-trial covariances — confirm that
# this, rather than rowvar=False, is intended.
early_reduandant_cov_matrix, early_related_cov_matrix = np.cov(TP), np.cov(Z)

early_reduandant_det_value, early_related_det_value = (
    np.linalg.det(cov_matrix)
    for cov_matrix in (early_reduandant_cov_matrix, early_related_cov_matrix)
)

# Pseudo-determinants over singular values above 0.1, and their ratio.
pseudo_det, pseudo_det_related = (
    effective_pseudo_determinant(cov_matrix, threshold=1e-1)
    for cov_matrix in (early_reduandant_cov_matrix, early_related_cov_matrix)
)
pseudo_efficiency = pseudo_det / pseudo_det_related

## late

In [164]:
# Mark every trial (row) of late_data that has at least one missing entry.
nan_row_mask = late_data.isna().any(axis=1)
has_nan = nan_row_mask.any()
nan_rows = late_data[nan_row_mask]

print("Does late_data contain NaN values?", has_nan)
print("Rows containing NaN values:")
print(nan_rows)


Does late_data contain NaN values? True
Rows containing NaN values:
          Pelvis[0] max vel_Value  Pelvis[0] move onset_Value  \
Trial ID                                                        
291                           NaN                    3.820806   

          Pelvis[1] max vel_Value  Pelvis[1] move onset_Value  \
Trial ID                                                        
291                           NaN                   30.314916   

          Pelvis[2] max vel_Value  Pelvis[2] move onset_Value  \
Trial ID                                                        
291                           NaN                   -3.839345   

          R_Arm[1] max vel_Value  R_Arm[1] move onset_Value  \
Trial ID                                                      
291                    29.657539                   4.863712   

          R_Arm[2] max vel_Value  R_Arm[2] move onset_Value  ...  \
Trial ID                                                     ...   
291               

In [165]:
# Impute missing entries column by column. The forward-fill copies each gap
# from the closest earlier trial; the back-fill that follows covers gaps at the
# very top of the table, which have no earlier trial to copy from.
# Compared with patching only row 0 from row 1, this also fills leading gaps
# that span more than one row and does not fail on a single-row table.
late_data = late_data.ffill(axis=0).bfill(axis=0)

In [166]:
# Pairwise covariance between the columns of late_data, and its determinant.
late_cov_matrix = late_data.cov()
late_det_value = np.linalg.det(late_cov_matrix.to_numpy())

In [167]:
# Fit OPLS with OPLS_NoP components: late joint data as predictors, late
# angle errors as the response. `self` holds whatever fit() returns
# (presumably the fitted `opls` object itself — confirm against pyopls).
opls = OPLS(OPLS_NoP)
self = opls.fit(late_data, late_angerror)

# Orthogonal scores times the transposed orthogonal loadings.
TP = self.T_ortho_ @ np.transpose(self.P_ortho_)

# NOTE(review): np.cov treats each ROW as a variable by default. If TP and Z
# have one row per trial, these are trial-by-trial covariances — confirm that
# this, rather than rowvar=False, is intended.
late_reduandant_cov_matrix = np.cov(TP)
# Use the LATE matrix here; the previous code took the determinant of
# early_reduandant_cov_matrix, so the "late" value merely repeated the early one.
late_reduandant_det_value = np.linalg.det(late_reduandant_cov_matrix)

# The late data after opls.transform(), and its covariance / determinant.
Z = opls.transform(late_data)
late_related_cov_matrix = np.cov(Z)
late_related_det_value = np.linalg.det(late_related_cov_matrix)

# Pseudo-determinants over singular values above 0.1, and their ratio.
pseudo_det_late = effective_pseudo_determinant(late_reduandant_cov_matrix, threshold=1e-1)
pseudo_det_related_late = effective_pseudo_determinant(late_related_cov_matrix, threshold=1e-1)
pseudo_efficiency_late = pseudo_det_late / pseudo_det_related_late


# Movement complexity

In [168]:
from sklearn.decomposition import PCA

# Fit a PCA with default settings to the early data.
pca_early = PCA().fit(early_data)

# Fraction of the total variance carried by each component, and its running total.
early_explained_variance_ratio = pca_early.explained_variance_ratio_
early_cumulative_variance = early_explained_variance_ratio.cumsum()

# 1-based position of the first component at which the running total reaches 90%.
early_num_pcs_90 = (early_cumulative_variance >= 0.90).argmax() + 1

# Number of components that individually explain more than 1% of the variance.
early_num_pcs_above_1_percent = (early_explained_variance_ratio > 0.01).sum()

print(f"Number of PCs needed to explain 90% of variance: {early_num_pcs_90}")
print(f"Number of PCs that explain more than 1% of variance: {early_num_pcs_above_1_percent}")

Number of PCs needed to explain 90% of variance: 7
Number of PCs that explain more than 1% of variance: 11


In [169]:
# Fit a PCA with default settings to the late data.
pca_late = PCA().fit(late_data)

# Fraction of the total variance carried by each component, and its running total.
late_explained_variance_ratio = pca_late.explained_variance_ratio_
late_cumulative_variance = late_explained_variance_ratio.cumsum()

# 1-based position of the first component at which the running total reaches 90%.
late_num_pcs_90 = (late_cumulative_variance >= 0.90).argmax() + 1

# Number of components that individually explain more than 1% of the variance.
late_num_pcs_above_1_percent = (late_explained_variance_ratio > 0.01).sum()

print(f"Number of PCs needed to explain 90% of variance: {late_num_pcs_90}")
print(f"Number of PCs that explain more than 1% of variance: {late_num_pcs_above_1_percent}")

Number of PCs needed to explain 90% of variance: 9
Number of PCs that explain more than 1% of variance: 13


In [None]:
# early_angerror and late_angerror are the row slices [10:110] and [210:310]
# of rotation_angles; both are plotted against the same 0..99 position axis.
x_values = range(100)

# One scatter series per phase, drawn on the current pyplot figure.
for series, legend_label, colour in (
    (early_angerror, "Early Angle Error", 'blue'),
    (late_angerror, "Late Angle Error", 'red'),
):
    plt.scatter(x_values, series, label=legend_label, color=colour)

# Axis labels, title and legend.
plt.xlabel("Index")
plt.ylabel("Angle Error")
plt.title("Scatter Plot of Early and Late Angle Errors")
plt.legend()

# Display the plot
plt.show()

In [171]:

# Report the pseudo-determinant efficiencies and PCA component counts for both phases.
summary_lines = (
    f"The Efficiency in the early rotation using Pseudo-Determinant : {pseudo_efficiency}",
    f"The Efficiency in the late rotation using Pseudo-Determinant : {pseudo_efficiency_late}",
    f"Number of PCs needed to explain 90% of variance - early: {early_num_pcs_90}",
    f"Number of PCs that explain more than 1% of variance - early: {early_num_pcs_above_1_percent}",
    f"Number of PCs needed to explain 90% of variance - late: {late_num_pcs_90}",
    f"Number of PCs that explain more than 1% of variance - late: {late_num_pcs_above_1_percent}",
)
print(*summary_lines, sep="\n")

The Efficiency in the early rotation using Pseudo-Determinant : 370617288.652938
The Efficiency in the late rotation using Pseudo-Determinant : 4425568984.258379
Number of PCs needed to explain 90% of variance - early: 7
Number of PCs that explain more than 1% of variance - early: 11
Number of PCs needed to explain 90% of variance - late: 9
Number of PCs that explain more than 1% of variance - late: 13


In [172]:
import csv


# Collect the per-subject summary metrics; the keys become the CSV header.
data = dict(
    subject_id=subject_id,
    early_det_value=early_det_value,
    late_det_value=late_det_value,
    pseudo_efficiency=pseudo_efficiency,
    pseudo_efficiency_late=pseudo_efficiency_late,
    early_num_pcs_90=early_num_pcs_90,
    early_num_pcs_above_1_percent=early_num_pcs_above_1_percent,
    late_num_pcs_90=late_num_pcs_90,
    late_num_pcs_above_1_percent=late_num_pcs_above_1_percent,
)

# Output file, named after the subject.
filename = f'C:/Users/86153/Desktop/MCSRsystem/SetupV2/exampleplots/data/{subject_id}_rotation_analysis.csv'

# Write one header row followed by one row of values (overwrites an existing file).
with open(filename, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=list(data))
    writer.writeheader()
    writer.writerow(data)

print(f"Data saved to {filename}")


Data saved to C:/Users/86153/Desktop/MCSRsystem/SetupV2/exampleplots/data/Subject03_rotation_analysis.csv


# Components test

In [173]:
def _opls_pseudo_efficiency(X, y, n_components, threshold=1e-1):
    """
    Fit an OPLS model and return its pseudo-determinant efficiency ratio.

    The ratio is the pseudo-determinant of np.cov(T_ortho_ @ P_ortho_.T)
    divided by the pseudo-determinant of np.cov(model.transform(X)), both
    computed with ``effective_pseudo_determinant`` at the given threshold.

    Parameters:
    X: Predictor table passed to OPLS.fit / OPLS.transform.
    y: Response passed to OPLS.fit.
    n_components (int): Value passed to the OPLS constructor.
    threshold (float): Singular-value cut-off for the pseudo-determinants.

    Returns:
    float: redundant pseudo-determinant / related pseudo-determinant.
    """
    model = OPLS(n_components)
    fitted = model.fit(X, y)

    # Orthogonal scores times the transposed orthogonal loadings.
    orthogonal_part = fitted.T_ortho_ @ np.transpose(fitted.P_ortho_)
    redundant_cov = np.cov(orthogonal_part)
    related_cov = np.cov(model.transform(X))

    return (
        effective_pseudo_determinant(redundant_cov, threshold=threshold)
        / effective_pseudo_determinant(related_cov, threshold=threshold)
    )


# Component counts to sweep: 5, 6, ..., 40.
opls_inputs = range(5, 41)

# Declared by the original cell but never filled; kept so the names still exist.
# NOTE(review): decide which pseudo-determinants (if any) should be stored here.
early_pseudo_det_values = []
late_pseudo_det_values = []

# Pseudo-determinants of the full covariance matrices. They do not depend on
# the component count, so they are computed once instead of on every iteration.
pseudo_early_cov_matrix = effective_pseudo_determinant(early_cov_matrix, threshold=1e-1)
pseudo_late_cov_matrix = effective_pseudo_determinant(late_cov_matrix, threshold=1e-1)

# Efficiency for each component count. The per-fit intermediates stay inside
# the helper, so this sweep no longer overwrites pseudo_efficiency_late,
# early_related_cov_matrix or late_related_cov_matrix from the single-fit cells.
early_efficiency_values = []
late_efficiency_values = []
for n_components in opls_inputs:
    early_efficiency_values.append(
        _opls_pseudo_efficiency(early_data, early_angerror, n_components)
    )
    late_efficiency_values.append(
        _opls_pseudo_efficiency(late_data, late_angerror, n_components)
    )




In [174]:
# Efficiency as a function of the OPLS component count, one line per phase.
plt.figure(figsize=(10, 6))
for series, legend_label, marker_shape in (
    (early_efficiency_values, "Early Efficiency", 'o'),
    (late_efficiency_values, "Late Efficiency", 's'),
):
    plt.plot(opls_inputs, series, label=legend_label, marker=marker_shape)

# Axis labels, title, legend and grid.
plt.xlabel("OPLS Components")
plt.ylabel("Efficiency")
plt.title("Efficiency vs. OPLS Components")
plt.legend()
plt.grid(True)

# Show the plot
plt.show()