In [1]:
import sys
import platform
import os

# Print Python version details
print("Python version:", sys.version)
print("Python version info:", sys.version_info)
print("Platform:", platform.platform())

# Check for Conda environment; CONDA_DEFAULT_ENV is set when using Conda
conda_env = os.environ.get("CONDA_DEFAULT_ENV", "Not running in a Conda environment")
print("Current Conda Environment:", conda_env)


Python version: 3.9.21 (main, Dec 11 2024, 16:35:24) [MSC v.1929 64 bit (AMD64)]
Python version info: sys.version_info(major=3, minor=9, micro=21, releaselevel='final', serial=0)
Platform: Windows-10-10.0.26100-SP0
Current Conda Environment: ML_DL


In [2]:
import pandas as pd

# Read the CSV file into a DataFrame. Adjust the file path if necessary.
df = pd.read_csv("Turning_human_labels.csv")

# Filter for PD patients who are on medication.
pd_on_med = df[(df["PD_or_C"] == "PD") & (df["On_or_Off_medication"] == "On medication")]

# Filter for PD patients who are off medication.
pd_off_med = df[(df["PD_or_C"] == "PD") & (df["On_or_Off_medication"] == "Off medication")]

# Optionally, preview the filtered DataFrames.
print("PD on medication:")
print(pd_on_med.head())

print("\nPD off medication:")
print(pd_off_med.head())


PD on medication:
    Turn ID  Participant ID number PD_or_C  number_of_turning_steps  \
18       39                    842      PD                        2   
19       40                    842      PD                        3   
20       41                    842      PD                        2   
21       42                    842      PD                        3   
23       46                    842      PD                        3   

   turning_angle type_of_turn  turning_duration On_or_Off_medication  \
18   180_degrees   pivot_turn             1.127        On medication   
19   180_degrees    step_turn             1.581        On medication   
20   180_degrees   pivot_turn             1.531        On medication   
21   180_degrees   pivot_turn             1.672        On medication   
23   180_degrees    step_turn             1.332        On medication   

   DBS_state clinical_assessment  
18    On DBS                 Yes  
19    On DBS                 Yes  
20    On DBS     

In [3]:
print(pd_off_med.info())

<class 'pandas.core.frame.DataFrame'>
Index: 214 entries, 133 to 1730
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Turn ID                  214 non-null    int64  
 1   Participant ID number    214 non-null    int64  
 2   PD_or_C                  214 non-null    object 
 3   number_of_turning_steps  214 non-null    int64  
 4   turning_angle            214 non-null    object 
 5   type_of_turn             214 non-null    object 
 6   turning_duration         214 non-null    float64
 7   On_or_Off_medication     214 non-null    object 
 8   DBS_state                214 non-null    object 
 9   clinical_assessment      214 non-null    object 
dtypes: float64(1), int64(3), object(6)
memory usage: 18.4+ KB
None


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Load Dataset
df = pd.read_csv("Turning_human_labels.csv")

# Step 2: Filter for PD patients who are ON medication
pd_on_med = df[(df["PD_or_C"] == "PD") & (df["On_or_Off_medication"] == "On medication")]

# Step 3: Select Features & Target
features = ["number_of_turning_steps", "turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"]
target = "turning_duration"  # Continuous target variable

# Step 4: Drop missing values
pd_on_med = pd_on_med.dropna(subset=features + [target])

# Step 5: Store Turn IDs before encoding (to keep original references)
turn_ids = pd_on_med[["Turn ID"]].reset_index(drop=True)

# Step 6: Convert Categorical Features into Numeric Values
pd_on_med_encoded = pd.get_dummies(pd_on_med,
                                   columns=["turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"],
                                   drop_first=True)

# Step 7: Define Features (X) and Target (y)
X = pd_on_med_encoded.drop(columns=["Turn ID", "Participant ID number", target, "PD_or_C", "On_or_Off_medication"],
                           errors="ignore")
y = pd_on_med_encoded[target]  # Continuous target remains unchanged

# Step 8: Train-Test Split (No Oversampling Needed)
X_train, X_test, y_train, y_test, train_index, test_index = train_test_split(
    X, y, turn_ids, test_size=0.2, random_state=42
)

# Step 9: Train RandomForest Regressor
regressor = RandomForestRegressor(random_state=42, n_estimators=100, max_depth=5)
regressor.fit(X_train, y_train)

# Step 10: Make Predictions
y_pred = regressor.predict(X_test)

# Step 11: Evaluate Model (Regression Metrics)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# Step 12: Save Predictions with Full Row Data
test_results_df = pd_on_med[pd_on_med["Turn ID"].isin(test_index["Turn ID"])].copy()
test_results_df["Actual Turning Duration"] = y_test.values
test_results_df["Predicted Turning Duration"] = y_pred

test_results_df.to_csv("turning_predictions_full.csv", index=False)
print("Full test predictions saved to turning_predictions_full.csv")


Mean Absolute Error (MAE): 0.46585121182625727
Mean Squared Error (MSE): 0.48827671615600593
R² Score: 0.4673081440730761
Full test predictions saved to turning_predictions_full.csv


In [16]:
# Calculate Mean Absolute Percentage Error (MAPE)
test_results_df["Percentage Error"] = abs(test_results_df["Actual Turning Duration"] - test_results_df["Predicted Turning Duration"]) / test_results_df["Actual Turning Duration"] * 100

# Calculate MAPE and Regression Accuracy
mape = test_results_df["Percentage Error"].mean()
regression_accuracy = 100 - mape

# Display results
mape, regression_accuracy



(21.442469954752827, 78.55753004524718)

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Load Dataset
df = pd.read_csv("Turning_human_labels.csv")

# Step 2: Filter for PD patients who are ON medication
pd_off_med = df[(df["PD_or_C"] == "PD") & (df["On_or_Off_medication"] == "Off medication")]

# Step 3: Select Features & Target
features = ["number_of_turning_steps", "turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"]
target = "turning_duration"  # Continuous target variable

# Step 4: Drop missing values
pd_off_med = pd_off_med.dropna(subset=features + [target])

# Step 5: Store Turn IDs before encoding (to keep original references)
turn_ids = pd_off_med[["Turn ID"]].reset_index(drop=True)

# Step 6: Convert Categorical Features into Numeric Values
pd_off_med_encoded = pd.get_dummies(pd_off_med,
                                   columns=["turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"],
                                   drop_first=True)

# Step 7: Define Features (X) and Target (y)
X = pd_off_med_encoded.drop(columns=["Turn ID", "Participant ID number", target, "PD_or_C", "On_or_Off_medication"],
                           errors="ignore")
y = pd_off_med_encoded[target]  # Continuous target remains unchanged

# Step 8: Train-Test Split (No Oversampling Needed)
X_train, X_test, y_train, y_test, train_index, test_index = train_test_split(
    X, y, turn_ids, test_size=0.2, random_state=42
)

# Step 9: Train RandomForest Regressor
regressor = RandomForestRegressor(random_state=42, n_estimators=100, max_depth=5)
regressor.fit(X_train, y_train)

# Step 10: Make Predictions
y_pred = regressor.predict(X_test)

# Step 11: Evaluate Model (Regression Metrics)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# Step 12: Save Predictions with Full Row Data
test_results_df = pd_on_med[pd_on_med["Turn ID"].isin(test_index["Turn ID"])].copy()
test_results_df["Actual Turning Duration"] = y_test.values
test_results_df["Predicted Turning Duration"] = y_pred

test_results_df.to_csv("turning_predictions_full_off_med.csv", index=False)
print("Full test predictions saved to turning_predictions_full.csv")


Mean Absolute Error (MAE): 0.33351384978910287
Mean Squared Error (MSE): 0.15481963389564993
R² Score: 0.7879987209202153
Full test predictions saved to turning_predictions_full.csv


In [19]:
# Calculate Mean Absolute Percentage Error (MAPE)
test_results_df["Percentage Error"] = abs(test_results_df["Actual Turning Duration"] - test_results_df["Predicted Turning Duration"]) / test_results_df["Actual Turning Duration"] * 100

# Calculate MAPE and Regression Accuracy
mape = test_results_df["Percentage Error"].mean()
regression_accuracy = 100 - mape

# Display results
mape, regression_accuracy



(17.35120038792022, 82.64879961207978)

In [20]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from torch.utils.data import DataLoader, TensorDataset

# Step 1: Load Dataset
df = pd.read_csv("Turning_human_labels.csv")

# Step 2: Filter for PD patients who are ON medication
pd_on_med = df[(df["PD_or_C"] == "PD") & (df["On_or_Off_medication"] == "On medication")]

# Step 3: Select Features & Target
features = ["number_of_turning_steps", "turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"]
target = "turning_duration"  # Continuous target variable

# Step 4: Drop missing values
pd_on_med = pd_on_med.dropna(subset=features + [target])

# Step 5: Store Turn IDs before encoding
turn_ids = pd_on_med[["Turn ID"]].reset_index(drop=True)

# Step 6: Convert Categorical Features into Numeric Values
pd_on_med_encoded = pd.get_dummies(pd_on_med,
                                   columns=["turning_angle", "type_of_turn", "DBS_state", "clinical_assessment"],
                                   drop_first=True)

# Step 7: Define Features (X) and Target (y)
X = pd_on_med_encoded.drop(columns=["Turn ID", "Participant ID number", target, "PD_or_C", "On_or_Off_medication"],
                           errors="ignore")
y = pd_on_med_encoded[target]  # Continuous target remains unchanged

# Step 8: Normalize Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 9: Train-Test Split
X_train, X_test, y_train, y_test, train_index, test_index = train_test_split(
    X_scaled, y, turn_ids, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)



In [24]:
# Step 10: Define Neural Network Model
class RegressionModel(nn.Module):
    def __init__(self, input_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)  # Output single continuous value
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)  # No activation in the output layer
        return x


# Step 11: Model setup
input_size = X_train.shape[1]
model = RegressionModel(input_size)
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 12: Train the Model
epochs = 500
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1/500, Loss: 2.7315
Epoch 2/500, Loss: 0.7676
Epoch 3/500, Loss: 0.4926
Epoch 4/500, Loss: 0.4156
Epoch 5/500, Loss: 0.3720
Epoch 6/500, Loss: 0.3508
Epoch 7/500, Loss: 0.3442
Epoch 8/500, Loss: 0.3377
Epoch 9/500, Loss: 0.3313
Epoch 10/500, Loss: 0.3347
Epoch 11/500, Loss: 0.3323
Epoch 12/500, Loss: 0.3295
Epoch 13/500, Loss: 0.3211
Epoch 14/500, Loss: 0.3230
Epoch 15/500, Loss: 0.3263
Epoch 16/500, Loss: 0.3251
Epoch 17/500, Loss: 0.3155
Epoch 18/500, Loss: 0.3251
Epoch 19/500, Loss: 0.3227
Epoch 20/500, Loss: 0.3187
Epoch 21/500, Loss: 0.3215
Epoch 22/500, Loss: 0.3208
Epoch 23/500, Loss: 0.3172
Epoch 24/500, Loss: 0.3185
Epoch 25/500, Loss: 0.3249
Epoch 26/500, Loss: 0.3306
Epoch 27/500, Loss: 0.3127
Epoch 28/500, Loss: 0.3145
Epoch 29/500, Loss: 0.3185
Epoch 30/500, Loss: 0.3190
Epoch 31/500, Loss: 0.3084
Epoch 32/500, Loss: 0.3071
Epoch 33/500, Loss: 0.3122
Epoch 34/500, Loss: 0.3112
Epoch 35/500, Loss: 0.3086
Epoch 36/500, Loss: 0.3304
Epoch 37/500, Loss: 0.3174
Epoch 38/5

In [25]:

# Step 13: Evaluate Model
model.eval()
y_pred_list = []
y_true_list = []
test_ids_list = list(test_index["Turn ID"])  # Store Turn IDs for later

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        y_pred_list.extend(outputs.numpy().flatten())  # Convert predictions to list
        y_true_list.extend(y_batch.numpy().flatten())  # Convert actual values to list

# Step 14: Compute Regression Metrics
mae = mean_absolute_error(y_true_list, y_pred_list)
mse = mean_squared_error(y_true_list, y_pred_list)
r2 = r2_score(y_true_list, y_pred_list)

print("PyTorch Deep Learning Model Performance:")
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# Step 15: Save Predictions with Full Row Data
test_results_df = pd_on_med[pd_on_med["Turn ID"].isin(test_ids_list)].copy()
test_results_df["Actual Turning Duration"] = y_true_list
test_results_df["Predicted Turning Duration"] = y_pred_list

test_results_df.to_csv("turning_predictions_deep_learning.csv", index=False)
print("Full test predictions saved to turning_predictions_deep_learning.csv")

PyTorch Deep Learning Model Performance:
Mean Absolute Error (MAE): 0.46613698403040565
Mean Squared Error (MSE): 0.5315549267017103
R² Score: 0.4200932201731026
Full test predictions saved to turning_predictions_deep_learning.csv


In [26]:
# Re-import necessary libraries after execution state reset
import pandas as pd
import numpy as np

# Load the saved deep learning model predictions file
file_path = "turning_predictions_deep_learning.csv"
df = pd.read_csv(file_path)

# Extract actual and predicted values
y_true_array = df["Actual Turning Duration"].values
y_pred_array = df["Predicted Turning Duration"].values

# Avoid division by zero by replacing zero values with a small number
y_true_array = np.where(y_true_array == 0, 1e-8, y_true_array)

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_true_array - y_pred_array) / y_true_array)) * 100

# Compute Regression Accuracy as (100 - MAPE)
regression_accuracy = 100 - mape

# Display Accuracy
regression_accuracy


79.29326910215447