In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm
import os
import dataclasses
import sys
import ast

# Add the project root directory to the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import custom modules
from Model.modules_lstm import LSTMEncoder
from Model.modules_dense_nn import DenseNN, PersonalizedScalarNN
from Model.dbn import DBNModel, DBNConfig
from Model.data import WorkoutDataset, WorkoutDatasetConfig, make_dataloaders
from Model.trainer import Trainer

In [2]:
import dataclasses
import pandas as pd

df = pd.read_feather("../output/endomondo.feather")
df_tmp = df

In [3]:
import numpy as np

def safe_flatten(x):
    if isinstance(x, (list, np.ndarray)):  
        # Flatten the list or array
        if isinstance(x, np.ndarray) and x.ndim > 1:
            return x.flatten().tolist()
        return [item for sublist in x for item in sublist] if isinstance(x, list) else x.tolist()
    else:
        # Return single float as a list with one item
        return [x]

# Apply the function to the column
y = df['heart_rate_normalized'].apply(safe_flatten)

In [4]:
# Convert list-like columns into individual columns per feature if necessary.
X = df[['speed_h', 'speed_v', 'distance']].apply(lambda x: np.concatenate(x.values).ravel(), axis=1)
y = df['heart_rate_normalized'].apply(lambda x: np.concatenate([x]).ravel() if isinstance(x, list) else x)

# Check if the number of samples matches
assert len(X) == len(y), "Mismatched X and y lengths"

In [5]:
from sklearn.model_selection import train_test_split

# Perform the train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(30658,) (30658,)
(7665,) (7665,)


In [6]:
data_config_train = WorkoutDatasetConfig(
    subject_id_column = "userId",
    workout_id_column = "id",
    time_since_start_column ='time_grid',
    time_of_start_column = 'start_dt',
    heart_rate_column = 'heart_rate',
    heart_rate_normalized_column = 'heart_rate_normalized',
    activity_columns = ["speed_h", "speed_v"],
    weather_columns = [],
    history_max_length=512,   
)
data_config_test = dataclasses.replace(data_config_train, chunk_size=None, stride=None)

train_dataset = WorkoutDataset(df_tmp[df_tmp["in_train"]], data_config_train)
test_dataset = WorkoutDataset(df_tmp, data_config_test)

train_dataloader, test_dataloader = make_dataloaders(train_dataset, test_dataset, batch_size=128)

100%|██████████| 30430/30430 [00:09<00:00, 3290.06it/s]
100%|██████████| 30430/30430 [00:02<00:00, 11183.72it/s]
100%|██████████| 38323/38323 [00:05<00:00, 6765.97it/s]
100%|██████████| 38323/38323 [00:11<00:00, 3437.22it/s]


In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, precision_score, recall_score
import os

# Load the LSTM model (DBNModel in your case)
# Assuming the model has already been defined and trained
model = DBNModel(config=dbn_config, workouts_info=df_tmp[["userId", "id"]])

# Load the trained model weights
model.load_state_dict(torch.load('best_model.pt'))
model.eval()  # Set the model to evaluation mode

# Check that the model is ready
print("Model loaded and ready for evaluation.")


In [7]:
from Model.dbn import DBNModel, DBNConfig
from Model.trainer import Trainer

# Define Model Configuration
dbn_config = DBNConfig(
    data_config=data_config_train,
    seq_length=64, 
    learning_rate=1e-3,
    seed=0,
    n_epochs=10,
    lstm_hidden_dim=128,
    lstm_layers=2,
    dbn_hidden_dim=64,
    personalization="none",
    dim_personalization=8,
    subject_embedding_dim=8,
    encoder_embedding_dim=8,
    dropout=0.5,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Instantiate the Model
model = DBNModel(
    config=dbn_config,
    workouts_info=df_tmp[["userId", "id"]]
)
model
# Load the trained state dictionary into the model
model.load_state_dict(torch.load('best_model.pt'))

# Set the model to evaluation mode
model.eval()

# Your model is now ready for evaluation or inference

DBNModel(
  (embedding_store): EmbeddingStore(
    (subject_embeddings): Embedding(558, 8, max_norm=5.0)
    (encoder): LSTMEncoder(
      (lstm): LSTM(5, 128, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
      (dropout): Dropout(p=0.5, inplace=False)
      (fc): Linear(in_features=256, out_features=8, bias=True)
      (batch_norm): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (lstm_encoder): LSTMEncoder(
    (lstm): LSTM(5, 128, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (fc): Linear(in_features=256, out_features=8, bias=True)
    (batch_norm): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (adafs_soft): AdaFSSoft(
    (controller): ControllerMLP(
      (mlp): MultiLayerPerceptron(
        (mlps): ModuleList(
          (0): Sequential(
            (0): Linear(in_features=1216, out_features=1216, bias=True)
      

In [10]:
import torch
from torch.nn.utils.rnn import pad_sequence

# Assuming X_test is a list of numpy arrays (or lists)
X_test_tensors = [torch.tensor(x, dtype=torch.float32) for x in X_test.values]

# Pad the sequences to the same length
X_test_padded = pad_sequence(X_test_tensors, batch_first=True)

# Ensure padding was successful
print(X_test_padded.shape)


torch.Size([7665, 2158])


In [18]:
# Ensure that the model is on the correct device
device = dbn_config.device  # This is likely set to "cuda" or "cuda:0" if a GPU is available

# Move model to the correct device if it's not already there
model.to(device)

all_predictions = []

with torch.no_grad():
    for batch in test_dataloader:
        # Extract the necessary inputs from the batch dictionary and convert to tensor if needed, then move to the correct device
        activity = torch.tensor(batch['activity']).to(device) if isinstance(batch['activity'], np.ndarray) else batch['activity'].to(device)
        history = torch.tensor(batch['history']).to(device) if isinstance(batch['history'], np.ndarray) else batch['history'].to(device)
        subject_ids = torch.tensor(batch['subject_id']).to(device) if isinstance(batch['subject_id'], np.ndarray) else batch['subject_id'].to(device)
        workout_ids = torch.tensor(batch['workout_id']).to(device) if isinstance(batch['workout_id'], np.ndarray) else batch['workout_id'].to(device)

        # Forward pass through the model
        predictions = model(activity=activity, history=history, subject_ids=subject_ids, workout_ids=workout_ids)
        
        # Collect predictions and move to CPU for further processing
        all_predictions.append(predictions.cpu().numpy())

# Combine all batch predictions into one array
predicted_heart_rates = np.concatenate(all_predictions, axis=0)

print(predicted_heart_rates)



ValueError: too many values to unpack (expected 2)

In [14]:
for batch in test_dataloader:
    print(batch)
    break  # Print just the first batch to check the structure


{'subject_id': array([3905196, 3905196, 3905196, 3905196, 3905196,  653747,  653747,
        653747,  653747,  653747,  653747, 1609501, 1342020,   81753,
        653747,  653747,  260784,  260784,  136171, 2390403, 2390403,
       2390403, 2390403, 2390403, 2390403, 2390403, 2390403, 2390403,
       2390403, 2390403, 2390403, 2390403, 2390403, 2390403, 3471841,
       2390403, 2390403, 2390403, 3471841, 2390403, 3471841, 2390403,
        804068, 2390403, 2390403, 2390403, 2390403, 2390403, 2390403,
       2390403, 2390403, 2390403, 2390403, 2390403, 2390403, 2390403,
         81753, 2390403,   81753, 2390403, 2175400, 2175400, 2390403,
       4025656, 2390403, 4025656, 1818822, 2390403, 2390403, 1818822,
       1732973, 4025656, 2390403, 2390403, 2390403, 1732973, 2390403,
       8582302, 2390403, 8582302, 8582302, 4025656, 2175400, 2390403,
       8582302, 8582302, 2390403, 1818822, 2175400, 1818822, 2175400,
       1818822, 2232554, 1818822, 2175400, 4025656, 1818822, 2390403,
     

In [9]:
def recommend_workouts(predicted_hr, df, threshold=10):
    recommendations = []
    for i, workout in df.iterrows():
        workout_predicted_hr = model(torch.tensor([workout['speed_h'], workout['speed_v'], workout['distance']], dtype=torch.float32)).item()
        if abs(workout_predicted_hr - predicted_hr) < threshold:
            recommendations.append(workout)
    return recommendations

# Example: Generate recommendations for the first test case
recs = recommend_workouts(predicted_heart_rates[0], df)



NameError: name 'predicted_heart_rates' is not defined

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# For simplicity, let's evaluate on the first few test cases
mae = mean_absolute_error([item for sublist in y_test[:5] for item in sublist], predicted_heart_rates[:5])
rmse = mean_squared_error([item for sublist in y_test[:5] for item in sublist], predicted_heart_rates[:5], squared=False)

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")



In [None]:
import matplotlib.pyplot as plt

# Plot actual vs predicted heart rates for the first test case
plt.figure(figsize=(10, 6))
plt.plot([item for sublist in y_test[:1] for item in sublist], label='Actual Heart Rates', marker='o')
plt.plot(predicted_heart_rates[:1], label='Predicted Heart Rates', marker='x')
plt.title('Actual vs Predicted Heart Rates')
plt.xlabel('Time Point Index')
plt.ylabel('Heart Rate')
plt.legend()
plt.show()
