In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from tqdm import tqdm

In [2]:
class FitRecWindowDataset(Dataset):
    def __init__(
        self,
        df: pd.DataFrame,
        numerical_columns: list,
        categorical_columns: list,
        target_input_column: str,
        target_output_column: str,
        workout_id_column: str = "id",
        window_size: int = 10,
    ):
        # Since all the workoutsequences have identical lengths (300),
        # we can exploit this to create sliding windows efficiently.
        #
        # We first stack all the workout feature sequences together, to form [N, n_features] array.
        # We then pad it from the left, since we will be creating sliding windows
        # right from t = 0 and our window length = 10 (so for the first 9 datapoints, they will be mostly 0s).
        # Finally, we create sliding windows by slicing the padded array.
        #
        # For static features like user, sport and gender ids, we do not duplicate them for each window.
        # Instead, we create a mapping from workout_id to these static features,
        # and then duplicate only workout ids when creating the windows.
        # at runtime, we take the workout id and lookup the static features.

        sequence_length = len(df.iloc[0][target_output_column])

        print("Stacking data arrays...")
        numerical_values_array = np.stack(
            [np.stack(df[col]) for col in numerical_columns], axis=-1
        )
        target_input_array = np.stack(df[target_input_column])
        target_output_array = np.stack(df[target_output_column])
        workout_id_array = np.stack(df["id"])

        print("Padding data arrays...")
        padding_config_num = ((0, 0), (window_size - 1, 0), (0, 0))
        padding_config_target_input = ((0, 0), (window_size, 0))

        numerical_values_array = np.pad(
            numerical_values_array,
            padding_config_num,
            mode="constant",
            constant_values=0,
        )

        target_input_array = np.pad(
            target_input_array,
            padding_config_target_input,
            mode="constant",
            constant_values=0,
        )

        print("Creating workout_id to static feature mapping...")
        self.workout_id_to_indices = {}

        for row in df[[workout_id_column] + categorical_columns].values:
            self.workout_id_to_indices[row[0]] = [
                row[i + 1] for i in range(len(categorical_columns))
            ]

        del df  # free memory

        print("Creating sliding windows...")
        numerical_datapoints = []
        target_output_datapoints = []

        for t in tqdm(range(window_size, sequence_length + window_size)):
            lower_index = t - window_size
            upper_index = t

            num_slice = numerical_values_array[:, lower_index:upper_index, :]
            target_input_slice = target_input_array[:, lower_index:upper_index]
            target_output_value = target_output_array[:, lower_index]

            num_slice = np.concat(
                [num_slice, target_input_slice[:, :, np.newaxis]], axis=-1
            )

            numerical_datapoints.append(num_slice)
            target_output_datapoints.append(target_output_value)

        self.X = np.vstack(numerical_datapoints)
        self.y = np.hstack(target_output_datapoints)
        self.X_ids = np.tile(workout_id_array, sequence_length)

        print("Cleaning up...")

        del numerical_values_array, target_input_array, target_output_array
        del numerical_datapoints, target_output_datapoints, workout_id_array

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        user_idx, sport_idx, gender_idx = self.workout_id_to_indices[self.X_ids[idx]]
        return (
            torch.tensor(self.X[idx], dtype=torch.float32),
            torch.tensor(self.y[idx], dtype=torch.float32),
            torch.tensor(user_idx, dtype=torch.long),
            torch.tensor(sport_idx, dtype=torch.long),
            torch.tensor(gender_idx, dtype=torch.long),
        )

In [14]:
class FitRecShortTerm(nn.Module):
    def __init__(
        self,
        n_users,
        n_sports,
        n_genders,
        seq_length=10,
        embed_dim=5,
        hidden_dim=128,
        dropout_prob=0.1,
    ):
        super(FitRecShortTerm, self).__init__()

        self.seq_length = seq_length

        # 1. Attribute Embeddings (learned from scratch)
        self.user_embedding = nn.Embedding(n_users, embed_dim)
        self.sport_embedding = nn.Embedding(n_sports, embed_dim)
        self.gender_embedding = nn.Embedding(n_genders, embed_dim)

        # 2. LSTM Sequential Processor
        # Input: speed (1) + altitude (1) + prev_hr (1) + 3 embeddings (5 each) = 18 total features
        self.input_dim = 3 + (embed_dim * 3)

        self.lstm = nn.LSTM(
            input_size=self.input_dim,
            hidden_size=hidden_dim,
            num_layers=1,
            batch_first=True,
            dropout=0.0,  # No dropout between LSTM layers since we have only 1 layer
        )

        self.dropout = nn.Dropout(dropout_prob)

        # 4. Final Prediction Layer (Regressor)
        # Using SELU activation as mentioned for the FitRec projection layer
        self.regressor = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.SELU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim // 2, 1),
        )

        # --- WEIGHT INITIALIZATION LOGIC ---
        # self._init_weights()

    def _init_weights(self):
        # Initialize LSTM weights
        for name, param in self.lstm.named_parameters():
            if "weight" in name:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0.0)

        # Initialize Linear layers in the regressor
        for m in self.regressor:
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.01)

    def forward(self, seq_data, user_id, sport_id, gender_id):
        """
        seq_data: [batch, 10, 3] -> (std_speed, std_alt, std_prev_hr)
        user_id, sport_id, gender_id: [batch] integers
        """

        # Generate and repeat static embeddings across all 10 time steps
        u_emb = self.user_embedding(user_id).unsqueeze(1).repeat(1, self.seq_length, 1)
        s_emb = (
            self.sport_embedding(sport_id).unsqueeze(1).repeat(1, self.seq_length, 1)
        )
        g_emb = (
            self.gender_embedding(gender_id).unsqueeze(1).repeat(1, self.seq_length, 1)
        )

        # Concatenate sequential and static features into the 18/19-dim vector
        # Order: [Sensor Data, Attributes]
        x = torch.cat([seq_data, u_emb, s_emb, g_emb], dim=2)

        # Pass through LSTM [cite: 238, 282]
        # lstm_out shape: [batch, 10, 64]
        lstm_out, (h_n, c_n) = self.lstm(x)

        # Use the LAST hidden state for short-term prediction [cite: 297]
        # Applying dropout to the hidden state before regression [cite: 352]
        last_hidden = self.dropout(h_n.squeeze(0))

        # Final output (Predicted Heart Rate BPM)
        prediction = self.regressor(last_hidden)

        return prediction

In [4]:
numerical_columns = [
    # "time_elapsed_standardized",
    "altitude_standardized",
    "derived_speed_standardized",
    # "derived_distance_standardized",
]
categorical_columns = ["user_idx", "sport_idx", "gender_idx"]
target_input_column = "heart_rate_standardized"
target_output_column = "heart_rate"
workout_id_column = "id"

columns = (
    numerical_columns
    + categorical_columns
    + [target_input_column, target_output_column, workout_id_column]
)

# Can't read only specific columns from parquet for some goofy reason
df = pd.read_parquet(
    Path("train.parquet"),
    dtype_backend="pyarrow",
)[columns]

In [5]:
n_unique_users = df["user_idx"].nunique()
n_unique_sports = df["sport_idx"].nunique()
n_unique_genders = df["gender_idx"].nunique()

In [15]:
model = FitRecShortTerm(
    n_users=n_unique_users,
    n_sports=n_unique_sports,
    n_genders=n_unique_genders,
)

In [16]:
train_dataset = FitRecWindowDataset(
    df,
    numerical_columns=numerical_columns,
    categorical_columns=categorical_columns,
    target_input_column=target_input_column,
    target_output_column=target_output_column,
)

Stacking data arrays...
Padding data arrays...
Creating workout_id to static feature mapping...
Creating sliding windows...


100%|██████████| 300/300 [00:05<00:00, 51.04it/s]


Cleaning up...


In [20]:
from torch.utils.data import DataLoader

# Hyperparameters
BATCH_SIZE = 2048  # Optimized for short-term prediction stability

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    pin_memory=True if torch.cuda.is_available() else False,
    num_workers=8,  # Set this to roughly half of your physical cores
)

In [21]:
import torch
import torch.optim as optim

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
criterion = torch.nn.MSELoss()

for epoch in range(4):
    # --- TRAINING PHASE ---
    model.train()
    train_loss = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{4}")
    for x, y, user_idx, sport_idx, gender_idx in pbar:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(x, user_idx, sport_idx, gender_idx)
        loss = criterion(outputs, y)

        # Backward pass
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pbar.set_postfix({"loss": f"{loss.item():.4f}"})

    avg_train_loss = train_loss / len(train_loader)

    print(f"Epoch {epoch + 1} Summary: Train MSE: {avg_train_loss:.4f}")

Epoch 1/4:   0%|          | 0/11813 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)
Epoch 1/4:   5%|▍         | 553/11813 [00:37<11:34, 16.22it/s, loss=564.6927]  

In [11]:
bruh = iter(train_loader)
X, y, u, s, g = next(bruh)