# LSTM heart rate class prediction

Inspiration taken from: https://github.com/rikluost/athlete_hr_predict

In [None]:
# load libraries
import os, glob 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Relative locations used by the notebook: the CSV exports of the fit files,
# a second CSV directory (presumably held-out test files — confirm), and a
# directory for graphs (presumably output plots — confirm).
fit_path = "../fit_file_csv"
fit_test_path = "../fit_file_test_csv"
graph_path = "../graphs"
# Switch the process working directory to the CSV folder. Every relative path
# evaluated after this point (e.g. glob("*.csv")) is resolved against it.
os.chdir(fit_path)


In [None]:
# Add derived altitude columns to every CSV in the working directory and remove
# the geographical coordinates for privacy. Each file is rewritten in place.
fit_files = glob.glob("*.csv")
for file in fit_files:
    # glob() returned names relative to the current working directory, so read
    # and write through that same name. Re-prefixing the relative fit_path only
    # worked because "../<dir>" happened to lead back into the same directory.
    df = pd.read_csv(file, index_col='timestamp')
    # Altitude change versus the previous sample; the first row has no
    # predecessor and is NaN until the back-fill below.
    df['alt_difference'] = df['enhanced_altitude'].diff()
    # Moving average over 5 samples (equals 5 s only at 1 Hz sampling — TODO confirm).
    df['rolling_ave_alt'] = df['alt_difference'].rolling(window=5).mean()
    # Back-fill the leading NaNs created by diff()/rolling(). Note that this
    # also fills missing values in every other column of the file.
    df = df.bfill()
    # errors='ignore': files without coordinate columns are left as they are.
    df = df.drop(columns=['position_lat', 'position_long'], errors='ignore')
    df.to_csv(file)


In [None]:
# Read the complete feature table (path is relative to the working directory)
data = pd.read_csv('../with_ranges_features.csv')

# A single encoder fitted on all rows, so every run shares the same
# class -> integer mapping and the same number of classes
label_encoder = LabelEncoder().fit(data['HeartRateClass'])
num_classes = len(label_encoder.classes_)

# One sub-frame per run, keyed by 'RunID'
grouped = data.groupby(by='RunID')

def preprocess(df):
    """Split a frame into model inputs and one-hot encoded targets.

    Args:
        df: DataFrame containing the seven feature columns listed below and
            a 'HeartRateClass' column.

    Returns:
        (features, label): the selected feature columns as a DataFrame, and
        the 'HeartRateClass' values encoded with the module-level
        ``label_encoder`` and expanded to ``num_classes`` one-hot columns.
    """
    # NOTE(review): 'HeartRate' is used as an input while the target is
    # 'HeartRateClass' — if the class is derived from the heart rate this is
    # target leakage; confirm before trusting the accuracy numbers.
    feature_columns = [
        'Latitude', 'Longitude', 'Elevation', 'Distance',
        'HeartRate', 'Cadence', 'Speed',
    ]
    features = df[feature_columns]

    # String/ordinal class -> integer id -> one-hot row
    class_ids = label_encoder.transform(df['HeartRateClass'])
    one_hot = to_categorical(class_ids, num_classes=num_classes)

    return features, one_hot

def split_data(df):
    """Preprocess a frame and split it 80/20 into training and validation sets.

    Args:
        df: DataFrame accepted by ``preprocess``.

    Returns:
        Tuple ``(x_train, x_val, y_train, y_val)``. The split is randomly
        shuffled with a fixed seed (random_state=42), so it is reproducible.
    """
    inputs, targets = preprocess(df)
    # train_test_split yields [x_train, x_val, y_train, y_val] in that order
    parts = train_test_split(inputs, targets, test_size=0.2, random_state=42)
    return tuple(parts)

In [None]:
def build_model(num_classes, num_features=7):
    """Build an uncompiled feed-forward classifier.

    Args:
        num_classes: Number of units in the softmax output layer (one per
            class).
        num_features: Number of input values per sample. Defaults to 7, the
            width previously hard-coded here, so existing callers are
            unaffected; pass a different value if the feature set changes.

    Returns:
        A ``keras.Sequential`` model:
        Input(num_features) -> Dense(64, relu) -> Dense(32, relu)
        -> Dense(num_classes, softmax).
    """
    model = keras.Sequential([
        keras.layers.Input(shape=(num_features,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax')  # Multi-class classification
    ])
    return model

In [None]:
# Train one independent classifier per run (one group per RunID)
for run_id, group in grouped:
    print(f"Processing RunID: {run_id}")

    # A new model is created on every iteration. Reset Keras' global state
    # first so models/layers from earlier runs do not accumulate in memory.
    keras.backend.clear_session()

    df = group.copy()

    # Split the data into training and validation sets
    x_train, x_val, y_train, y_val = split_data(df)

    # Callbacks are created per run so logs and checkpoints do not collide:
    # - stop when val_loss has not improved for 5 consecutive epochs
    # - write TensorBoard logs to a run-specific directory
    # - keep only the weights of the best epoch (lowest val_loss)
    es_callback = keras.callbacks.EarlyStopping(monitor="val_loss", min_delta=0, patience=5, verbose=1)
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=f'./logs/run_{run_id}', histogram_freq=1)
    modelckpt_callback = keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        filepath=f'model_checkpoint_{run_id}.weights.h5',
        verbose=1,
        save_weights_only=True,
        save_best_only=True,
    )

    # Build and compile the model; labels are one-hot, hence categorical_crossentropy
    model = build_model(num_classes)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"], loss="categorical_crossentropy")

    # Train the model (at most 50 epochs; early stopping may end it sooner)
    model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=50,
        callbacks=[es_callback, tensorboard_callback, modelckpt_callback]
    )