In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm
import os
import dataclasses
import sys
import ast

# Add the project root directory (the parent of the current working directory)
# to the system path; presumably this is what lets the `Model.*` imports below resolve.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import custom modules
from Model.modules_lstm import LSTMEncoder
from Model.modules_dense_nn import DenseNN, PersonalizedScalarNN
from Model.dbn import DBNModel, DBNConfig
from Model.data import WorkoutDataset, WorkoutDatasetConfig, make_dataloaders
from Model.trainer import Trainer


In [2]:
import dataclasses
import pandas as pd

# Load the Endomondo workout table from the feather file (path is relative to
# the current working directory).
df = pd.read_feather("../output/endomondo.feather")

# Use an independent copy for the exploratory statistics. A plain `df_tmp = df`
# only creates a second name for the same object, so the helper columns that the
# summary-statistics cell assigns to `df_tmp` would silently appear on `df` too.
df_tmp = df.copy()

In [3]:
# Display the column labels of the loaded frame.
df.columns

Index(['time_grid', 'heart_rate', 'speed_h', 'speed_v', 'userId', 'id',
       'distance', 'start_dt', 'end_dt', 'heart_rate_normalized', 'in_train',
       'subject_idx'],
      dtype='object')

In [4]:
import pandas as pd
import numpy as np

# Summary statistics for the workout table held in 'df_tmp' (one row per workout,
# judging by how the columns are used below).
# Per-workout helper columns are written onto 'df_tmp' first, in a fixed order;
# the dataset-level aggregates are then derived from them and printed.


def _final_reading(samples):
    """Return the last element of ``samples``, or NaN if it has no elements."""
    if len(samples) == 0:
        return np.nan
    return samples[-1]


# --- Per-workout columns ---------------------------------------------------
# (new column, source column, reducer applied to each row's sequence)
_sequence_reductions = [
    ('avg_speed_h', 'speed_h', np.mean),
    ('avg_speed_v', 'speed_v', np.mean),
    ('avg_heart_rate', 'heart_rate', np.mean),
    ('max_heart_rate', 'heart_rate', np.max),
]
for _target, _source, _reducer in _sequence_reductions:
    df_tmp[_target] = df_tmp[_source].apply(_reducer)

# Elapsed time between the start and end timestamps, converted from seconds to minutes
_finished = pd.to_datetime(df_tmp['end_dt'])
_started = pd.to_datetime(df_tmp['start_dt'])
df_tmp['workout_duration'] = (_finished - _started).dt.total_seconds() / 60

# Last entry of each row's distance sequence, used as that workout's total distance
df_tmp['total_distance'] = df_tmp['distance'].apply(_final_reading)

# --- Dataset-level aggregates ----------------------------------------------
# Distinct users and distinct workout ids
total_users = df_tmp['userId'].nunique()
total_workouts = df_tmp['id'].nunique()

# Means of the per-workout columns computed above
average_speed_h = df_tmp['avg_speed_h'].mean()
average_speed_v = df_tmp['avg_speed_v'].mean()
average_heart_rate = df_tmp['avg_heart_rate'].mean()
# Note: this is the mean over workouts of each workout's maximum heart rate
max_heart_rate = df_tmp['max_heart_rate'].mean()
average_duration = df_tmp['workout_duration'].mean()
average_total_distance = df_tmp['total_distance'].mean()

# Mean number of rows (workouts) recorded per user
_workouts_by_user = df_tmp.groupby('userId')['id'].count()
workout_frequency_per_user = _workouts_by_user.mean()

# Mean over workouts of each workout's mean normalized heart rate
_per_workout_norm_hr = df_tmp['heart_rate_normalized'].apply(np.mean)
average_normalized_hr = _per_workout_norm_hr.mean()

# Rows flagged 'in_train' versus the remainder
workouts_in_train = df_tmp['in_train'].sum()
workouts_not_in_train = len(df_tmp) - workouts_in_train

# --- Report ----------------------------------------------------------------
_report_lines = (
    f"Total number of users: {total_users}",
    f"Total number of workouts: {total_workouts}",
    f"Average horizontal speed per workout: {average_speed_h:.2f} m/s",
    f"Average vertical speed per workout: {average_speed_v:.2f} m/s",
    f"Average heart rate per workout: {average_heart_rate:.2f} BPM",
    f"Maximum heart rate per workout: {max_heart_rate:.2f} BPM",
    f"Average workout duration: {average_duration:.2f} minutes",
    f"Average total distance covered per workout: {average_total_distance:.2f} meters",
    f"Workout frequency per user: {workout_frequency_per_user:.2f}",
    f"Average normalized heart rate: {average_normalized_hr:.2f}",
    f"Workouts in training set: {workouts_in_train}",
    f"Workouts not in training set: {workouts_not_in_train}",
)
for _line in _report_lines:
    print(_line)


Total number of users: 558
Total number of workouts: 38323
Average horizontal speed per workout: 2.95 m/s
Average vertical speed per workout: -0.00 m/s
Average heart rate per workout: 149.97 BPM
Maximum heart rate per workout: 168.04 BPM
Average workout duration: 48.54 minutes
Average total distance covered per workout: 8554.33 meters
Workout frequency per user: 68.68
Average normalized heart rate: 0.36
Workouts in training set: 30430
Workouts not in training set: 7893
