In [5]:
import pandas as pd

# Read the CSV and parse its 'Time' column into timezone-aware (UTC) timestamps.
df = pd.read_csv('processed_data/with_ranges_features.csv').assign(
    Time=lambda frame: pd.to_datetime(frame['Time'], utc=True)
)
# Let pandas show up to 500 rows when a frame is displayed.
pd.options.display.max_rows = 500

In [7]:
# Show the distinct HeartRateClass values present in the loaded data.
print(df.loc[:, 'HeartRateClass'].unique())

[ 4  5  6  7  8  9  3  2 10]


In [9]:
def calculate_statistics(group, n_minutes=5, label_window_seconds=30):
    """Summarise one run as per-minute feature means plus a label.

    The first ``n_minutes`` minutes of the run, measured from the earliest
    ``Time`` in ``group``, are split into one-minute windows. Each window
    includes its start instant and excludes its end instant. For every window
    ``i`` (1-based) the mean of each feature column is stored under the key
    ``'<column>_mean_minute_<i>'``. A window that contains no rows yields NaN.

    The label is the most frequent ``HeartRateClass`` among the rows whose
    ``Time`` falls within the final ``label_window_seconds`` seconds of the
    run. Ties resolve to the first value returned by ``Series.mode()``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single run. Must contain ``Time``,
        ``HeartRateClass`` and every column named in ``feature_columns``.
    n_minutes : int, default 5
        Number of leading one-minute windows to summarise.
    label_window_seconds : int or float, default 30
        Length, in seconds, of the trailing window the label is taken from.

    Returns
    -------
    pandas.Series
        ``8 * n_minutes`` feature means followed by ``'Label'``. ``'Label'``
        is NaN when the trailing window holds no non-null ``HeartRateClass``
        (previously this case raised ``IndexError``).
    """
    # Order matters: it fixes the column order of the resulting feature table.
    feature_columns = (
        'HeartRate',
        'Elevation',
        'Cadence',
        'Speed',
        'HeartRateRange',
        'HeartRateQuotient',
        'SpeedRange',
        'SpeedQuotient',
    )

    timestamps = group['Time']
    # The run boundaries are loop-invariant, so compute them once.
    run_start = timestamps.min()
    run_end = timestamps.max()

    statistics = {}
    for minute in range(1, n_minutes + 1):
        window_start = run_start + pd.Timedelta(minutes=minute - 1)
        window_end = run_start + pd.Timedelta(minutes=minute)

        # Half-open window [window_start, window_end) so no row is counted twice.
        in_window = (timestamps >= window_start) & (timestamps < window_end)
        window_rows = group[in_window]

        for column in feature_columns:
            statistics[f'{column}_mean_minute_{minute}'] = window_rows[column].mean()

    label_rows = group[timestamps >= run_end - pd.Timedelta(seconds=label_window_seconds)]
    label_modes = label_rows['HeartRateClass'].mode()
    # mode() is empty when every class in the window is missing; report NaN
    # instead of failing so the caller can drop the run like any other
    # incomplete one.
    statistics['Label'] = label_modes.iloc[0] if not label_modes.empty else float('nan')

    return pd.Series(statistics)



# Build one row of statistics per run, then discard every run for which
# any statistic (or the label) is missing.
df_statistics = (
    df.groupby('RunID')
    .apply(calculate_statistics)
    .reset_index()
    .dropna()
)

In [10]:
# Show the distinct labels that remain after incomplete runs were dropped.
print(df_statistics.loc[:, 'Label'].unique())

[6. 7. 9. 8. 5. 4.]


In [11]:
# Persist the per-run statistics table as CSV, leaving out the row index.
df_statistics.to_csv(path_or_buf="processed_data/train_data.csv", index=False)