In [1]:
import pandas as pd

df = pd.read_csv('processed_data/with_ranges_features.csv')
df['Time'] = pd.to_datetime(df['Time'], utc=True, format='ISO8601')
pd.set_option('display.max_rows', 500)

In [2]:
print(df['HeartRateClass'].unique())

[ 4  6  7  8  5  9  3  2 10  1]


In [3]:
def calculate_statistics(group):
    statistics = {}
    start_time =  group['Time'].min()

    # Define here interval of statistics in seconds
    interval = 60
    
    # Iterate over each minute in the first 5 minutes
    for i in range(interval, 301,interval):
        end_time = start_time + pd.Timedelta(seconds=i)
        
        # Filter the data for the current minute
        current_period_data = group[(group['Time'] >= start_time) & (group['Time'] < end_time)]
        
        # Calculate statistics for heart rate, elevation, speed and cadence for the current minute
        statistics[f'HeartRate_mean_second_{i}'] = current_period_data['HeartRate'].mean()
        statistics[f'Elevation_mean_second_{i}'] = current_period_data['Elevation'].mean()
        statistics[f'Cadence_mean_second_{i}'] = current_period_data['Cadence'].mean()
        statistics[f'Speed_mean_second_{i}'] = current_period_data['Speed'].mean()
        statistics[f'HeartRateRange_mean_second_{i}'] = current_period_data['HeartRateRange'].mean()
        statistics[f'HeartRateQuotient_mean_second_{i}'] = current_period_data['HeartRateQuotient'].mean()
        statistics[f'SpeedRange_mean_second_{i}'] = current_period_data['SpeedRange'].mean()
        statistics[f'SpeedQuotient_mean_second_{i}'] = current_period_data['SpeedQuotient'].mean()

        # Additional statistical features
        statistics[f'HeartRate_std_second_{i}'] = current_period_data['HeartRate'].std()
        statistics[f'Elevation_std_second_{i}'] = current_period_data['Elevation'].std()
        statistics[f'Cadence_std_second_{i}'] = current_period_data['Cadence'].std()
        statistics[f'Speed_std_second_{i}'] = current_period_data['Speed'].std()
        
        statistics[f'HeartRate_min_second_{i}'] = current_period_data['HeartRate'].min()
        statistics[f'Elevation_min_second_{i}'] = current_period_data['Elevation'].min()
        statistics[f'Cadence_min_second_{i}'] = current_period_data['Cadence'].min()
        statistics[f'Speed_min_second_{i}'] = current_period_data['Speed'].min()
        
        statistics[f'HeartRate_max_second_{i}'] = current_period_data['HeartRate'].max()
        statistics[f'Elevation_max_second_{i}'] = current_period_data['Elevation'].max()
        statistics[f'Cadence_max_second_{i}'] = current_period_data['Cadence'].max()
        statistics[f'Speed_max_second_{i}'] = current_period_data['Speed'].max()
        
        start_time = end_time

    last_30_seconds_data = group[group['Time'] >= group['Time'].max() - pd.Timedelta(seconds=30)]
    most_occuring_label = last_30_seconds_data['HeartRateClass'].mode().iloc[0]
    statistics['Label'] = most_occuring_label
    statistics['Distance'] = last_30_seconds_data['Distance'].max()
    
    
    return pd.Series(statistics)



df_statistics = df.groupby('RunID').apply(calculate_statistics).reset_index()
df_statistics.dropna(inplace=True)

  df_statistics = df.groupby('RunID').apply(calculate_statistics).reset_index()


In [4]:
print(df_statistics['Label'].unique())

[6. 7. 8. 9. 5. 4. 3.]


In [7]:
df_statistics.shape

(410, 103)

In [6]:
#display(df_statistics)
df_statistics.to_csv("processed_data/train_data_1min.csv", index=False)