In [2]:
import pandas as pd
from typing import List
from pandas import DataFrame
from tabulate import tabulate
import os

In [3]:
def concat_path(folder_name, file_name):
  return os.path.join(folder_name, file_name)

In [4]:
def current_dir(dir_target: str) -> str:
  current_dir = os.getcwd()
  folder_path = concat_path(current_dir, dir_target)
  return folder_path

In [5]:
def get_csv_files(folder_name: str) -> List[str]:
  running_folder_path = current_dir(folder_name)
  files_in_running_folder = os.listdir(running_folder_path)
  files = sorted([concat_path(running_folder_path, file) for file in files_in_running_folder])

  return files

In [6]:
def edit_date_format(date: str):
  date_split = date.split(' ')
  del date_split[3]
  new_date_format = ' '.join(date_split)

  return new_date_format

In [7]:
def rename_column(df: DataFrame, old_name: str, new_name: str) -> None:
  df.rename(columns={old_name: new_name}, inplace=True)

In [8]:
def drop_column(df: DataFrame, column: str) -> None:
  df.drop(columns=[column], inplace=True)

In [9]:
def sec_to_min(sec: float) -> str:
  min = int(sec // 60)
  sec = int(sec % 60)

  if (len(str(sec)) == 1): sec = f'0{sec}'
  
  return f'{min}:{sec}'


sec_to_min(336.572727)


'5:36'

In [10]:
def convert_activity(activity):
  if (activity == 'Running (Indoor)'):
    return 'indoor'
  return 'outdoor'

In [11]:
runningcsv_files = get_csv_files('running')

df = pd.concat( 
  map(pd.read_csv, runningcsv_files), 
  ignore_index=True
) 

df['Date'].apply(edit_date_format)


rename_column(df, 'Active energy burned(kcal)', 'Energy (kcal)')
rename_column(df, 'Heart rate: Average(count/min)', 'Heart rate: Average(min)')
rename_column(df, 'Heart rate: Maximum(count/min)', 'Heart rate: Maximum(min)')

drop_columns = [
  'Heart rate zone: A Easy (<115bpm)(%)',
  'Heart rate zone: B Fat Burn (115-135bpm)(%)',
  'Heart rate zone: C Moderate Training (135-155bpm)(%)',
  'Heart rate zone: D Hard Training (155-175bpm)(%)',
  'Heart rate zone: E Extreme Training (>175bpm)(%)',
  'Elevation: Ascended(m)',
  'Elevation: Maximum(m)',
  'Elevation: Minimum(m)',
  'METs Average(kcal/hr·kg)',
  'Weather: Humidity(%)',
  'Weather: Temperature(degC)',
]

for column in drop_columns:
  drop_column(df, column)


df['Pace(sec)'] = df['Duration(s)'] / df['Distance(km)']
df['Pace(min)'] = df['Pace(sec)'].apply(sec_to_min)
df['Activity'] = df['Activity'].apply(convert_activity)


column_order = ['Date', 'Energy (kcal)', 'Activity', 'Distance(km)',  'Duration(s)', 'Pace(min)', 'Pace(sec)', 'Heart rate: Average(min)', 'Heart rate: Maximum(min)']
df = df[column_order]

df



Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(s),Pace(min),Pace(sec),Heart rate: Average(min),Heart rate: Maximum(min)
0,2566-10-02 18:20:31 - 2566-10-02 18:39:08,183.992,indoor,3.300,1110.690,5:36,336.572727,164.525,182.0
1,2566-10-03 18:34:49 - 2566-10-03 19:03:43,267.510,indoor,4.631,1694.955,6:06,366.001943,166.159,197.0
2,2566-10-04 18:09:39 - 2566-10-04 18:53:54,434.123,indoor,7.388,2298.437,5:11,311.104088,158.990,189.0
3,2566-10-05 10:06:21 - 2566-10-05 10:34:35,306.220,indoor,5.565,1674.791,5:00,300.950764,160.902,183.0
4,2566-10-05 17:32:16 - 2566-10-05 17:40:01,80.592,indoor,1.488,462.267,5:10,310.663306,176.280,188.0
...,...,...,...,...,...,...,...,...,...
83,2567-01-25 18:13:50 - 2567-01-25 18:29:07,167.981,indoor,3.121,895.829,4:47,287.032682,159.672,174.0
84,2567-01-26 17:50:19 - 2567-01-26 18:04:53,175.991,indoor,3.244,874.217,4:29,269.487361,183.846,196.0
85,2567-01-29 17:41:04 - 2567-01-29 18:04:53,282.437,indoor,5.198,1414.299,4:32,272.085225,169.483,178.0
86,2567-01-30 17:33:17 - 2567-01-30 17:56:55,286.131,indoor,5.260,1415.417,4:29,269.090684,168.411,180.0


In [12]:
def filter_value(df: DataFrame, column: str, value: str) -> DataFrame:
  return df[df[column] == value]

In [13]:
outdoor = filter_value(df, 'Activity', 'outdoor')
outdoor.reset_index()

Unnamed: 0,index,Date,Energy (kcal),Activity,Distance(km),Duration(s),Pace(min),Pace(sec),Heart rate: Average(min),Heart rate: Maximum(min)
0,5,2566-10-07 07:57:08 - 2566-10-07 08:17:11,179.592,outdoor,3.041,1059.059,5:48,348.260112,167.339,179.0
1,10,2566-10-14 17:30:23 - 2566-10-14 17:58:10,253.719,outdoor,4.017,1190.477,4:56,296.359721,162.639,186.0
2,18,2566-10-21 17:57:09 - 2566-10-21 18:16:47,147.069,outdoor,2.09,658.899,5:15,315.262679,143.814,178.0
3,19,2566-10-23 08:02:10 - 2566-10-23 08:31:26,267.24,outdoor,4.104,1358.784,5:31,331.087719,154.657,175.0
4,25,2566-10-30 17:41:32 - 2566-10-30 18:15:11,291.367,outdoor,4.016,1298.729,5:23,323.388695,166.435,191.0
5,26,2566-10-31 17:32:28 - 2566-10-31 18:01:14,220.253,outdoor,3.59,1234.876,5:43,343.976602,166.377,190.0
6,27,2566-11-01 17:42:28 - 2566-11-01 18:14:47,267.341,outdoor,4.316,1526.713,5:53,353.733318,157.633,181.0
7,28,2566-11-02 17:43:54 - 2566-11-02 18:26:05,318.283,outdoor,4.612,1608.846,5:48,348.839115,150.512,173.0
8,34,2566-11-13 07:41:12 - 2566-11-13 08:20:22,337.239,outdoor,6.038,2048.237,5:39,339.224412,144.81,169.0
9,35,2566-11-13 17:34:55 - 2566-11-13 18:06:49,273.554,outdoor,5.044,1858.161,6:08,368.390365,163.325,176.0


In [14]:
indoor = filter_value(df, 'Activity', 'indoor')
indoor.reset_index()

Unnamed: 0,index,Date,Energy (kcal),Activity,Distance(km),Duration(s),Pace(min),Pace(sec),Heart rate: Average(min),Heart rate: Maximum(min)
0,0,2566-10-02 18:20:31 - 2566-10-02 18:39:08,183.992,indoor,3.3,1110.69,5:36,336.572727,164.525,182.0
1,1,2566-10-03 18:34:49 - 2566-10-03 19:03:43,267.51,indoor,4.631,1694.955,6:06,366.001943,166.159,197.0
2,2,2566-10-04 18:09:39 - 2566-10-04 18:53:54,434.123,indoor,7.388,2298.437,5:11,311.104088,158.99,189.0
3,3,2566-10-05 10:06:21 - 2566-10-05 10:34:35,306.22,indoor,5.565,1674.791,5:00,300.950764,160.902,183.0
4,4,2566-10-05 17:32:16 - 2566-10-05 17:40:01,80.592,indoor,1.488,462.267,5:10,310.663306,176.28,188.0
5,6,2566-10-09 18:16:16 - 2566-10-09 18:46:27,290.957,indoor,5.355,1801.485,5:36,336.411765,167.655,187.0
6,7,2566-10-10 07:41:58 - 2566-10-10 08:00:30,181.589,indoor,3.135,1077.189,5:43,343.600957,156.487,169.0
7,8,2566-10-10 18:04:01 - 2566-10-10 18:48:12,444.711,indoor,7.471,2476.96,5:31,331.543301,165.09,185.0
8,9,2566-10-11 17:46:21 - 2566-10-11 18:36:16,510.677,indoor,8.81,2861.538,5:24,324.805675,164.115,178.0
9,11,2566-10-16 08:45:26 - 2566-10-16 09:12:00,289.284,indoor,4.97,1515.587,5:04,304.947082,147.262,179.0


In [15]:
def pace_table(data) -> str:
  fastest_pace = df.loc[data]

  table_data = {
    'Date': [fastest_pace['Date']],
    'Energy (kcal)': [fastest_pace['Energy (kcal)']],
    'Activity': [fastest_pace['Activity']],
    'Distance(km)': [fastest_pace['Distance(km)']],
    'Duration(s)': [fastest_pace['Duration(s)']],
    'Pace(min)': [fastest_pace['Pace(min)']],
    'Pace(sec)': [fastest_pace['Pace(sec)']],
    'Heart rate: Average(min)': [fastest_pace['Heart rate: Average(min)']],
    'Heart rate: Maximum(min)': [fastest_pace['Heart rate: Maximum(min)']]
  }

  return tabulate(table_data, headers='keys', tablefmt='pretty')

In [16]:
fastest_pace_outdoor = outdoor['Pace(sec)'].idxmin()
fastest_pace_indoor = indoor['Pace(sec)'].idxmin()

# fastest_outdoor_pace_table = pace_table(fastest_pace_outdoor)
# fastest_indoor_pace_table = pace_table(fastest_pace_indoor)

fastest_outdoor_pace_table = df.loc[fastest_pace_outdoor]
fastest_indoor_pace_table = df.loc[fastest_pace_indoor]

print('The fastest pace outdoor')
print(fastest_outdoor_pace_table)
print()

print('The fastest pace indoor')
print(fastest_indoor_pace_table)

The fastest pace outdoor
Date                        2567-01-18 17:55:43 - 2567-01-18 18:50:13
Energy (kcal)                                                  268.99
Activity                                                      outdoor
Distance(km)                                                    5.077
Duration(s)                                                  1338.707
Pace(min)                                                        4:23
Pace(sec)                                                  263.680717
Heart rate: Average(min)                                      140.719
Heart rate: Maximum(min)                                        193.0
Name: 77, dtype: object

The fastest pace indoor
Date                        2567-01-30 17:33:17 - 2567-01-30 17:56:55
Energy (kcal)                                                 286.131
Activity                                                       indoor
Distance(km)                                                     5.26
Duration(s)     

In [17]:
def date_selector(df: DataFrame, date: str) -> DataFrame:
  return df[df['Date'].astype(str).str.contains(date)]

In [18]:
def mean_column(df: DataFrame, column: str):
  return round(df[column].mean(), 2)

In [25]:
date = '2567-01'
print('Data on', date)

distance = 'Distance(km)'
indoor = 'indoor'
pace = 'Pace(sec)'
outdoor = 'outdoor'

running = date_selector(df, date)

def mean(column: str):
  return mean_column(running, column)

def find_best_data_column(activity: str, column: str):
  activity_condition = (running['Activity'] == activity)
  distance_condition = (running[distance] > 5)
  condition = activity_condition & distance_condition
  filtered_df = running[condition][column]
  try:
    if column == pace:
      return filtered_df.idxmin()
    
    return filtered_df.idxmax()
  except: return None

def best_loc_data(idx, column: str) -> DataFrame:
  if idx is None:
    return None
  if column == pace:
    return sec_to_min(running.loc[idx][column])
  
  return running.loc[idx][column]

avg_distance = mean(distance)
avg_duration = sec_to_min(mean('Duration(s)'))
avg_pace = sec_to_min(mean(pace))
avg_heart_rate = mean('Heart rate: Average(min)')
avg_max_heart_rate = mean('Heart rate: Maximum(min)')

id_fastest_pace_indoor = find_best_data_column(indoor, pace)
fastest_pace_indoor = best_loc_data(id_fastest_pace_indoor, pace)
id_fastest_pace_outdoor = find_best_data_column(outdoor, pace)
fastest_pace_outdoor = best_loc_data(id_fastest_pace_outdoor, pace)


id_longest_distance_indoor = find_best_data_column(indoor, distance)
longest_distance_indoor = best_loc_data(id_longest_distance_indoor, distance)
id_longest_distance_outdoor = find_best_data_column(outdoor, distance)
longest_distance_outdoor = best_loc_data(id_longest_distance_outdoor, distance)

all_distance = running['Distance(km)'].sum()


print(len(running), 'runs')
print(f'{all_distance:.2f} km')
print()
print('Fastest run indoor', fastest_pace_indoor, 'km')
print('Fastest run outdoor', fastest_pace_outdoor, 'km')
print()
print('Longest distance indoor', longest_distance_indoor, 'km')
print('Longest distance outdoor', longest_distance_outdoor, 'km')
print()
print('Average distance', avg_distance, 'km')
print('Average time', avg_duration, 'min')
print('Average pace', avg_pace, 'min')
print('Average heart rate', avg_heart_rate, 'per min')
print('Average max heart rate', avg_max_heart_rate, 'per min')

Data on 2567-01
25 runs
116.07 km

Fastest run indoor 4:29 km
Fastest run outdoor 4:23 km

Longest distance indoor 5.26 km
Longest distance outdoor 7.039 km

Average distance 4.64 km
Average time 21:48 min
Average pace 4:41 min
Average heart rate 167.32 per min
Average max heart rate 181.68 per min
