In [44]:
import pandas as pd
from typing import List
from pandas import DataFrame
from tabulate import tabulate
import os

In [45]:
def concat_path(folder_name, file_name):
  return os.path.join(folder_name, file_name)

In [46]:
def current_dir(dir_target: str) -> str:
  current_dir = os.getcwd()
  folder_path = concat_path(current_dir, dir_target)
  return folder_path

In [47]:
def get_csv_files(folder_name: str) -> List[str]:
  running_folder_path = current_dir(folder_name)
  files_in_running_folder = os.listdir(running_folder_path)
  files = sorted([concat_path(running_folder_path, file) for file in files_in_running_folder])

  return files

In [48]:
def edit_date_format(date: str):
  date_split = date.split(' ')
  del date_split[3]
  new_date_format = ' '.join(date_split)

  return new_date_format

In [49]:
def rename_column(df: DataFrame, old_name: str, new_name: str) -> None:
  df.rename(columns={old_name: new_name}, inplace=True)

In [50]:
def drop_column(df: DataFrame, column: str) -> None:
  df.drop(columns=[column], inplace=True)

In [51]:
def sec_to_min(sec: float) -> str:
  min = int(sec // 60)
  sec = int(sec % 60)

  if (len(str(sec)) == 1): sec = f'0{sec}'
  
  return f'{min}:{sec}'


sec_to_min(336.572727)


'5:36'

In [52]:
def convert_activity(activity):
  if (activity == 'Running (Indoor)'):
    return 'indoor'
  return 'outdoor'

In [53]:
d = '2566-10-02 18:20:31 - 2566-10-02 18:39:08'
a = list(set(d.split(' ')))
k = [i for i in a if i != '-']

t = '-'.join(k[:2])
v = f'{t} {k[2]}'
v


'18:20:31-2566-10-02 18:39:08'

In [54]:
runningcsv_files = get_csv_files('running')

df = pd.concat( 
  map(pd.read_csv, runningcsv_files), 
  ignore_index=True
)

rename_column(df, 'Active energy burned(kcal)', 'Energy (kcal)')
rename_column(df, 'Heart rate: Average(count/min)', 'Heart rate: Average(min)')
rename_column(df, 'Heart rate: Maximum(count/min)', 'Heart rate: Maximum(min)')

drop_columns = [
  'Heart rate zone: A Easy (<115bpm)(%)',
  'Heart rate zone: B Fat Burn (115-135bpm)(%)',
  'Heart rate zone: C Moderate Training (135-155bpm)(%)',
  'Heart rate zone: D Hard Training (155-175bpm)(%)',
  'Heart rate zone: E Extreme Training (>175bpm)(%)',
  'Elevation: Ascended(m)',
  'Elevation: Maximum(m)',
  'Elevation: Minimum(m)',
  'METs Average(kcal/hr·kg)',
  'Weather: Humidity(%)',
  'Weather: Temperature(degC)',
]

for column in drop_columns:
  drop_column(df, column)

df['Pace(sec)'] = df['Duration(s)'] / df['Distance(km)']
df['Pace(min)'] = df['Pace(sec)'].apply(sec_to_min)
df['Activity'] = df['Activity'].apply(convert_activity)
df['Date'] = df['Date'].apply(edit_date_format)
df['Duration(min)'] = df['Duration(s)'].apply(sec_to_min)

column_order = ['Date', 'Energy (kcal)', 'Activity', 'Distance(km)', 'Duration(min)', 'Pace(min)', 'Heart rate: Average(min)', 'Heart rate: Maximum(min)']
d = df[column_order]

d



Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(min),Pace(min),Heart rate: Average(min),Heart rate: Maximum(min)
0,2566-10-02 18:20:31 - 18:39:08,183.992,indoor,3.300,18:30,5:36,164.525,182.0
1,2566-10-03 18:34:49 - 19:03:43,267.510,indoor,4.631,28:14,6:06,166.159,197.0
2,2566-10-04 18:09:39 - 18:53:54,434.123,indoor,7.388,38:18,5:11,158.990,189.0
3,2566-10-05 10:06:21 - 10:34:35,306.220,indoor,5.565,27:54,5:00,160.902,183.0
4,2566-10-05 17:32:16 - 17:40:01,80.592,indoor,1.488,7:42,5:10,176.280,188.0
...,...,...,...,...,...,...,...,...
134,2567-05-29 18:23:19 - 18:31:41,55.689,outdoor,1.028,4:08,4:01,176.220,192.0
135,2567-05-29 18:32:01 - 18:37:16,55.341,outdoor,1.042,4:17,4:06,184.492,195.0
136,2567-05-29 18:41:59 - 18:53:53,54.066,outdoor,1.028,4:07,4:00,161.895,191.0
137,2567-05-29 18:54:22 - 19:01:37,55.943,outdoor,1.024,4:00,3:54,171.747,190.0


In [55]:
def filter_value(df: DataFrame, column: str, value: str) -> DataFrame:
  return df[df[column] == value]

In [56]:
outdoor = filter_value(df, 'Activity', 'outdoor')
outdoor.reset_index()[column_order]

Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(min),Pace(min),Heart rate: Average(min),Heart rate: Maximum(min)
0,2566-10-07 07:57:08 - 08:17:11,179.592,outdoor,3.041,17:39,5:48,167.339,179.0
1,2566-10-14 17:30:23 - 17:58:10,253.719,outdoor,4.017,19:50,4:56,162.639,186.0
2,2566-10-21 17:57:09 - 18:16:47,147.069,outdoor,2.090,10:58,5:15,143.814,178.0
3,2566-10-23 08:02:10 - 08:31:26,267.240,outdoor,4.104,22:38,5:31,154.657,175.0
4,2566-10-30 17:41:32 - 18:15:11,291.367,outdoor,4.016,21:38,5:23,166.435,191.0
...,...,...,...,...,...,...,...,...
68,2567-05-29 18:23:19 - 18:31:41,55.689,outdoor,1.028,4:08,4:01,176.220,192.0
69,2567-05-29 18:32:01 - 18:37:16,55.341,outdoor,1.042,4:17,4:06,184.492,195.0
70,2567-05-29 18:41:59 - 18:53:53,54.066,outdoor,1.028,4:07,4:00,161.895,191.0
71,2567-05-29 18:54:22 - 19:01:37,55.943,outdoor,1.024,4:00,3:54,171.747,190.0


In [57]:
indoor = filter_value(df, 'Activity', 'indoor')
indoor.reset_index()[column_order]

Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(min),Pace(min),Heart rate: Average(min),Heart rate: Maximum(min)
0,2566-10-02 18:20:31 - 18:39:08,183.992,indoor,3.300,18:30,5:36,164.525,182.0
1,2566-10-03 18:34:49 - 19:03:43,267.510,indoor,4.631,28:14,6:06,166.159,197.0
2,2566-10-04 18:09:39 - 18:53:54,434.123,indoor,7.388,38:18,5:11,158.990,189.0
3,2566-10-05 10:06:21 - 10:34:35,306.220,indoor,5.565,27:54,5:00,160.902,183.0
4,2566-10-05 17:32:16 - 17:40:01,80.592,indoor,1.488,7:42,5:10,176.280,188.0
...,...,...,...,...,...,...,...,...
61,2567-02-28 18:15:58 - 18:39:40,271.163,indoor,5.021,23:20,4:38,177.660,190.0
62,2567-03-05 18:35:04 - 19:09:43,383.170,indoor,7.054,34:05,4:49,173.711,189.0
63,2567-03-06 18:17:35 - 18:50:43,384.148,indoor,7.043,33:08,4:42,169.310,178.0
64,2567-03-12 18:27:01 - 19:05:58,382.394,indoor,7.029,34:29,4:54,163.540,185.0


In [58]:
def pace_table(data) -> str:
  fastest_pace = df.loc[data]

  table_data = {
    'Date': [fastest_pace['Date']],
    'Energy (kcal)': [fastest_pace['Energy (kcal)']],
    'Activity': [fastest_pace['Activity']],
    'Distance(km)': [fastest_pace['Distance(km)']],
    'Duration(s)': [fastest_pace['Duration(s)']],
    'Pace(min)': [fastest_pace['Pace(min)']],
    'Pace(sec)': [fastest_pace['Pace(sec)']],
    'Heart rate: Average(min)': [fastest_pace['Heart rate: Average(min)']],
    'Heart rate: Maximum(min)': [fastest_pace['Heart rate: Maximum(min)']]
  }

  return tabulate(table_data, headers='keys', tablefmt='pretty')

In [59]:
fastest_pace_outdoor = outdoor['Pace(sec)'].idxmin()
fastest_pace_indoor = indoor['Pace(sec)'].idxmin()

# fastest_outdoor_pace_table = pace_table(fastest_pace_outdoor)
# fastest_indoor_pace_table = pace_table(fastest_pace_indoor)

fastest_outdoor_pace_table = df.loc[fastest_pace_outdoor]
fastest_indoor_pace_table = df.loc[fastest_pace_indoor]

print('The fastest pace outdoor')
print(fastest_outdoor_pace_table)
print()

print('The fastest pace indoor')
print(fastest_indoor_pace_table)

The fastest pace outdoor
Date                        2567-02-17 18:24:30 - 18:28:02
Energy (kcal)                                       22.642
Activity                                           outdoor
Distance(km)                                         0.399
Duration(s)                                         76.541
Heart rate: Average(min)                            166.52
Heart rate: Maximum(min)                             194.0
Pace(sec)                                        191.83208
Pace(min)                                             3:11
Duration(min)                                         1:16
Name: 97, dtype: object

The fastest pace indoor
Date                        2567-01-30 17:33:17 - 17:56:55
Energy (kcal)                                      286.131
Activity                                            indoor
Distance(km)                                          5.26
Duration(s)                                       1415.417
Heart rate: Average(min)                 

In [60]:
def date_selector(df: DataFrame, date: str) -> DataFrame:
  return df[df['Date'].astype(str).str.contains(date)]

In [61]:
def mean_column(df: DataFrame, column: str):
  return round(df[column].mean(), 2)

In [62]:
distance = df['Distance(km)'].sum()
print(f'Overall {distance:.2f} km')

Overall 663.15 km


In [63]:
date = '2567-05'
print('Data on', date)

distance = 'Distance(km)'
indoor = 'indoor'
pace = 'Pace(sec)'
outdoor = 'outdoor'

running = date_selector(df, date)

def mean(column: str):
  return mean_column(running, column)

def find_best_data_column(activity: str, column: str):
  activity_condition = (running['Activity'] == activity)
  distance_condition = (running[distance] > 5)
  condition = activity_condition & distance_condition
  filtered_df = running[condition][column]
  try:
    if column == pace:
      return filtered_df.idxmin()
    
    return filtered_df.idxmax()
  except: return None

def best_loc_data(idx, column: str) -> DataFrame:
  if idx is None:
    return None
  if column == pace:
    return sec_to_min(running.loc[idx][column])
  
  return running.loc[idx][column]

avg_distance = mean(distance)
avg_duration = sec_to_min(mean('Duration(s)'))
avg_pace = sec_to_min(mean(pace))
avg_heart_rate = mean('Heart rate: Average(min)')
avg_max_heart_rate = mean('Heart rate: Maximum(min)')

id_fastest_pace_indoor = find_best_data_column(indoor, pace)
fastest_pace_indoor = best_loc_data(id_fastest_pace_indoor, pace)
id_fastest_pace_outdoor = find_best_data_column(outdoor, pace)
fastest_pace_outdoor = best_loc_data(id_fastest_pace_outdoor, pace)


id_longest_distance_indoor = find_best_data_column(indoor, distance)
longest_distance_indoor = best_loc_data(id_longest_distance_indoor, distance)
id_longest_distance_outdoor = find_best_data_column(outdoor, distance)
longest_distance_outdoor = best_loc_data(id_longest_distance_outdoor, distance)

all_distance = running['Distance(km)'].sum()


print(len(running), 'runs')
print(f'{all_distance:.2f} km')
print()
print('Fastest run indoor', fastest_pace_indoor, 'km')
print('Fastest run outdoor', fastest_pace_outdoor, 'km')
print()
print('Longest distance indoor', longest_distance_indoor, 'km')
print('Longest distance outdoor', longest_distance_outdoor, 'km')
print()
print('Average distance', avg_distance, 'km')
print('Average time', avg_duration, 'min')
print('Average pace', avg_pace, 'min')
print('Average heart rate', avg_heart_rate, 'per min')
print('Average max heart rate', avg_max_heart_rate, 'per min')

Data on 2567-05
26 runs
97.60 km

Fastest run indoor None km
Fastest run outdoor 4:37 km

Longest distance indoor None km
Longest distance outdoor 7.061 km

Average distance 3.75 km
Average time 19:27 min
Average pace 4:55 min
Average heart rate 169.74 per min
Average max heart rate 189.77 per min
