In [86]:
import pandas as pd
from typing import List
from pandas import DataFrame
import os

In [57]:
def concat_path(folder_name, file_name):
  return os.path.join(folder_name, file_name)

In [58]:
def current_dir(dir_target: str) -> str:
  current_dir = os.getcwd()
  folder_path = concat_path(current_dir, dir_target)
  return folder_path

In [87]:
def get_csv_files(folder_name: str) -> List[str]:
  running_folder_path = current_dir(folder_name)
  files_in_running_folder = os.listdir(running_folder_path)
  files = sorted([concat_path(running_folder_path, file) for file in files_in_running_folder])

  return files

In [84]:
def edit_date_format(date: str):
  date_split = date.split(' ')
  del date_split[3]
  new_date_format = ' '.join(date_split)

  return new_date_format

In [88]:
def rename_column(df: DataFrame, old_name: str, new_name: str) -> None:
  df.rename(columns={old_name: new_name}, inplace=True)

In [92]:
def drop_column(df: DataFrame, column: str) -> None:
  df.drop(columns=[column], inplace=True)

In [136]:
def sec_to_min(sec: float) -> str:
  min = int(sec // 60)
  sec = int(sec % 60)

  if (len(str(sec)) == 1): sec = f'0{sec}'
  
  return f'{min}:{sec}'


sec_to_min(336.572727)


'5:36'

In [137]:
runningcsv_files = get_csv_files('running')

df = pd.concat( 
  map(pd.read_csv, runningcsv_files), 
  ignore_index=True
) 

df['Date'].apply(edit_date_format)

rename_column(df, 'Active energy burned(kcal)', 'Energy (kcal)')

drop_columns = [
  'Heart rate zone: A Easy (<115bpm)(%)',
  'Heart rate zone: B Fat Burn (115-135bpm)(%)',
  'Heart rate zone: C Moderate Training (135-155bpm)(%)',
  'Heart rate zone: D Hard Training (155-175bpm)(%)',
  'Heart rate zone: E Extreme Training (>175bpm)(%)',
  'Elevation: Ascended(m)',
  'Elevation: Maximum(m)',
  'Elevation: Minimum(m)',
  'METs Average(kcal/hr·kg)',
  'Weather: Humidity(%)',
  'Weather: Temperature(degC)',
]

for column in drop_columns:
  drop_column(df, column)

df['Pace(min)'] = df['Duration(s)'] / df['Distance(km)']
df['Pace(min)'] = df['Pace(min)'].apply(sec_to_min)


column_order = ['Date', 'Energy (kcal)', 'Activity', 'Distance(km)',  'Duration(s)', 'Pace(min)','Heart rate: Average(count/min)', 'Heart rate: Maximum(count/min)']
df = df[column_order]

df



Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(s),Pace(min),Heart rate: Average(count/min),Heart rate: Maximum(count/min)
0,2566-10-02 18:20:31 - 2566-10-02 18:39:08,183.992,Running (Indoor),3.300,1110.690,5:36,164.525,182.0
1,2566-10-03 18:34:49 - 2566-10-03 19:03:43,267.510,Running (Indoor),4.631,1694.955,6:06,166.159,197.0
2,2566-10-04 18:09:39 - 2566-10-04 18:53:54,434.123,Running (Indoor),7.388,2298.437,5:11,158.990,189.0
3,2566-10-05 10:06:21 - 2566-10-05 10:34:35,306.220,Running (Indoor),5.565,1674.791,5:00,160.902,183.0
4,2566-10-05 17:32:16 - 2566-10-05 17:40:01,80.592,Running (Indoor),1.488,462.267,5:10,176.280,188.0
...,...,...,...,...,...,...,...,...
58,2566-12-23 17:24:30 - 2566-12-23 18:13:01,529.661,Running,10.052,2848.219,4:43,182.775,194.0
59,2566-12-25 13:25:17 - 2566-12-25 13:52:25,275.522,Running (Indoor),5.063,1532.991,5:02,158.084,178.0
60,2566-12-26 17:47:47 - 2566-12-26 18:07:29,169.381,Running,3.027,907.753,4:59,166.139,186.0
61,2566-12-28 07:37:58 - 2566-12-28 08:07:19,266.105,Running,5.056,1464.858,4:49,142.500,161.0


In [123]:
def filter_value(df: DataFrame, column: str, value: str) -> DataFrame:
  return df[df[column] == 'Running']

In [138]:
outdoor = filter_value(df, 'Activity', 'Running')
outdoor

Unnamed: 0,Date,Energy (kcal),Activity,Distance(km),Duration(s),Pace(min),Heart rate: Average(count/min),Heart rate: Maximum(count/min)
5,2566-10-07 07:57:08 - 2566-10-07 08:17:11,179.592,Running,3.041,1059.059,5:48,167.339,179.0
10,2566-10-14 17:30:23 - 2566-10-14 17:58:10,253.719,Running,4.017,1190.477,4:56,162.639,186.0
18,2566-10-21 17:57:09 - 2566-10-21 18:16:47,147.069,Running,2.09,658.899,5:15,143.814,178.0
19,2566-10-23 08:02:10 - 2566-10-23 08:31:26,267.24,Running,4.104,1358.784,5:31,154.657,175.0
25,2566-10-30 17:41:32 - 2566-10-30 18:15:11,291.367,Running,4.016,1298.729,5:23,166.435,191.0
26,2566-10-31 17:32:28 - 2566-10-31 18:01:14,220.253,Running,3.59,1234.876,5:43,166.377,190.0
27,2566-11-01 17:42:28 - 2566-11-01 18:14:47,267.341,Running,4.316,1526.713,5:53,157.633,181.0
28,2566-11-02 17:43:54 - 2566-11-02 18:26:05,318.283,Running,4.612,1608.846,5:48,150.512,173.0
34,2566-11-13 07:41:12 - 2566-11-13 08:20:22,337.239,Running,6.038,2048.237,5:39,144.81,169.0
35,2566-11-13 17:34:55 - 2566-11-13 18:06:49,273.554,Running,5.044,1858.161,6:08,163.325,176.0


In [133]:
indoor = filter_value(df, 'Activity', 'Indoor')
indoor.reset_index()

Unnamed: 0,index,Date,Energy (kcal),Activity,Distance(km),Pace(min),Duration(s),Heart rate: Average(count/min),Heart rate: Maximum(count/min)
0,5,2566-10-07 07:57:08 - 2566-10-07 08:17:11,179.592,Running,3.041,5:48,1059.059,167.339,179.0
1,10,2566-10-14 17:30:23 - 2566-10-14 17:58:10,253.719,Running,4.017,4:56,1190.477,162.639,186.0
2,18,2566-10-21 17:57:09 - 2566-10-21 18:16:47,147.069,Running,2.09,5:15,658.899,143.814,178.0
3,19,2566-10-23 08:02:10 - 2566-10-23 08:31:26,267.24,Running,4.104,5:31,1358.784,154.657,175.0
4,25,2566-10-30 17:41:32 - 2566-10-30 18:15:11,291.367,Running,4.016,5:23,1298.729,166.435,191.0
5,26,2566-10-31 17:32:28 - 2566-10-31 18:01:14,220.253,Running,3.59,5:43,1234.876,166.377,190.0
6,27,2566-11-01 17:42:28 - 2566-11-01 18:14:47,267.341,Running,4.316,5:53,1526.713,157.633,181.0
7,28,2566-11-02 17:43:54 - 2566-11-02 18:26:05,318.283,Running,4.612,5:48,1608.846,150.512,173.0
8,34,2566-11-13 07:41:12 - 2566-11-13 08:20:22,337.239,Running,6.038,5:39,2048.237,144.81,169.0
9,35,2566-11-13 17:34:55 - 2566-11-13 18:06:49,273.554,Running,5.044,6:8,1858.161,163.325,176.0
