# Running Statistics

## Imports

In [90]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [91]:
# Read activities.csv (resolved relative to the working directory) into a DataFrame
garmin = pd.read_csv(filepath_or_buffer="activities.csv")

## Data Wrangling

### Create a Running 2022 dataframe

In [92]:
# Replace the exported headers with short snake_case names.
# The assignment is positional: the list holds 43 names, so the CSV must have
# exactly 43 columns in this order or pandas raises a length-mismatch error.
garmin.columns = [
    'activity', 'date', 'fav', 'title', 'distance', 'calories', 'time',
    'avg_hr', 'max_hr', 'aerobic_te',
    'avg_run_cadence', 'max_run_cadence',
    'avg_pace', 'best_pace',
    'total_ascent', 'total_descent',
    'avg_stride', 'avg_vert_ratio', 'avg_vert_osc', 'avg_ground_contact',
    'avg_run_cadence1', 'max_run_cadence1',
    'training_stress', 'max_avg_power20', 'avg_power', 'max_power',
    'grit', 'flow',
    'total_strokes', 'avg_swolf', 'avg_stroke_rate',
    'total_reps', 'dive_time', 'min_temp', 'surface_interval', 'decomp',
    'best_lap', 'number_laps', 'max_temp',
    'moving_time', 'elapsed_time',
    'min_elav', 'max_elav',
]

# Parse the date column into datetimes so the .dt accessor can be used on it
garmin["date"] = pd.to_datetime(garmin["date"])

# Derive the calendar year, then keep only the rows that fall in 2022
garmin["year"] = garmin["date"].dt.year
garmin22 = garmin.loc[garmin["year"].eq(2022)]

In [93]:
# Outdoor runs and treadmill runs from 2022, kept separately for the row counts below
running = garmin22[garmin22["activity"] == "Running"]
tred_running = garmin22[garmin22["activity"] == "Treadmill Running"]

# Both kinds of run combined; this is the frame the rest of the notebook works on
run = garmin22[garmin22["activity"].isin(["Running", "Treadmill Running"])]

# len() counts rows directly. The previous `garmin.count()[0]` counted the
# non-null values of the first column and relied on positional integer
# indexing into a label-indexed Series, which pandas has deprecated.
print("Number of rows in original dataframe:", len(garmin))
print("Number of rows in 2022 dataframe:", len(garmin22))
print("Number of rows in running 2022 df:", len(running))
print("Number of rows in  treadmill running 2022 df:", len(tred_running))
print()
print("Number of rows in RUN 2022 df:", len(run))

Number of rows in original dataframe: 160
Number of rows in 2022 dataframe: 149
Number of rows in running 2022 df: 51
Number of rows in  treadmill running 2022 df: 7

Number of rows in RUN 2022 df: 58


### Running Data Wrangling

Keep only the columns needed for the running analysis, then convert the numeric columns from text to floats.

In [95]:
# Columns to keep for the running analysis; all others are dropped
keep_cols = ['activity', 'date', 'distance', 'calories',
             'time', 'avg_hr', 'max_hr', 'avg_run_cadence',
             'max_run_cadence', 'avg_pace', 'best_pace', 'total_ascent',
             'total_descent', 'avg_stride', 'number_laps',
             'moving_time', 'elapsed_time', 'min_elav', 'max_elav']

# Take an explicit copy: `run` was derived by filtering another frame, and
# assigning into such a selection raises SettingWithCopyWarning.
run = run[keep_cols].copy()


# Columns that are converted to float below
change_int = ['distance', 'calories', 'total_ascent',
              'total_descent', 'min_elav', 'max_elav']


# Remove commas from the numeric columns so they can be parsed as numbers
run[change_int] = run[change_int].replace(",", "", regex=True)

# Blank out every "--" in the whole frame (all columns, not only change_int)
run = run.replace("--", "", regex=True)

# In the numeric columns, turn cells that are exactly empty into "0".
# This is a literal match: pandas does not apply regex matching to an empty
# pattern, so asking for regex here was misleading.
# NOTE(review): filling with 0 keeps sums intact but will skew minima/means
# (e.g. min_elav); confirm this is acceptable for later analysis.
run[change_int] = run[change_int].replace("", "0")


# Convert the cleaned text to floats
run[change_int] = run[change_int].astype(float)

In [96]:
# Pace columns are parsed with a minutes:seconds format
for pace_col in ('avg_pace', 'best_pace'):
    run[pace_col] = pd.to_datetime(run[pace_col], format='%M:%S')

# Duration columns are parsed with an hours:minutes:seconds format
for duration_col in ('time', 'moving_time', 'elapsed_time'):
    run[duration_col] = pd.to_datetime(run[duration_col], format='%H:%M:%S')

Parsing these columns with `pd.to_datetime` attaches an arbitrary placeholder date (1900-01-01) to every value. Only the clock part is meaningful, so the next cell converts each column to a plain number of minutes.

In [97]:
# Collapse each parsed datetime into minutes (hours*60 + minutes + seconds/60).
# Note: this cell cannot be re-run on its own, because afterwards the columns
# hold floats and the .dt accessor is no longer available.
for minutes_col in ('avg_pace', 'best_pace', 'time', 'elapsed_time', 'moving_time'):
    clock = run[minutes_col].dt
    run[minutes_col] = clock.hour*60 + clock.minute + clock.second/60

# Durations are rounded to whole minutes; the pace columns keep their fraction
for whole_minute_col in ('time', 'elapsed_time', 'moving_time'):
    run[whole_minute_col] = run[whole_minute_col].round()

## Data Exploration

In [98]:
# Show the first cleaned row as a quick check of the conversions above
run.head(n=1)

Unnamed: 0,activity,date,distance,calories,time,avg_hr,max_hr,avg_run_cadence,max_run_cadence,avg_pace,best_pace,total_ascent,total_descent,avg_stride,number_laps,moving_time,elapsed_time,min_elav,max_elav
2,Running,2022-12-12 14:38:08,9.73,747.0,64.0,138,156,163,177,6.566667,4.95,55.0,56.0,0.93,10,63.0,64.0,6.0,40.0


In [99]:
# Columns to total over all 2022 runs
sum_cols = [
    'distance',
    'calories',
    'time',
    'elapsed_time',
    'moving_time',
    'total_ascent',
    'total_descent',
]

# Column-wise totals, displayed as the cell output
run.loc[:, sum_cols].sum()

distance           381.06
calories         29577.00
time              2299.00
elapsed_time      2299.00
moving_time       2193.00
total_ascent      3450.00
total_descent     3440.00
dtype: float64