# Running Data Analytics

This notebook will serve as the starting point for analyzing my running data dating back to 2022. Running data will come from Fitbit and Strava.

In [141]:
# import packages

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

# data visualizations
import matplotlib.pyplot as plt
import seaborn as sns

In [142]:
strava_activities = pd.read_csv('activities_strava.csv', header=0)
strava_activities.head()

strava_runs = strava_activities.loc[strava_activities['Activity Type'] == 'Run']
print(strava_activities.shape)
print(strava_runs.shape)

(213, 92)
(176, 92)


There are 176 runs tracked in Strava.

In [143]:
strava_runs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 176 entries, 0 to 212
Data columns (total 92 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Activity ID                   176 non-null    int64  
 1   Activity Date                 176 non-null    object 
 2   Activity Name                 176 non-null    object 
 3   Activity Type                 176 non-null    object 
 4   Activity Description          1 non-null      object 
 5   Elapsed Time                  176 non-null    int64  
 6   Distance                      176 non-null    float64
 7   Max Heart Rate                171 non-null    float64
 8   Relative Effort               171 non-null    float64
 9   Commute                       176 non-null    bool   
 10  Activity Private Note         1 non-null      object 
 11  Activity Gear                 52 non-null     object 
 12  Filename                      172 non-null    object 
 13  Athlete We

First all columns that only contain null values will be dropped.

In [144]:
strava_runs = strava_runs.dropna(how='all', axis=1)
strava_runs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 176 entries, 0 to 212
Data columns (total 60 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Activity ID                  176 non-null    int64  
 1   Activity Date                176 non-null    object 
 2   Activity Name                176 non-null    object 
 3   Activity Type                176 non-null    object 
 4   Activity Description         1 non-null      object 
 5   Elapsed Time                 176 non-null    int64  
 6   Distance                     176 non-null    float64
 7   Max Heart Rate               171 non-null    float64
 8   Relative Effort              171 non-null    float64
 9   Commute                      176 non-null    bool   
 10  Activity Private Note        1 non-null      object 
 11  Activity Gear                52 non-null     object 
 12  Filename                     172 non-null    object 
 13  Elapsed Time.1           

In [145]:
strava_runs.head()

Unnamed: 0,Activity ID,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,Relative Effort,Commute,Activity Private Note,Activity Gear,Filename,Elapsed Time.1,Moving Time,Distance.1,Max Speed,Average Speed,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Max Grade,Average Grade,Max Cadence,Average Cadence,Max Heart Rate.1,Average Heart Rate,Calories,Relative Effort.1,Prefer Perceived Exertion,Commute.1,From Upload,Grade Adjusted Distance,Weather Observation Time,Weather Condition,Weather Temperature,Apparent Temperature,Dewpoint,Humidity,Weather Pressure,Wind Speed,Wind Gust,Wind Bearing,Precipitation Intensity,Sunrise Time,Sunset Time,Moon Phase,Gear,Precipitation Probability,Precipitation Type,Cloud Cover,Weather Visibility,UV Index,Weather Ozone,Flagged,Average Elapsed Speed,Dirt Distance,Total Steps,Average Grade Adjusted Pace
0,8710510063,"Mar 14, 2023, 1:04:38 AM",Evening Run,Run,,906,2.89,,,False,,,activities/9349251252.fit.gz,906.0,906.0,2893.0,5.4,3.193157,30.148668,22.0,1621.400024,1639.199951,6.25,0.127895,90.0,86.505035,,,249.0,,,0.0,1.0,2928.0,1678756000.0,2.0,11.24,11.24,-7.47,0.26,1012.799988,2.18,5.37,328.0,0.0,1678713000.0,1678756000.0,0.72,13853811.0,0.0,,0.63,4324.0,0.0,335.299988,0.0,3.193157,625.0,,
1,8716161736,"Mar 15, 2023, 1:11:09 AM",Evening Run,Run,,1828,5.82,167.0,44.0,False,,,activities/9355328913.fit.gz,1828.0,1825.0,5823.0,4.6,3.190685,33.914471,23.0,1621.199951,1637.400024,7.272727,0.157994,95.0,86.739349,,152.429367,467.0,44.0,,0.0,1.0,5886.399902,1678842000.0,2.0,14.75,14.75,-12.29,0.14,1008.400024,2.0,4.74,18.0,0.0,1678800000.0,1678842000.0,0.75,13853811.0,0.0,,0.87,2463.0,0.0,312.0,0.0,3.185449,300.0,10402.0,
2,8716268112,"Dec 26, 2022, 2:54:47 PM",Morning Run,Run,,1633,2.65,157.0,1.0,False,,,activities/9355439698.fit.gz,1633.0,1110.0,2650.0,15.342857,2.387387,6.1575,12.7575,1622.599976,1631.300049,10.131579,-0.249057,102.0,73.721649,,133.036087,305.0,1.0,0.0,0.0,1.0,2680.800049,1672063000.0,2.0,-3.64,-6.63,-5.57,0.86,1027.599976,1.95,2.96,175.0,0.0,1672065000.0,1672098000.0,0.15,13853811.0,0.0,,0.43,16093.0,0.0,300.600006,0.0,1.62278,160.0,7196.0,
3,8716268113,"Feb 14, 2023, 2:11:13 AM",Evening Run,Run,,896,2.81,173.0,30.0,False,,,activities/9355439717.fit.gz,896.0,896.0,2813.0,5.8,3.139509,23.228878,0.0,1621.0,1635.400024,5.0,0.0,90.0,86.478409,,160.854538,256.0,30.0,0.0,0.0,1.0,2832.199951,1676340000.0,3.0,9.55,7.41,-1.67,0.45,996.400024,4.09,8.25,154.0,0.0,1676297000.0,1676335000.0,0.77,13853811.0,0.0,,0.91,3283.0,0.0,336.899994,0.0,3.139509,50.0,5126.0,
4,8716268119,"Feb 15, 2023, 12:33:29 AM",Afternoon Run,Run,,1179,3.38,176.0,17.0,False,,,activities/9355439720.fit.gz,1179.0,1102.0,3382.0,5.6,3.068965,27.94187,9.0,1621.599976,1637.400024,6.0,0.026611,105.0,85.923241,,141.388657,256.0,17.0,0.0,0.0,1.0,3412.100098,1676419000.0,2.0,4.42,1.0,-8.54,0.38,996.0,4.26,7.31,109.0,0.0,1676383000.0,1676421000.0,0.8,13853811.0,0.0,,0.74,6267.0,0.0,357.0,0.0,2.868533,363.0,5752.0,


In [146]:
strava_runs.describe(include='all')

Unnamed: 0,Activity ID,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,Relative Effort,Commute,Activity Private Note,Activity Gear,Filename,Elapsed Time.1,Moving Time,Distance.1,Max Speed,Average Speed,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Max Grade,Average Grade,Max Cadence,Average Cadence,Max Heart Rate.1,Average Heart Rate,Calories,Relative Effort.1,Prefer Perceived Exertion,Commute.1,From Upload,Grade Adjusted Distance,Weather Observation Time,Weather Condition,Weather Temperature,Apparent Temperature,Dewpoint,Humidity,Weather Pressure,Wind Speed,Wind Gust,Wind Bearing,Precipitation Intensity,Sunrise Time,Sunset Time,Moon Phase,Gear,Precipitation Probability,Precipitation Type,Cloud Cover,Weather Visibility,UV Index,Weather Ozone,Flagged,Average Elapsed Speed,Dirt Distance,Total Steps,Average Grade Adjusted Pace
count,176.0,176,176,176,1,176.0,176.0,171.0,171.0,176,1,52,172,176.0,176.0,176.0,172.0,176.0,175.0,172.0,172.0,172.0,172.0,176.0,172.0,172.0,27.0,171.0,175.0,171.0,24.0,176.0,176.0,172.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,175.0,171.0,148.0,171.0,171.0,171.0,23.0,176.0,176.0,175.0,171.0,26.0
unique,,176,8,1,1,,,,,1,1,1,172,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
top,,"Mar 14, 2023, 1:04:38 AM",Evening Run,Run,Manual entry because I did not run this at a 1...,,,,,False,15 minute treadmill run,Second pair,activities/9349251252.fit.gz,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
freq,,1,89,176,1,,,,,176,1,52,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
mean,9958715000.0,,,,,2078.755682,6.32017,168.538012,54.590643,,,,,2078.755682,1998.181818,6323.799957,6.661315,3.180406,39.617062,46.732117,1590.970347,1609.873245,11.604334,0.047773,94.546512,86.646055,165.888889,152.507977,522.359587,54.590643,0.0,0.0,0.977273,6396.44069,1695814000.0,2.152047,14.64924,13.255322,-0.29193,0.400819,1010.967251,2.577836,5.388129,168.48538,0.047368,1695780000.0,1695825000.0,0.468216,14685420.0,0.150175,2.459459,0.466784,31311.365691,0.807018,322.495655,0.011364,2.998338,902.748571,6844.783626,3.201542
std,933586000.0,,,,,1318.506225,4.270621,9.884778,47.588638,,,,,1318.506225,1327.046371,4270.926061,2.614359,0.398464,29.154232,121.602536,173.256415,174.803786,10.02431,0.117812,5.440066,2.072143,11.6663,7.59319,337.533047,47.588638,0.0,0.0,0.149458,4340.854788,13321670.0,1.354832,9.167849,9.49417,7.381596,0.178132,7.820305,1.264159,2.548735,105.707942,0.367074,13323160.0,13323190.0,0.29643,1191216.0,0.222709,1.967001,0.350515,14631.391354,1.440244,22.951538,0.150756,0.401772,1078.509152,4632.01455,0.28201
min,8710510000.0,,,,,254.0,0.02,144.0,0.0,,,,,254.0,3.0,22.0,0.8,2.387387,0.0,0.0,0.0,0.0,0.0,-0.249057,85.0,73.721649,144.0,133.036087,66.0,0.0,0.0,0.0,0.0,22.0,1672063000.0,1.0,-5.95,-8.65,-14.87,0.1,988.400024,0.71,1.59,0.0,0.0,1672065000.0,1672098000.0,0.0,13853810.0,0.0,1.0,0.0,0.0,0.0,277.399994,0.0,0.024044,0.0,706.0,2.625652
25%,9134528000.0,,,,,1210.0,3.325,164.5,22.5,,,,,1210.0,1101.25,3324.0,5.0,3.003595,19.695028,17.0,1593.599976,1613.924957,5.833333,-0.004202,91.0,85.651476,159.0,147.750839,300.5,22.5,0.0,0.0,1.0,3379.849976,1684562000.0,1.0,7.845,5.905,-5.665,0.26,1006.810028,1.73,3.535,65.5,0.0,1684540000.0,1684592000.0,0.125,13853810.0,0.0,1.0,0.12,23511.0,0.0,309.350006,0.0,2.864261,91.25,3693.0,2.987463
50%,9791117000.0,,,,,1688.5,4.985,169.0,40.0,,,,,1688.5,1561.5,4985.5,5.8,3.155713,31.172844,32.0,1619.5,1639.199951,7.888889,0.011055,93.0,86.846569,167.0,153.155777,422.0,40.0,0.0,0.0,1.0,5030.75,1693750000.0,2.0,15.59,14.75,-1.47,0.37,1010.719971,2.24,4.92,174.0,0.0,1693744000.0,1693791000.0,0.375,13853810.0,0.0,1.0,0.51,34347.0,0.0,319.899994,0.0,3.041582,541.5,5226.0,3.235259
75%,10826560000.0,,,,,2471.25,7.5675,173.0,66.5,,,,,2471.25,2430.75,7569.25,7.45,3.3103,53.245325,54.0,1621.300049,1639.300049,12.625,0.094709,97.0,88.101282,170.5,156.893753,612.5,66.5,0.0,0.0,1.0,7733.525024,1708803000.0,3.0,21.684999,20.95,5.9,0.53,1015.529999,3.085,6.58,260.0,0.0,1708782000.0,1708822000.0,0.74,15467880.0,0.31,5.0,0.8,42725.269531,1.0,336.099991,0.0,3.238272,1422.5,8933.0,3.403974


In [147]:
dropped_cols = ['Activity ID', 'Activity Type', 'Activity Description', 'Commute', 'Activity Private Note', 'Activity Gear', \
                'Filename', 'From Upload', 'Gear', 'Flagged', 'Prefer Perceived Exertion', \
                'Precipitation Type', 'Weather Ozone', 'Average Grade Adjusted Pace']
repeat_cols = [col for col in strava_runs.columns if '.1' in col]
cols_to_drop = dropped_cols + repeat_cols
run_data = strava_runs.drop(columns=cols_to_drop, axis=1)
run_data.head()

Unnamed: 0,Activity Date,Activity Name,Elapsed Time,Distance,Max Heart Rate,Relative Effort,Moving Time,Max Speed,Average Speed,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Max Grade,Average Grade,Max Cadence,Average Cadence,Average Heart Rate,Calories,Grade Adjusted Distance,Weather Observation Time,Weather Condition,Weather Temperature,Apparent Temperature,Dewpoint,Humidity,Weather Pressure,Wind Speed,Wind Gust,Wind Bearing,Precipitation Intensity,Sunrise Time,Sunset Time,Moon Phase,Precipitation Probability,Cloud Cover,Weather Visibility,UV Index,Average Elapsed Speed,Dirt Distance,Total Steps
0,"Mar 14, 2023, 1:04:38 AM",Evening Run,906,2.89,,,906.0,5.4,3.193157,30.148668,22.0,1621.400024,1639.199951,6.25,0.127895,90.0,86.505035,,249.0,2928.0,1678756000.0,2.0,11.24,11.24,-7.47,0.26,1012.799988,2.18,5.37,328.0,0.0,1678713000.0,1678756000.0,0.72,0.0,0.63,4324.0,0.0,3.193157,625.0,
1,"Mar 15, 2023, 1:11:09 AM",Evening Run,1828,5.82,167.0,44.0,1825.0,4.6,3.190685,33.914471,23.0,1621.199951,1637.400024,7.272727,0.157994,95.0,86.739349,152.429367,467.0,5886.399902,1678842000.0,2.0,14.75,14.75,-12.29,0.14,1008.400024,2.0,4.74,18.0,0.0,1678800000.0,1678842000.0,0.75,0.0,0.87,2463.0,0.0,3.185449,300.0,10402.0
2,"Dec 26, 2022, 2:54:47 PM",Morning Run,1633,2.65,157.0,1.0,1110.0,15.342857,2.387387,6.1575,12.7575,1622.599976,1631.300049,10.131579,-0.249057,102.0,73.721649,133.036087,305.0,2680.800049,1672063000.0,2.0,-3.64,-6.63,-5.57,0.86,1027.599976,1.95,2.96,175.0,0.0,1672065000.0,1672098000.0,0.15,0.0,0.43,16093.0,0.0,1.62278,160.0,7196.0
3,"Feb 14, 2023, 2:11:13 AM",Evening Run,896,2.81,173.0,30.0,896.0,5.8,3.139509,23.228878,0.0,1621.0,1635.400024,5.0,0.0,90.0,86.478409,160.854538,256.0,2832.199951,1676340000.0,3.0,9.55,7.41,-1.67,0.45,996.400024,4.09,8.25,154.0,0.0,1676297000.0,1676335000.0,0.77,0.0,0.91,3283.0,0.0,3.139509,50.0,5126.0
4,"Feb 15, 2023, 12:33:29 AM",Afternoon Run,1179,3.38,176.0,17.0,1102.0,5.6,3.068965,27.94187,9.0,1621.599976,1637.400024,6.0,0.026611,105.0,85.923241,141.388657,256.0,3412.100098,1676419000.0,2.0,4.42,1.0,-8.54,0.38,996.0,4.26,7.31,109.0,0.0,1676383000.0,1676421000.0,0.8,0.0,0.74,6267.0,0.0,2.868533,363.0,5752.0


In [148]:
run_data.isna().sum()

Activity Date                0
Activity Name                0
Elapsed Time                 0
Distance                     0
Max Heart Rate               5
Relative Effort              5
Moving Time                  0
Max Speed                    4
Average Speed                0
Elevation Gain               1
Elevation Loss               4
Elevation Low                4
Elevation High               4
Max Grade                    4
Average Grade                0
Max Cadence                  4
Average Cadence              4
Average Heart Rate           5
Calories                     1
Grade Adjusted Distance      4
Weather Observation Time     5
Weather Condition            5
Weather Temperature          5
Apparent Temperature         5
Dewpoint                     5
Humidity                     5
Weather Pressure             5
Wind Speed                   5
Wind Gust                    5
Wind Bearing                 5
Precipitation Intensity      5
Sunrise Time                 5
Sunset T

In [149]:
run_data.loc[run_data.isna().any(axis=1)]

Unnamed: 0,Activity Date,Activity Name,Elapsed Time,Distance,Max Heart Rate,Relative Effort,Moving Time,Max Speed,Average Speed,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Max Grade,Average Grade,Max Cadence,Average Cadence,Average Heart Rate,Calories,Grade Adjusted Distance,Weather Observation Time,Weather Condition,Weather Temperature,Apparent Temperature,Dewpoint,Humidity,Weather Pressure,Wind Speed,Wind Gust,Wind Bearing,Precipitation Intensity,Sunrise Time,Sunset Time,Moon Phase,Precipitation Probability,Cloud Cover,Weather Visibility,UV Index,Average Elapsed Speed,Dirt Distance,Total Steps
0,"Mar 14, 2023, 1:04:38 AM",Evening Run,906,2.89,,,906.0,5.4,3.193157,30.148668,22.0,1621.400024,1639.199951,6.25,0.127895,90.0,86.505035,,249.0,2928.0,1678756000.0,2.0,11.24,11.24,-7.47,0.26,1012.799988,2.18,5.37,328.0,0.0,1678713000.0,1678756000.0,0.72,0.0,0.63,4324.0,0.0,3.193157,625.0,
57,"Jun 24, 2023, 1:00:00 AM",Evening Run,1500,4.82,,,1500.0,,3.218688,0.0,,,,,0.0,,,,430.52124,,,,,,,,,,,,,,,,,,,,3.218688,0.0,
96,"Aug 17, 2023, 1:40:00 AM",Night Run,1819,5.89,,,1819.0,,3.238152,0.0,,,,,0.0,,,,525.236145,,,,,,,,,,,,,,,,,,,,3.238152,0.0,
137,"Jan 29, 2024, 1:30:00 AM",Evening Run,2310,6.27,,,2310.0,,2.71708,0.0,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,2.71708,,
142,"Feb 8, 2024, 3:22:30 AM",Evening Run,1560,4.66,,,1560.0,,2.991735,,,,,,0.0,,,,416.17041,,,,,,,,,,,,,,,,,,,,2.991735,0.0,
204,"May 26, 2024, 4:33:40 PM",Morning Run,896,3.24,158.0,14.0,896.0,9.8,3.616071,0.0,0.0,0.0,0.0,0.0,0.0,88.0,85.45536,141.977676,207.0,3240.0,,,,,,,,,,,,,,,,,,,3.616071,0.0,2538.0


In [150]:
# removed runs that weren't tracked correctly
run_data = run_data.dropna(subset=['Max Heart Rate', 'Average Heart Rate'], axis=0)

# fill missing weather data will data from previous day
run_data = run_data.fillna(method='bfill', axis=0)
run_data.isna().sum()

  run_data = run_data.fillna(method='bfill', axis=0)


Activity Date                0
Activity Name                0
Elapsed Time                 0
Distance                     0
Max Heart Rate               0
Relative Effort              0
Moving Time                  0
Max Speed                    0
Average Speed                0
Elevation Gain               0
Elevation Loss               0
Elevation Low                0
Elevation High               0
Max Grade                    0
Average Grade                0
Max Cadence                  0
Average Cadence              0
Average Heart Rate           0
Calories                     0
Grade Adjusted Distance      0
Weather Observation Time     0
Weather Condition            0
Weather Temperature          0
Apparent Temperature         0
Dewpoint                     0
Humidity                     0
Weather Pressure             0
Wind Speed                   0
Wind Gust                    0
Wind Bearing                 0
Precipitation Intensity      0
Sunrise Time                 0
Sunset T

In [151]:
# convert column names to lower case and snake case
column_names = {column: column.strip().lower().replace(' ', '_').replace('.1', '') for column in run_data.columns}
run_data = run_data.rename(columns=column_names)

In [152]:
# convert date and time
run_data['activity_date'] = pd.to_datetime(run_data['activity_date'])

In [153]:
# convert activity_name to a categorical for time of day for the run
run_data['time_of_day'] = run_data['activity_name'].str.lower().str.split(' ').str[0]
run_data['time_of_day'] = run_data['time_of_day'].replace({'lunch': 'afternoon'}).astype('category')
run_data['time_of_day']

1        evening
2        morning
3        evening
4      afternoon
5      afternoon
         ...    
208      evening
209      evening
210      morning
211      morning
212      morning
Name: time_of_day, Length: 171, dtype: category
Categories (4, object): ['afternoon', 'evening', 'morning', 'night']

In [154]:
max_heart_rate = run_data['max_heart_rate'].quantile(0.99) # potential outlier in the max heart rate column

def heart_rate_zones(max_heart_rate, heart_rate):
    if heart_rate < 0.6 * max_heart_rate:
        return 1
    elif heart_rate < 0.7 * max_heart_rate:
        return 2
    elif heart_rate < 0.8 * max_heart_rate:
        return 3
    elif heart_rate < 0.9 * max_heart_rate:
        return 4
    else:
        return 5

run_data['average_running_zone'] = run_data.apply(lambda x: heart_rate_zones(max_heart_rate, x['average_heart_rate']), axis=1).astype('category')
run_data['average_running_zone']

1      4
2      2
3      4
4      3
5      3
      ..
208    3
209    3
210    3
211    4
212    3
Name: average_running_zone, Length: 171, dtype: category
Categories (4, int64): [2, 3, 4, 5]

In [155]:
run_data['distance_mi'] = run_data['distance'] / 1.609
run_data['moving_time_min'] = run_data['moving_time'] / 60
run_data['pace_min_per_mile'] = run_data['moving_time_min'] / run_data['distance_mi']
run_data.head()

Unnamed: 0,activity_date,activity_name,elapsed_time,distance,max_heart_rate,relative_effort,moving_time,max_speed,average_speed,elevation_gain,elevation_loss,elevation_low,elevation_high,max_grade,average_grade,max_cadence,average_cadence,average_heart_rate,calories,grade_adjusted_distance,weather_observation_time,weather_condition,weather_temperature,apparent_temperature,dewpoint,humidity,weather_pressure,wind_speed,wind_gust,wind_bearing,precipitation_intensity,sunrise_time,sunset_time,moon_phase,precipitation_probability,cloud_cover,weather_visibility,uv_index,average_elapsed_speed,dirt_distance,total_steps,time_of_day,average_running_zone,distance_mi,moving_time_min,pace_min_per_mile
1,2023-03-15 01:11:09,Evening Run,1828,5.82,167.0,44.0,1825.0,4.6,3.190685,33.914471,23.0,1621.199951,1637.400024,7.272727,0.157994,95.0,86.739349,152.429367,467.0,5886.399902,1678842000.0,2.0,14.75,14.75,-12.29,0.14,1008.400024,2.0,4.74,18.0,0.0,1678800000.0,1678842000.0,0.75,0.0,0.87,2463.0,0.0,3.185449,300.0,10402.0,evening,4,3.617154,30.416667,8.409006
2,2022-12-26 14:54:47,Morning Run,1633,2.65,157.0,1.0,1110.0,15.342857,2.387387,6.1575,12.7575,1622.599976,1631.300049,10.131579,-0.249057,102.0,73.721649,133.036087,305.0,2680.800049,1672063000.0,2.0,-3.64,-6.63,-5.57,0.86,1027.599976,1.95,2.96,175.0,0.0,1672065000.0,1672098000.0,0.15,0.0,0.43,16093.0,0.0,1.62278,160.0,7196.0,morning,2,1.646986,18.5,11.232642
3,2023-02-14 02:11:13,Evening Run,896,2.81,173.0,30.0,896.0,5.8,3.139509,23.228878,0.0,1621.0,1635.400024,5.0,0.0,90.0,86.478409,160.854538,256.0,2832.199951,1676340000.0,3.0,9.55,7.41,-1.67,0.45,996.400024,4.09,8.25,154.0,0.0,1676297000.0,1676335000.0,0.77,0.0,0.91,3283.0,0.0,3.139509,50.0,5126.0,evening,4,1.746426,14.933333,8.550795
4,2023-02-15 00:33:29,Afternoon Run,1179,3.38,176.0,17.0,1102.0,5.6,3.068965,27.94187,9.0,1621.599976,1637.400024,6.0,0.026611,105.0,85.923241,141.388657,256.0,3412.100098,1676419000.0,2.0,4.42,1.0,-8.54,0.38,996.0,4.26,7.31,109.0,0.0,1676383000.0,1676421000.0,0.8,0.0,0.74,6267.0,0.0,2.868533,363.0,5752.0,afternoon,3,2.100684,18.366667,8.743185
5,2023-01-08 00:02:48,Afternoon Run,1357,3.65,166.0,24.0,1355.0,4.4,2.695203,39.780704,8.0,1621.400024,1638.900024,6.25,0.21632,97.0,82.727119,146.831039,312.0,3712.199951,1673136000.0,1.0,4.47,2.55,-5.03,0.5,1024.099976,2.2,4.06,279.0,0.0,1673101000.0,1673136000.0,0.54,0.0,0.02,660.0,0.0,2.691231,558.0,7048.0,afternoon,3,2.26849,22.583333,9.955228


In [156]:
# designate cloudy if cloud cover more than 50%
run_data['cloudy'] = np.where(run_data['cloud_cover'] >= 0.5, 1, 0)

# designate windy if wind speed greater than 10
run_data['windy'] = np.where(run_data['wind_speed'] >= 5, 1, 0)
print(run_data['windy'].sum())

# general optimal running temperature is ~40F - 50F
def categorize_temperature(temperature):
    if temperature < 40:
        return 'cold'
    elif temperature >= 40 and temperature <= 50:
        return 'neutral'
    else:
        return 'hot'

run_data['temperature_F'] = (run_data['apparent_temperature'] * (9/5)) + 32
run_data['temperature_condition'] = run_data['temperature_F'].apply(categorize_temperature)

5


In [157]:
run_data['elevation_change_m'] = run_data['elevation_gain'] - run_data['elevation_loss']
run_data.head()

Unnamed: 0,activity_date,activity_name,elapsed_time,distance,max_heart_rate,relative_effort,moving_time,max_speed,average_speed,elevation_gain,elevation_loss,elevation_low,elevation_high,max_grade,average_grade,max_cadence,average_cadence,average_heart_rate,calories,grade_adjusted_distance,weather_observation_time,weather_condition,weather_temperature,apparent_temperature,dewpoint,humidity,weather_pressure,wind_speed,wind_gust,wind_bearing,precipitation_intensity,sunrise_time,sunset_time,moon_phase,precipitation_probability,cloud_cover,weather_visibility,uv_index,average_elapsed_speed,dirt_distance,total_steps,time_of_day,average_running_zone,distance_mi,moving_time_min,pace_min_per_mile,cloudy,windy,temperature_F,temperature_condition,elevation_change_m
1,2023-03-15 01:11:09,Evening Run,1828,5.82,167.0,44.0,1825.0,4.6,3.190685,33.914471,23.0,1621.199951,1637.400024,7.272727,0.157994,95.0,86.739349,152.429367,467.0,5886.399902,1678842000.0,2.0,14.75,14.75,-12.29,0.14,1008.400024,2.0,4.74,18.0,0.0,1678800000.0,1678842000.0,0.75,0.0,0.87,2463.0,0.0,3.185449,300.0,10402.0,evening,4,3.617154,30.416667,8.409006,1,0,58.55,hot,10.914471
2,2022-12-26 14:54:47,Morning Run,1633,2.65,157.0,1.0,1110.0,15.342857,2.387387,6.1575,12.7575,1622.599976,1631.300049,10.131579,-0.249057,102.0,73.721649,133.036087,305.0,2680.800049,1672063000.0,2.0,-3.64,-6.63,-5.57,0.86,1027.599976,1.95,2.96,175.0,0.0,1672065000.0,1672098000.0,0.15,0.0,0.43,16093.0,0.0,1.62278,160.0,7196.0,morning,2,1.646986,18.5,11.232642,0,0,20.066,cold,-6.6
3,2023-02-14 02:11:13,Evening Run,896,2.81,173.0,30.0,896.0,5.8,3.139509,23.228878,0.0,1621.0,1635.400024,5.0,0.0,90.0,86.478409,160.854538,256.0,2832.199951,1676340000.0,3.0,9.55,7.41,-1.67,0.45,996.400024,4.09,8.25,154.0,0.0,1676297000.0,1676335000.0,0.77,0.0,0.91,3283.0,0.0,3.139509,50.0,5126.0,evening,4,1.746426,14.933333,8.550795,1,0,45.338,neutral,23.228878
4,2023-02-15 00:33:29,Afternoon Run,1179,3.38,176.0,17.0,1102.0,5.6,3.068965,27.94187,9.0,1621.599976,1637.400024,6.0,0.026611,105.0,85.923241,141.388657,256.0,3412.100098,1676419000.0,2.0,4.42,1.0,-8.54,0.38,996.0,4.26,7.31,109.0,0.0,1676383000.0,1676421000.0,0.8,0.0,0.74,6267.0,0.0,2.868533,363.0,5752.0,afternoon,3,2.100684,18.366667,8.743185,1,0,33.8,cold,18.94187
5,2023-01-08 00:02:48,Afternoon Run,1357,3.65,166.0,24.0,1355.0,4.4,2.695203,39.780704,8.0,1621.400024,1638.900024,6.25,0.21632,97.0,82.727119,146.831039,312.0,3712.199951,1673136000.0,1.0,4.47,2.55,-5.03,0.5,1024.099976,2.2,4.06,279.0,0.0,1673101000.0,1673136000.0,0.54,0.0,0.02,660.0,0.0,2.691231,558.0,7048.0,afternoon,3,2.26849,22.583333,9.955228,0,0,36.59,cold,31.780704


In [159]:
cols_for_export = ['activity_date', 'moving_time', 'time_of_day', 'average_running_zone', 'distance_mi', \
                   'moving_time_min', 'pace_min_per_mile', 'temperature_condition', 'temperature_F', 'elevation_change_m']
run_data_to_export = run_data[cols_for_export]
run_data_to_export.head()

Unnamed: 0,activity_date,moving_time,time_of_day,average_running_zone,distance_mi,moving_time_min,pace_min_per_mile,temperature_condition,temperature_F,elevation_change_m
1,2023-03-15 01:11:09,1825.0,evening,4,3.617154,30.416667,8.409006,hot,58.55,10.914471
2,2022-12-26 14:54:47,1110.0,morning,2,1.646986,18.5,11.232642,cold,20.066,-6.6
3,2023-02-14 02:11:13,896.0,evening,4,1.746426,14.933333,8.550795,neutral,45.338,23.228878
4,2023-02-15 00:33:29,1102.0,afternoon,3,2.100684,18.366667,8.743185,cold,33.8,18.94187
5,2023-01-08 00:02:48,1355.0,afternoon,3,2.26849,22.583333,9.955228,cold,36.59,31.780704


In [160]:
run_data_to_export.to_csv()

',activity_date,moving_time,time_of_day,average_running_zone,distance_mi,moving_time_min,pace_min_per_mile,temperature_condition,temperature_F,elevation_change_m\n1,2023-03-15 01:11:09,1825.0,evening,4,3.617153511497825,30.416666666666668,8.409006300114546,hot,58.55,10.914470672607422\n2,2022-12-26 14:54:47,1110.0,morning,2,1.646985705407085,18.5,11.232641509433963,cold,20.065999794006345,-6.59999990463257\n3,2023-02-14 02:11:13,896.0,evening,4,1.7464263517712866,14.933333333333334,8.55079478054567,neutral,45.337999725341795,23.228878021240234\n4,2023-02-15 00:33:29,1102.0,afternoon,3,2.100683654443754,18.366666666666667,8.74318540433925,cold,33.8,18.941869735717773\n5,2023-01-08 00:02:48,1355.0,afternoon,3,2.2684897451833437,22.583333333333332,9.955228310502283,cold,36.58999991416931,31.780704498291016\n6,2023-02-06 00:57:07,1334.0,afternoon,4,2.1441889372280922,22.233333333333334,10.36911111111111,neutral,42.09800024032593,23.193675994873047\n7,2023-02-11 19:42:47,1476.0,afternoon,3,