Health and Exercise Tracker Analysis Notebook

The following cell pulls the live tracker from Google Drive so I don't have to redownaload the file to a data folder every time.

In [1]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO
from datetime import datetime

spreadsheetId = "16EZhzrGxpV86c_1Axe9RpDaQNBbIbFWPVzkUyhOMIiA"  # <--- Please set the Spreadsheet ID.

# 1. Download the Google Spreadsheet as XLSX format.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + spreadsheetId + "/export?mimeType=application%2Fvnd.openxmlformats-officedocument.spreadsheetml.sheet"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})

# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
df = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=895221966072-ivfclv15clemuid8o8fphc3205ccooh2.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.


Here is a preview of the DataFrame.

In [2]:
df

Unnamed: 0,date,weight_kg,hours_slept,sleep_quality,bpm,walk_distance_km,walk_time,run_distance_km,run_time,cycle_distance_km,cycle_time,weights_intensity,weights_time,notes
0,07.03.2025,77.0,06:30:00,4.0,54.0,,,5.1,0:30:32,,,,,
1,08.03.2025,75.0,10:36:00,7.0,56.0,,,,,,,,,
2,09.03.2025,76.2,07:07:00,5.0,58.0,,,,,,,3.0,,
3,10.03.2025,77.5,08:32:00,6.0,55.0,3.74,41.02,3.77,37.46,,,,,"St Ramon (Uphill), sprained ankle, slow walk back"
4,11.03.2025,76.6,08:41:00,5.0,54.0,,,,,19.69,56.19,,,
5,12.03.2025,75.7,08:06:00,6.0,56.0,,,,,,,,,
6,13.03.2025,76.3,07:38:00,4.0,56.0,,,,,,,,,
7,14.03.2025,,,,,,,,,,,,,Visit to Stoke
8,15.03.2025,,,,,,,,,,,,,Visit to Stoke
9,16.03.2025,,,,,,,,,,,,,Visit to Stoke


For the moment we will convert all the NaN's to a 0 to make the data organisation clear, we may have to revisit this later.

In [3]:
# df = df.fillna(0)

And here we can see the data types for the individual columns.

In [4]:
df

Unnamed: 0,date,weight_kg,hours_slept,sleep_quality,bpm,walk_distance_km,walk_time,run_distance_km,run_time,cycle_distance_km,cycle_time,weights_intensity,weights_time,notes
0,07.03.2025,77.0,06:30:00,4.0,54.0,,,5.1,0:30:32,,,,,
1,08.03.2025,75.0,10:36:00,7.0,56.0,,,,,,,,,
2,09.03.2025,76.2,07:07:00,5.0,58.0,,,,,,,3.0,,
3,10.03.2025,77.5,08:32:00,6.0,55.0,3.74,41.02,3.77,37.46,,,,,"St Ramon (Uphill), sprained ankle, slow walk back"
4,11.03.2025,76.6,08:41:00,5.0,54.0,,,,,19.69,56.19,,,
5,12.03.2025,75.7,08:06:00,6.0,56.0,,,,,,,,,
6,13.03.2025,76.3,07:38:00,4.0,56.0,,,,,,,,,
7,14.03.2025,,,,,,,,,,,,,Visit to Stoke
8,15.03.2025,,,,,,,,,,,,,Visit to Stoke
9,16.03.2025,,,,,,,,,,,,,Visit to Stoke


In [5]:
df.dtypes

date                  object
weight_kg            float64
hours_slept           object
sleep_quality        float64
bpm                  float64
walk_distance_km     float64
walk_time            float64
run_distance_km      float64
run_time              object
cycle_distance_km    float64
cycle_time           float64
weights_intensity    float64
weights_time         float64
notes                 object
dtype: object

So here we can see that we have some data types that will make life difficult for us in the future.

We could just change the data types in the original google sheets document but that wouldn't be good practice.

Let's start by trying to see if we can turn these into universal values.

First we'll convert the date into a date format:

In [6]:
df["date"] = pd.to_datetime(df["date"], format='mixed')
df["date"] = df['date'].dt.strftime('%m/%d/%Y')

In [7]:
df["date"]

0     07/03/2025
1     08/03/2025
2     09/03/2025
3     10/03/2025
4     11/03/2025
5     12/03/2025
6     03/13/2025
7     03/14/2025
8     03/15/2025
9     03/16/2025
10    03/17/2025
11    03/18/2025
Name: date, dtype: object

In [8]:
df.dtypes

date                  object
weight_kg            float64
hours_slept           object
sleep_quality        float64
bpm                  float64
walk_distance_km     float64
walk_time            float64
run_distance_km      float64
run_time              object
cycle_distance_km    float64
cycle_time           float64
weights_intensity    float64
weights_time         float64
notes                 object
dtype: object

In [9]:
df

Unnamed: 0,date,weight_kg,hours_slept,sleep_quality,bpm,walk_distance_km,walk_time,run_distance_km,run_time,cycle_distance_km,cycle_time,weights_intensity,weights_time,notes
0,07/03/2025,77.0,06:30:00,4.0,54.0,,,5.1,0:30:32,,,,,
1,08/03/2025,75.0,10:36:00,7.0,56.0,,,,,,,,,
2,09/03/2025,76.2,07:07:00,5.0,58.0,,,,,,,3.0,,
3,10/03/2025,77.5,08:32:00,6.0,55.0,3.74,41.02,3.77,37.46,,,,,"St Ramon (Uphill), sprained ankle, slow walk back"
4,11/03/2025,76.6,08:41:00,5.0,54.0,,,,,19.69,56.19,,,
5,12/03/2025,75.7,08:06:00,6.0,56.0,,,,,,,,,
6,03/13/2025,76.3,07:38:00,4.0,56.0,,,,,,,,,
7,03/14/2025,,,,,,,,,,,,,Visit to Stoke
8,03/15/2025,,,,,,,,,,,,,Visit to Stoke
9,03/16/2025,,,,,,,,,,,,,Visit to Stoke


We can set the individual columns according to their starting dtypes, see the column ["weight"] here:

In [10]:
df["weight_kg"] = df['weight_kg'].astype(float)

In [11]:
df.dtypes

date                  object
weight_kg            float64
hours_slept           object
sleep_quality        float64
bpm                  float64
walk_distance_km     float64
walk_time            float64
run_distance_km      float64
run_time              object
cycle_distance_km    float64
cycle_time           float64
weights_intensity    float64
weights_time         float64
notes                 object
dtype: object

Now we'll look at something a little more complicated, converting the object type "hours_slept" column into am HH:MM format.

In [13]:
df["hours_slept"] = pd.to_datetime(df["hours_slept"], format='%H:%M:%S', errors='coerce', utc = True).dt.time

In [14]:
df["hours_slept"]

0     06:30:00
1     10:36:00
2     07:07:00
3     08:32:00
4     08:41:00
5     08:06:00
6     07:38:00
7          NaT
8          NaT
9          NaT
10         NaT
11    09:11:00
Name: hours_slept, dtype: object

Here we'll look at converting two types simultaneously as we need both as integer values.

In [15]:
df[["sleep_quality", "bpm"]] = df[["sleep_quality", "bpm"]].astype(int)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [None]:
df.dtypes

date                  object
weight_kg            float64
hours_slept           object
sleep_quality        float64
bpm                  float64
walk_distance_km     float64
walk_time            float64
run_distance_km      float64
run_time              object
cycle_distance_km    float64
cycle_time           float64
weights_intensity    float64
weights_time         float64
notes                 object
dtype: object

In [None]:
df

Unnamed: 0,date,weight_kg,hours_slept,sleep_quality,bpm,walk_distance_km,walk_time,run_distance_km,run_time,cycle_distance_km,cycle_time,weights_intensity,weights_time,notes
0,07/03/2025,77.0,06:30:00,4.0,54.0,,,5.1,0:30:32,,,,,
1,08/03/2025,75.0,10:36:00,7.0,56.0,,,,,,,,,
2,09/03/2025,76.2,07:07:00,5.0,58.0,,,,,,,3.0,,
3,10/03/2025,77.5,08:32:00,6.0,55.0,3.74,41.02,3.77,37.46,,,,,"St Ramon (Uphill), sprained ankle, slow walk back"
4,11/03/2025,76.6,08:41:00,5.0,54.0,,,,,19.69,56.19,,,
5,12/03/2025,75.7,08:06:00,6.0,56.0,,,,,,,,,
6,03/13/2025,76.3,07:38:00,4.0,56.0,,,,,,,,,
7,03/14/2025,,NaT,,,,,,,,,,,Visit to Stoke
8,03/15/2025,,NaT,,,,,,,,,,,Visit to Stoke
9,03/16/2025,,NaT,,,,,,,,,,,Visit to Stoke


In [None]:
df["walk_time"] = pd.to_datetime(df["walk_time"], format='%H:%M:%S', errors='coerce', utc = True).dt.time

In [None]:
df["run_time"] = pd.to_datetime(df["run_time"], format='%H:%M:%S', errors='coerce', utc = True).dt.time

In [None]:
df["cycle_time"] = pd.to_datetime(df["cycle_time"], format='%H:%M:%S', errors='coerce', utc = True).dt.time

In [None]:
df["weights_time"] = pd.to_datetime(df["weights_time"], format='%H:%M:%S', errors='coerce', utc = True).dt.time

In [None]:
df.dtypes

date                         object
weight_kg                   float64
hours_slept                  object
sleep_quality               float64
bpm                         float64
walk_distance_km            float64
walk_time            datetime64[ns]
run_distance_km             float64
run_time                     object
cycle_distance_km           float64
cycle_time           datetime64[ns]
weights_intensity           float64
weights_time         datetime64[ns]
notes                        object
dtype: object

In [None]:
df

Unnamed: 0,date,weight_kg,hours_slept,sleep_quality,bpm,walk_distance_km,walk_time,run_distance_km,run_time,cycle_distance_km,cycle_time,weights_intensity,weights_time,notes
0,07/03/2025,77.0,06:30:00,4.0,54.0,,NaT,5.1,00:30:32,,NaT,,NaT,
1,08/03/2025,75.0,10:36:00,7.0,56.0,,NaT,,NaT,,NaT,,NaT,
2,09/03/2025,76.2,07:07:00,5.0,58.0,,NaT,,NaT,,NaT,3.0,NaT,
3,10/03/2025,77.5,08:32:00,6.0,55.0,3.74,NaT,3.77,NaT,,NaT,,NaT,"St Ramon (Uphill), sprained ankle, slow walk back"
4,11/03/2025,76.6,08:41:00,5.0,54.0,,NaT,,NaT,19.69,NaT,,NaT,
5,12/03/2025,75.7,08:06:00,6.0,56.0,,NaT,,NaT,,NaT,,NaT,
6,03/13/2025,76.3,07:38:00,4.0,56.0,,NaT,,NaT,,NaT,,NaT,
7,03/14/2025,,NaT,,,,NaT,,NaT,,NaT,,NaT,Visit to Stoke
8,03/15/2025,,NaT,,,,NaT,,NaT,,NaT,,NaT,Visit to Stoke
9,03/16/2025,,NaT,,,,NaT,,NaT,,NaT,,NaT,Visit to Stoke
