# Cleaning Physical & Sleep Data 😴

In [2]:
# Setup 

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pd.options.display.max_rows = 6000
pd.options.display.max_columns = 6000

# Read Data and Create Data Frames
df_sleep_raw = pd.read_csv('./whoop data/sleeps.csv')
df_worko_raw = pd.read_csv('./whoop data/workouts.csv')
df_phys_raw = pd.read_csv('./whoop data/physiological_cycles.csv')
df_journ_raw = pd.read_csv('./whoop data/journal_entries.csv')

#### Functions

In [3]:
###### FUNCTIONS GO HERE ######

def null_count (dataframe):
    ''' 
    This function shows for an inputted dataframe a table of all the null values per columns
    input: your dataframe
    output: dataframe holding the null values count per column of your dataframe
    '''
    null_list = dataframe.isnull().sum()
    # Transforming the list into a dataframe 
    dataframe_nulls = pd.DataFrame(null_list)
    dataframe_nulls.columns = ['Null Count']
    # Filtering: only show columns where null values exist
    dataframe_nulls_only = dataframe_nulls[dataframe_nulls["Null Count"] > 0]
    return dataframe_nulls_only


## Cleaning `df_sleep_raw`

#### Preparing & Cleaning Columns

In [4]:
df_sleep_raw.head()

Unnamed: 0,Cycle start time,Cycle end time,Cycle timezone,Sleep onset,Wake onset,Sleep performance %,Respiratory rate (rpm),Asleep duration (min),In bed duration (min),Light sleep duration (min),Deep (SWS) duration (min),REM duration (min),Awake duration (min),Sleep need (min),Sleep debt (min),Sleep efficiency %,Sleep consistency %,Nap
0,2024-12-14 00:46:19,,UTC+01:00,2024-12-14 00:46:19,2024-12-14 09:50:51,76.0,16.3,488.0,544.0,248.0,102.0,138.0,56.0,638.0,125.0,92.0,66.0,False
1,2024-12-13 00:09:59,2024-12-14 00:46:19,UTC+01:00,2024-12-13 00:09:59,2024-12-13 07:23:22,61.0,16.4,385.0,433.0,217.0,83.0,85.0,48.0,636.0,116.0,89.0,89.0,False
2,2024-12-12 00:23:55,2024-12-13 00:09:59,UTC+01:00,2024-12-12 00:23:55,2024-12-12 07:03:04,61.0,16.6,362.0,399.0,200.0,85.0,77.0,37.0,594.0,89.0,91.0,87.0,False
3,2024-12-11 00:30:36,2024-12-12 00:23:55,UTC+01:00,2024-12-11 00:30:36,2024-12-11 07:31:11,69.0,16.5,402.0,419.0,262.0,78.0,62.0,17.0,581.0,89.0,96.0,85.0,False
4,2024-12-09 23:28:01,2024-12-11 00:30:36,UTC+01:00,2024-12-09 23:28:01,2024-12-10 08:00:56,77.0,16.5,466.0,512.0,250.0,103.0,113.0,46.0,606.0,127.0,95.0,77.0,False


In [5]:
#1) Rename Columns (if necessary)
df_sleep_raw = df_sleep_raw.rename({"Cycle start time": "cycle start time"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Cycle end time": "cycle end time"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Sleep performance %": "sleep performance"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Sleep efficiency %": "sleep efficiency"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Sleep consistency %": "sleep consistency"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Respiratory rate (rpm)": "respiratory rate"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Asleep duration (min)": "asleep duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"In bed duration (min)": "in bed duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Light sleep duration (min)": "light sleep duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Deep (SWS) duration (min)": "deep sleep duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"REM duration (min)": "rem sleep duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Awake duration (min)": "awake duration"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Sleep need (min)": "sleep need"}, axis=1)
df_sleep_raw = df_sleep_raw.rename({"Sleep debt (min)": "sleep debt"}, axis=1)

In [6]:
#2) Make columns lowercase
df_sleep_raw.columns = [x.lower() for x in df_sleep_raw.columns]

In [7]:
#3) Change Datatype of Columns

# Date Columns to Datetime
df_sleep_raw['cycle start time'] = pd.to_datetime(df_sleep_raw['cycle start time'], format='mixed',  dayfirst = True)
df_sleep_raw['cycle end time'] = pd.to_datetime(df_sleep_raw['cycle end time'], format='mixed',  dayfirst = True)
df_sleep_raw['sleep onset'] = pd.to_datetime(df_sleep_raw['sleep onset'], format='mixed',  dayfirst = True)
df_sleep_raw['wake onset'] = pd.to_datetime(df_sleep_raw['wake onset'], format='mixed',  dayfirst = True)

# Percentage Columns to 0-1 format 
df_sleep_raw['sleep performance'] = df_sleep_raw['sleep performance'] /100
df_sleep_raw['sleep efficiency'] = df_sleep_raw['sleep efficiency'] /100
df_sleep_raw['sleep consistency'] = df_sleep_raw['sleep consistency'] /100

In [8]:
#4) Create new columns
df_sleep_raw['to bed date'] = pd.to_datetime(df_sleep_raw['sleep onset'], format="mixed").dt.date.astype("datetime64[ns]")
df_sleep_raw['date'] = pd.to_datetime(df_sleep_raw['wake onset'], format="mixed").dt.date.astype("datetime64[ns]")
df_sleep_raw.head()


Unnamed: 0,cycle start time,cycle end time,cycle timezone,sleep onset,wake onset,sleep performance,respiratory rate,asleep duration,in bed duration,light sleep duration,deep sleep duration,rem sleep duration,awake duration,sleep need,sleep debt,sleep efficiency,sleep consistency,nap,to bed date,date
0,2024-12-14 00:46:19,NaT,UTC+01:00,2024-12-14 00:46:19,2024-12-14 09:50:51,0.76,16.3,488.0,544.0,248.0,102.0,138.0,56.0,638.0,125.0,0.92,0.66,False,2024-12-14,2024-12-14
1,2024-12-13 00:09:59,2024-12-14 00:46:19,UTC+01:00,2024-12-13 00:09:59,2024-12-13 07:23:22,0.61,16.4,385.0,433.0,217.0,83.0,85.0,48.0,636.0,116.0,0.89,0.89,False,2024-12-13,2024-12-13
2,2024-12-12 00:23:55,2024-12-13 00:09:59,UTC+01:00,2024-12-12 00:23:55,2024-12-12 07:03:04,0.61,16.6,362.0,399.0,200.0,85.0,77.0,37.0,594.0,89.0,0.91,0.87,False,2024-12-12,2024-12-12
3,2024-12-11 00:30:36,2024-12-12 00:23:55,UTC+01:00,2024-12-11 00:30:36,2024-12-11 07:31:11,0.69,16.5,402.0,419.0,262.0,78.0,62.0,17.0,581.0,89.0,0.96,0.85,False,2024-12-11,2024-12-11
4,2024-12-09 23:28:01,2024-12-11 00:30:36,UTC+01:00,2024-12-09 23:28:01,2024-12-10 08:00:56,0.77,16.5,466.0,512.0,250.0,103.0,113.0,46.0,606.0,127.0,0.95,0.77,False,2024-12-09,2024-12-10


#### Checking Duplicates

In [9]:
#5) Check duplicates
df_sleep_raw.duplicated().sum()

0

#### Dropping Rows & Columns

In [10]:
#6) Check for missing values (use the function)
null_count(df_sleep_raw)

Unnamed: 0,Null Count
cycle end time,1
sleep performance,19
respiratory rate,9
asleep duration,9
in bed duration,9
light sleep duration,9
deep sleep duration,9
rem sleep duration,9
awake duration,9
sleep need,9


In [11]:
#7) Dropping Rows
# drop all entries that are naps and not sleeps
df_sleep_raw = df_sleep_raw.drop(df_sleep_raw[df_sleep_raw["nap"]== True].index)
# dropping all rows where no sleep consistency was not recorded
df_sleep_raw = df_sleep_raw.dropna(subset="sleep consistency")
# dropping all rows where cycle end time has no entries
df_sleep_raw = df_sleep_raw.dropna(subset="cycle end time")
# dropping all rows where no sleep performance was not recorded
df_sleep_raw = df_sleep_raw.dropna(subset="sleep performance")

null_count(df_sleep_raw)

Unnamed: 0,Null Count


In [12]:
#8) Dropping Columns
df_sleep_raw = df_sleep_raw.drop(["nap"], axis=1)

#### Final Steps

In [13]:
#9) Describe
df_sleep_raw.describe(include="all")

Unnamed: 0,cycle start time,cycle end time,cycle timezone,sleep onset,wake onset,sleep performance,respiratory rate,asleep duration,in bed duration,light sleep duration,deep sleep duration,rem sleep duration,awake duration,sleep need,sleep debt,sleep efficiency,sleep consistency,to bed date,date
count,1420,1420,1420,1420,1420,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420,1420
unique,,,4,,,,,,,,,,,,,,,,
top,,,UTC+02:00,,,,,,,,,,,,,,,,
freq,,,822,,,,,,,,,,,,,,,,
mean,2022-12-23 14:03:30.824648192,2022-12-24 14:02:58.102113024,,2022-12-23 14:03:30.824648192,2022-12-23 22:08:43.759859200,0.749718,16.620282,416.307042,484.120423,246.902113,87.996479,81.408451,67.723239,556.235915,66.334507,0.890746,0.731239,2022-12-23 04:32:47.323943680,2022-12-23 13:46:28.732394496
min,2020-12-28 00:46:51,2020-12-29 01:10:26,,2020-12-28 00:46:51,2020-12-28 07:48:00,0.13,15.2,74.0,116.0,20.0,7.0,0.0,9.0,141.0,0.0,0.14,0.13,2020-12-28 00:00:00,2020-12-28 00:00:00
25%,2021-12-25 18:30:38.249999872,2021-12-26 18:27:29.249999872,,2021-12-25 18:30:38.249999872,2021-12-26 03:58:51.750000128,0.66,16.3,374.75,435.0,207.0,76.0,59.0,51.0,529.0,44.0,0.87,0.67,2021-12-25 18:00:00,2021-12-25 18:00:00
50%,2022-12-24 12:02:33,2022-12-25 12:41:32.500000,,2022-12-24 12:02:33,2022-12-24 22:05:09.500000,0.75,16.6,414.0,479.0,245.0,87.0,80.0,63.5,557.5,68.0,0.9,0.75,2022-12-24 00:00:00,2022-12-24 12:00:00
75%,2023-12-23 04:57:59,2023-12-24 05:39:00,,2023-12-23 04:57:59,2023-12-23 13:43:16.249999872,0.84,16.9,456.0,531.0,284.0,100.0,102.0,80.0,587.0,88.0,0.92,0.81,2023-12-22 06:00:00,2023-12-23 06:00:00
max,2024-12-13 00:09:59,2024-12-14 00:46:19,,2024-12-13 00:09:59,2024-12-13 07:23:22,1.0,19.0,703.0,793.0,513.0,187.0,238.0,460.0,672.0,196.0,0.98,0.96,2024-12-13 00:00:00,2024-12-13 00:00:00


In [14]:
#10) Set index
#Create column with ID of each sleep
df_sleep_raw['sleep index'] = range(1, len(df_sleep_raw) + 1)

#set index
df_sleep_raw.set_index("sleep index", inplace= True)

In [15]:
#11) Rearrange columns order 
df_sleep_raw = df_sleep_raw[['date','wake onset', 'to bed date', 'sleep onset', 'sleep performance', 'respiratory rate',
       'asleep duration', 'in bed duration', 'light sleep duration',
       'deep sleep duration', 'rem sleep duration', 'awake duration',
       'sleep need', 'sleep debt', 'sleep efficiency', 'sleep consistency', 'cycle start time', 'cycle end time', 'cycle timezone']]

In [16]:
# 12) Creating the final dataframe 
df_sleep = df_sleep_raw
df_sleep.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1420 entries, 1 to 1420
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   date                  1420 non-null   datetime64[ns]
 1   wake onset            1420 non-null   datetime64[ns]
 2   to bed date           1420 non-null   datetime64[ns]
 3   sleep onset           1420 non-null   datetime64[ns]
 4   sleep performance     1420 non-null   float64       
 5   respiratory rate      1420 non-null   float64       
 6   asleep duration       1420 non-null   float64       
 7   in bed duration       1420 non-null   float64       
 8   light sleep duration  1420 non-null   float64       
 9   deep sleep duration   1420 non-null   float64       
 10  rem sleep duration    1420 non-null   float64       
 11  awake duration        1420 non-null   float64       
 12  sleep need            1420 non-null   float64       
 13  sleep debt            1

## Cleaning `df_phys_raw`

#### Preparing & Cleaning Columns

In [17]:
df_phys_raw.head(2)

Unnamed: 0,Cycle start time,Cycle end time,Cycle timezone,Recovery score %,Resting heart rate (bpm),Heart rate variability (ms),Skin temp (celsius),Blood oxygen %,Day Strain,Energy burned (cal),Max HR (bpm),Average HR (bpm),Sleep onset,Wake onset,Sleep performance %,Respiratory rate (rpm),Asleep duration (min),In bed duration (min),Light sleep duration (min),Deep (SWS) duration (min),REM duration (min),Awake duration (min),Sleep need (min),Sleep debt (min),Sleep efficiency %,Sleep consistency %
0,2024-12-14 00:46:19,,UTC+01:00,93.0,48.0,61.0,33.7,97.18,,,,,2024-12-14 00:46:19,2024-12-14 09:50:51,76.0,16.3,488.0,544.0,248.0,102.0,138.0,56.0,638.0,125.0,92.0,66.0
1,2024-12-13 00:09:59,2024-12-14 00:46:19,UTC+01:00,84.0,50.0,54.0,33.61,96.23,18.1,3756.0,172.0,77.0,2024-12-13 00:09:59,2024-12-13 07:23:22,61.0,16.4,385.0,433.0,217.0,83.0,85.0,48.0,636.0,116.0,89.0,89.0


In [18]:
#1) Rename Columns (if necessary)
df_phys_raw = df_phys_raw.rename({"Recovery score %": "recovery score"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Resting heart rate (bpm)": "rhr"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Heart rate variability (ms)": "hrv"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Skin temp (celsius)": "skin temp"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Blood oxygen %": "blood oxygen"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Energy burned (cal)": "cal burned"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Max HR (bpm)": "max hr"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Average HR (bpm)": "avg hr"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Cycle start time": "cycle start time"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Cycle end time": "cycle end time"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Sleep performance %": "sleep performance"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Sleep efficiency %": "sleep efficiency"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Sleep consistency %": "sleep consistency"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Respiratory rate (rpm)": "respiratory rate"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Asleep duration (min)": "asleep duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"In bed duration (min)": "in bed duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Light sleep duration (min)": "light sleep duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Deep (SWS) duration (min)": "deep sleep duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"REM duration (min)": "rem sleep duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Awake duration (min)": "awake duration"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Sleep need (min)": "sleep need"}, axis=1)
df_phys_raw = df_phys_raw.rename({"Sleep debt (min)": "sleep debt"}, axis=1)

In [19]:
#2) Make columns lowercase
df_phys_raw.columns = [x.lower() for x in df_phys_raw.columns]

In [20]:
#3) Change Datatype of Columns

# Date Columns to Datetime
df_phys_raw['cycle start time'] = pd.to_datetime(df_phys_raw['cycle start time'], format='mixed',  dayfirst = True)
df_phys_raw['cycle end time'] = pd.to_datetime(df_phys_raw['cycle end time'], format='mixed',  dayfirst = True)
df_phys_raw['sleep onset'] = pd.to_datetime(df_phys_raw['sleep onset'], format='mixed',  dayfirst = True)
df_phys_raw['wake onset'] = pd.to_datetime(df_phys_raw['wake onset'], format='mixed',  dayfirst = True)

# Percentage Columns to 0-1 format 
df_phys_raw['sleep performance'] = df_phys_raw['sleep performance'] /100
df_phys_raw['sleep efficiency'] = df_phys_raw['sleep efficiency'] /100
df_phys_raw['sleep consistency'] = df_phys_raw['sleep consistency'] /100
df_phys_raw['blood oxygen'] = df_phys_raw['blood oxygen'] /100
df_phys_raw['recovery score'] = df_phys_raw['recovery score']/100

In [21]:
#4) Create new columns
df_phys_raw['to bed date'] = pd.to_datetime(df_phys_raw['sleep onset'], format="mixed").dt.date.astype("datetime64[ns]")
df_phys_raw['date'] = pd.to_datetime(df_phys_raw['wake onset'], format="mixed").dt.date.astype("datetime64[ns]")
df_phys_raw.head()

Unnamed: 0,cycle start time,cycle end time,cycle timezone,recovery score,rhr,hrv,skin temp,blood oxygen,day strain,cal burned,max hr,avg hr,sleep onset,wake onset,sleep performance,respiratory rate,asleep duration,in bed duration,light sleep duration,deep sleep duration,rem sleep duration,awake duration,sleep need,sleep debt,sleep efficiency,sleep consistency,to bed date,date
0,2024-12-14 00:46:19,NaT,UTC+01:00,0.93,48.0,61.0,33.7,0.9718,,,,,2024-12-14 00:46:19,2024-12-14 09:50:51,0.76,16.3,488.0,544.0,248.0,102.0,138.0,56.0,638.0,125.0,0.92,0.66,2024-12-14,2024-12-14
1,2024-12-13 00:09:59,2024-12-14 00:46:19,UTC+01:00,0.84,50.0,54.0,33.61,0.9623,18.1,3756.0,172.0,77.0,2024-12-13 00:09:59,2024-12-13 07:23:22,0.61,16.4,385.0,433.0,217.0,83.0,85.0,48.0,636.0,116.0,0.89,0.89,2024-12-13,2024-12-13
2,2024-12-12 00:23:55,2024-12-13 00:09:59,UTC+01:00,0.68,51.0,48.0,33.9,0.964,19.1,4183.0,172.0,81.0,2024-12-12 00:23:55,2024-12-12 07:03:04,0.61,16.6,362.0,399.0,200.0,85.0,77.0,37.0,594.0,89.0,0.91,0.87,2024-12-12,2024-12-12
3,2024-12-11 00:30:36,2024-12-12 00:23:55,UTC+01:00,0.91,50.0,53.0,35.1,0.9656,16.9,3231.0,172.0,75.0,2024-12-11 00:30:36,2024-12-11 07:31:11,0.69,16.5,402.0,419.0,262.0,78.0,62.0,17.0,581.0,89.0,0.96,0.85,2024-12-11,2024-12-11
4,2024-12-09 23:28:01,2024-12-11 00:30:36,UTC+01:00,0.51,52.0,42.0,35.74,0.9275,15.2,2916.0,167.0,73.0,2024-12-09 23:28:01,2024-12-10 08:00:56,0.77,16.5,466.0,512.0,250.0,103.0,113.0,46.0,606.0,127.0,0.95,0.77,2024-12-09,2024-12-10


#### Checking Duplicates

In [22]:
#5) Check duplicates
df_phys_raw.duplicated().sum()

0

#### Dropping Rows & Columns

In [23]:
#6) Check for missing values (use the function)
null_count(df_phys_raw)

Unnamed: 0,Null Count
cycle end time,1
recovery score,17
rhr,17
hrv,17
skin temp,377
blood oxygen,376
day strain,6
cal burned,4
max hr,4
avg hr,4


In [24]:
#7) Dropping Rows
# dropping all rows where no sleep consistency was not recorded
df_phys_raw = df_phys_raw.dropna(subset="sleep consistency")
# dropping all rows where cycle end time has no entries
df_phys_raw = df_phys_raw.dropna(subset="cycle end time")
# dropping all rows where no sleep performance was not recorded
df_phys_raw = df_phys_raw.dropna(subset="sleep performance")

null_count(df_phys_raw)

Unnamed: 0,Null Count
skin temp,356
blood oxygen,355
day strain,2
cal burned,1
max hr,1
avg hr,1


In [25]:
#8) Dropping Columns
## nothing to drop

#### Final Steps

In [26]:
#9) Describe
df_phys_raw.describe(include="all")

Unnamed: 0,cycle start time,cycle end time,cycle timezone,recovery score,rhr,hrv,skin temp,blood oxygen,day strain,cal burned,max hr,avg hr,sleep onset,wake onset,sleep performance,respiratory rate,asleep duration,in bed duration,light sleep duration,deep sleep duration,rem sleep duration,awake duration,sleep need,sleep debt,sleep efficiency,sleep consistency,to bed date,date
count,1420,1420,1420,1420.0,1420.0,1420.0,1064.0,1065.0,1418.0,1419.0,1419.0,1419.0,1420,1420,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420.0,1420,1420
unique,,,4,,,,,,,,,,,,,,,,,,,,,,,,,
top,,,UTC+02:00,,,,,,,,,,,,,,,,,,,,,,,,,
freq,,,822,,,,,,,,,,,,,,,,,,,,,,,,,
mean,2022-12-23 14:03:30.824648192,2022-12-24 14:02:58.102113024,,0.627972,51.594366,54.458451,33.999511,0.967819,14.378491,2860.606061,163.915433,71.701903,2022-12-23 14:03:30.824648192,2022-12-23 22:08:43.759859200,0.749718,16.620282,416.307042,484.120423,246.902113,87.996479,81.408451,67.723239,556.235915,66.334507,0.890746,0.731239,2022-12-23 04:32:47.323943680,2022-12-23 13:46:28.732394496
min,2020-12-28 00:46:51,2020-12-29 01:10:26,,0.01,43.0,24.0,30.9,0.89,4.1,830.0,89.0,49.0,2020-12-28 00:46:51,2020-12-28 07:48:00,0.13,15.2,74.0,116.0,20.0,7.0,0.0,9.0,141.0,0.0,0.14,0.13,2020-12-28 00:00:00,2020-12-28 00:00:00
25%,2021-12-25 18:30:38.249999872,2021-12-26 18:27:29.249999872,,0.49,49.0,49.0,33.6,0.9627,12.3,2227.5,155.0,67.0,2021-12-25 18:30:38.249999872,2021-12-26 03:58:51.750000128,0.66,16.3,374.75,435.0,207.0,76.0,59.0,51.0,529.0,44.0,0.87,0.67,2021-12-25 18:00:00,2021-12-25 18:00:00
50%,2022-12-24 12:02:33,2022-12-25 12:41:32.500000,,0.63,51.0,55.0,34.085,0.9695,14.8,2669.0,164.0,71.0,2022-12-24 12:02:33,2022-12-24 22:05:09.500000,0.75,16.6,414.0,479.0,245.0,87.0,80.0,63.5,557.5,68.0,0.9,0.75,2022-12-24 00:00:00,2022-12-24 12:00:00
75%,2023-12-23 04:57:59,2023-12-24 05:39:00,,0.78,53.0,60.0,34.5,0.9746,17.3,3250.5,175.0,75.0,2023-12-23 04:57:59,2023-12-23 13:43:16.249999872,0.84,16.9,456.0,531.0,284.0,100.0,102.0,80.0,587.0,88.0,0.92,0.81,2023-12-22 06:00:00,2023-12-23 06:00:00
max,2024-12-13 00:09:59,2024-12-14 00:46:19,,0.98,71.0,101.0,37.12,0.9915,20.7,8150.0,206.0,105.0,2024-12-13 00:09:59,2024-12-13 07:23:22,1.0,19.0,703.0,793.0,513.0,187.0,238.0,460.0,672.0,196.0,0.98,0.96,2024-12-13 00:00:00,2024-12-13 00:00:00


In [27]:
#10) Set index
#Create column with ID for each entry
df_phys_raw['cycle id'] = range(1, len(df_phys_raw) + 1)

#set index
df_phys_raw.set_index("cycle id", inplace= True)

In [28]:
df_phys_raw

Unnamed: 0_level_0,cycle start time,cycle end time,cycle timezone,recovery score,rhr,hrv,skin temp,blood oxygen,day strain,cal burned,max hr,avg hr,sleep onset,wake onset,sleep performance,respiratory rate,asleep duration,in bed duration,light sleep duration,deep sleep duration,rem sleep duration,awake duration,sleep need,sleep debt,sleep efficiency,sleep consistency,to bed date,date
cycle id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
1,2024-12-13 00:09:59,2024-12-14 00:46:19,UTC+01:00,0.84,50.0,54.0,33.61,0.9623,18.1,3756.0,172.0,77.0,2024-12-13 00:09:59,2024-12-13 07:23:22,0.61,16.4,385.0,433.0,217.0,83.0,85.0,48.0,636.0,116.0,0.89,0.89,2024-12-13,2024-12-13
2,2024-12-12 00:23:55,2024-12-13 00:09:59,UTC+01:00,0.68,51.0,48.0,33.9,0.964,19.1,4183.0,172.0,81.0,2024-12-12 00:23:55,2024-12-12 07:03:04,0.61,16.6,362.0,399.0,200.0,85.0,77.0,37.0,594.0,89.0,0.91,0.87,2024-12-12,2024-12-12
3,2024-12-11 00:30:36,2024-12-12 00:23:55,UTC+01:00,0.91,50.0,53.0,35.1,0.9656,16.9,3231.0,172.0,75.0,2024-12-11 00:30:36,2024-12-11 07:31:11,0.69,16.5,402.0,419.0,262.0,78.0,62.0,17.0,581.0,89.0,0.96,0.85,2024-12-11,2024-12-11
4,2024-12-09 23:28:01,2024-12-11 00:30:36,UTC+01:00,0.51,52.0,42.0,35.74,0.9275,15.2,2916.0,167.0,73.0,2024-12-09 23:28:01,2024-12-10 08:00:56,0.77,16.5,466.0,512.0,250.0,103.0,113.0,46.0,606.0,127.0,0.95,0.77,2024-12-09,2024-12-10
5,2024-12-09 00:30:43,2024-12-09 23:28:01,UTC+01:00,0.61,50.0,47.0,34.82,0.9473,12.9,2280.0,175.0,70.0,2024-12-09 00:30:43,2024-12-09 07:00:26,0.57,16.3,349.0,389.0,220.0,70.0,59.0,40.0,612.0,111.0,0.96,0.8,2024-12-09,2024-12-09
6,2024-12-08 01:22:34,2024-12-09 00:30:43,UTC+01:00,0.65,53.0,46.0,35.1,0.9629,16.3,3043.0,169.0,74.0,2024-12-08 01:22:34,2024-12-08 08:37:58,0.64,16.4,395.0,435.0,259.0,92.0,44.0,40.0,618.0,114.0,0.95,0.72,2024-12-08,2024-12-08
7,2024-12-07 00:23:02,2024-12-08 01:22:34,UTC+01:00,0.37,56.0,39.0,34.7,0.9535,16.9,3418.0,167.0,73.0,2024-12-07 00:23:02,2024-12-07 08:01:59,0.63,16.1,395.0,458.0,238.0,95.0,62.0,63.0,624.0,109.0,0.92,0.84,2024-12-07,2024-12-07
8,2024-12-06 00:06:54,2024-12-07 00:23:02,UTC+01:00,0.59,53.0,47.0,34.81,0.9622,18.4,3735.0,174.0,78.0,2024-12-06 00:06:54,2024-12-06 07:00:28,0.6,16.6,330.0,413.0,191.0,62.0,77.0,83.0,548.0,69.0,0.8,0.87,2024-12-06,2024-12-06
9,2024-12-04 23:17:53,2024-12-06 00:06:54,UTC+01:00,0.61,53.0,46.0,33.61,0.9664,13.0,2406.0,167.0,68.0,2024-12-04 23:17:53,2024-12-05 07:38:45,0.76,16.3,448.0,500.0,321.0,101.0,26.0,52.0,588.0,85.0,0.94,0.82,2024-12-04,2024-12-05
10,2024-12-03 23:39:27,2024-12-04 23:17:53,UTC+01:00,0.68,52.0,48.0,33.54,0.97,16.7,3048.0,176.0,75.0,2024-12-03 23:39:27,2024-12-04 07:15:18,0.69,16.1,379.0,455.0,182.0,94.0,103.0,76.0,551.0,68.0,0.93,0.76,2024-12-03,2024-12-04


In [29]:
#11) Rearrange columns order 
df_phys_raw = df_phys_raw[['date','wake onset', 'to bed date', 'sleep onset', 'sleep performance', 'recovery score', 'day strain',  'rhr', 'hrv','respiratory rate',  
       'asleep duration','in bed duration','light sleep duration',
       'deep sleep duration', 'rem sleep duration','awake duration',
       'sleep need', 'sleep debt', 'sleep efficiency', 'sleep consistency', 
       'skin temp', 'blood oxygen','cal burned', 'max hr', 'avg hr',       
       'cycle start time', 'cycle end time', 'cycle timezone']]

In [30]:
# 12) Creating the final dataframe 
df_phys = df_phys_raw
df_phys.columns

Index(['date', 'wake onset', 'to bed date', 'sleep onset', 'sleep performance',
       'recovery score', 'day strain', 'rhr', 'hrv', 'respiratory rate',
       'asleep duration', 'in bed duration', 'light sleep duration',
       'deep sleep duration', 'rem sleep duration', 'awake duration',
       'sleep need', 'sleep debt', 'sleep efficiency', 'sleep consistency',
       'skin temp', 'blood oxygen', 'cal burned', 'max hr', 'avg hr',
       'cycle start time', 'cycle end time', 'cycle timezone'],
      dtype='object')

## Saving as .CSV

In [31]:
  '''
  df_phys.to_csv('df_phys.csv', index=True)
  '''

"\ndf_phys.to_csv('df_phys.csv', index=True)\n"

# Basic Analysis `df_sleep`

In [44]:
#1) How long do I sleep on average?
sleep_duration_mean = int(df_sleep["asleep duration"].mean().round())
sleep_duration_mean_h = sleep_duration_mean//60
sleep_duration_mean_min = sleep_duration_mean%60
print (f"On average, I sleep {sleep_duration_mean_h}h {sleep_duration_mean_min} mins per night")

On average, I sleep 6h 56 mins per night


In [45]:
#4) How efficient is my sleep normally?
sleep_efficiency_mean = int((df_sleep["sleep efficiency"].mean().round(2))*100)
print (f"On average, my sleep efficiency is {sleep_efficiency_mean} per cent per night")

On average, my sleep efficiency is 89 per cent per night


## Heart Rate Analysis for High Intensity Training Impact

Between 13. January 2022 and 10. April 2022, where I finished my first Half-Marathon, my run training was predominately high-intensity training. 

In [59]:
# Creating a dataframe that holds the physical data 4 weeks BEFORE HIT running (21. October 2021 - 13. January 2022)

start_date_b = '2021-12-16' 
end_date_b = '2022-01-13'

df_phys_before_hit = df_phys[(df_phys['date'] >= start_date_b) & (df_phys['date'] <= end_date_b)]
avg_hr_before = df_phys_before_hit["rhr"].mean()

print(f"the average resting heart rate 4 weeks BEFORE high-intensity run training was {avg_hr_before}")

# Creating a dataframe that holds the physical data 12 weeks DURING HIT running (13. January 2022 – 10. April 2022)

start_date_d = '2022-01-13' 
end_date_d = '2022-04-10'

df_phys_during_hit = df_phys[(df_phys['date'] >= start_date_d) & (df_phys['date'] <= end_date_d)]
avg_hr_during = df_phys_during_hit["rhr"].mean()

print(f"the average resting heart rate WHILE 12 weeks high-intensity run training was {avg_hr_during}")

# Creating a dataframe that holds the physical data 4 weeks AFTER HIT running (10. April 2022 – 3. July 2022)

start_date_a = '2022-04-10' 
end_date_a = '2022-05-08'

df_phys_after_hit = df_phys[(df_phys['date'] >= start_date_a) & (df_phys['date'] <= end_date_a)]
avg_hr_after = df_phys_after_hit["rhr"].mean()

print(f"the average resting heart rate AFTER 4 weeks high-intensity run training was {avg_hr_after}")

the average resting heart rate 4 weeks BEFORE high-intensity run training was 52.62068965517241
the average resting heart rate WHILE 12 weeks high-intensity run training was 51.895348837209305
the average resting heart rate AFTER 4 weeks high-intensity run training was 50.689655172413794
