## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.stats import norm
import math
import re
# import sidetable
import seaborn as sns
from sklearn.impute import SimpleImputer

## Read in the survey data (surveydata.csv)

In [None]:
# Load the raw survey export (path is relative to the notebook's working directory).
survey = pd.read_csv("surveydata.csv")

# Number of non-null SubjectID entries, i.e. how many observations there are.
print(survey['SubjectID'].count())
# Number of distinct subjects.
print(len(survey['SubjectID'].unique()))
# Distinct question descriptions (the original author noted 5 questions).
print(survey['description'].unique())
#print(max(survey['Unnamed:0']))
survey.head()

## We will pick these 2 descriptions:
- 'Numeric answer for number of hours of sleep for participant on night before the run'
- 'RPE scale 1-10 of workout difficulty'

In [None]:
# Keep only the two questions analysed in this notebook.
wanted_descriptions = [
    'Numeric answer for number of hours of sleep for participant on night before the run',
    'RPE scale 1-10 of workout difficulty',
]
# .copy() makes surveydf an independent frame: later cells drop columns from it and
# add key columns, which on a bare slice of `survey` raises SettingWithCopyWarning.
surveydf = survey[survey['description'].isin(wanted_descriptions)].copy()

# Inspect the raw answers to see how much cleaning each answer column needs.
print(pd.unique(surveydf['numerical_ans']))
print(pd.unique(surveydf['textual_ans']))


In [None]:
#Lets clean this data

# Drop the first two columns by position (presumably exported index columns -- TODO confirm).
# The result is rebound instead of dropping in place: surveydf was created as a slice of
# `survey`, and an in-place drop on a slice raises SettingWithCopyWarning.
# NOTE: the drop is positional, so re-running this cell removes two more columns.
surveydf = surveydf.drop(columns=surveydf.columns[[0, 1]])

In [None]:
#Create key variables

def _underscore_join(row):
    """Render every value of ``row`` as a string and join the pieces with underscores."""
    return '_'.join(row.values.astype(str))

# key: one value per survey session (SubjectID_Year_Date); the same recipe is applied
# to the garmin table so the two tables can be matched on it.
cols = ['SubjectID', 'Year', 'Date']
surveydf['key'] = surveydf[cols].apply(_underscore_join, axis=1)

# key2: one value per survey answer (SubjectID_Year_Date_description).
cols2 = ['SubjectID', 'Year', 'Date','description']
surveydf['key2'] = surveydf[cols2].apply(_underscore_join, axis=1)

# Rows whose key2 occurs more than once are possible duplicate entries / mistakes.
repeated_answers = surveydf.duplicated(subset=['key2'], keep=False)
print(surveydf.loc[repeated_answers, 'SubjectID'].count())
# Author's note from the original run: 53 of 5540 observations, judged acceptable.

# Inspect the first duplicated session.
key_look = '424_2017_617'
surveydf.loc[surveydf['key'] == key_look]


In [None]:
# Percentage of missing values per column, rounded to a whole percent.
for col, pct_missing in surveydf.isnull().mean().items():
    print('{} - {}%'.format(col, round(pct_missing*100)))

## Split into 2 survey dfs - one for each description:
 The major task is to convert the free-text answers into numerical values.

In [None]:
def _first_integer(answer):
    """Return the first whitespace-separated all-digit token of ``answer`` as an int.

    Returns NaN when ``answer`` is not a string (e.g. a missing value) or contains no
    all-digit token, instead of raising AttributeError / IndexError.
    """
    if not isinstance(answer, str):
        return np.nan
    for token in answer.split():
        if token.isdigit():
            return int(token)
    return np.nan


# RPE answers only; .copy() so the new columns are added to an independent frame
# rather than to a slice of surveydf (avoids SettingWithCopyWarning).
surveydf1 = surveydf[surveydf.description == 'RPE scale 1-10 of workout difficulty'].copy()
surveydf1['cleaned_column'] = surveydf1['textual_ans'].apply(_first_integer)
# Prefer the numeric answer; fall back to the number parsed from the text.
surveydf1['numerical_final_workout'] = surveydf1.numerical_ans.combine_first(surveydf1.cleaned_column)
surveydf1.head()

In [None]:
# Distinct free-text RPE answers, to eyeball what the text parser has to cope with.
surveydf1.textual_ans.unique()

In [None]:
# Sleep answers only; .copy() so the cleaning columns added in the next cell are written
# to an independent frame rather than to a slice of surveydf (avoids SettingWithCopyWarning).
surveydf2 = surveydf[surveydf.description ==  'Numeric answer for number of hours of sleep for participant on night before the run'].copy()
# Distinct free-text sleep answers, to see which formats need parsing.
surveydf2.textual_ans.unique()

In [None]:
def _strip_letters_and_punctuation(answer):
    """Remove ASCII letters and the characters / ( ) ! , from the stringified answer."""
    return re.sub('[a-zA-Z/(/)!,]', '', str(answer))


def _separators_to_spaces(text):
    """Turn ':', ';' and '-' into spaces so the parts become separate tokens."""
    return re.sub('[:;-]', ' ', str(text))


def _numeric_tokens(text):
    """Convert every space-separated token that starts with a digit to float."""
    return [float(token) for token in text.split(' ') if re.match('[0-9]', token)]


def _to_hours(values):
    """First value as hours; when a second value exists it is added as minutes (value / 60)."""
    if len(values) > 1:
        return values[0] + values[1] / 60
    return values[0]


# Step-by-step cleaning; each intermediate column is kept for inspection.
surveydf2['cleaned_column0'] = surveydf2['textual_ans'].apply(_strip_letters_and_punctuation)
surveydf2['cleaned_column0b'] = surveydf2['cleaned_column0'].apply(_separators_to_spaces)
surveydf2['cleaned_column1'] = surveydf2['cleaned_column0b'].apply(_numeric_tokens)
# Human-readable rendering of the parsed numbers.
surveydf2['cleaned_column1a'] = surveydf2['cleaned_column1'].apply(lambda values: ' | '.join(str(v) for v in values))
# Answers with no number at all become [NaN] so the next step can index position 0.
surveydf2['cleaned_column1b'] = surveydf2['cleaned_column1'].apply(lambda values: [np.nan] if len(values)==0 else values)
# NOTE(review): a range such as "7-8" is therefore read as 7 h 08 min -- confirm this is intended.
surveydf2['numerical_final_sleep'] = surveydf2['cleaned_column1b'].apply(_to_hours)

display(surveydf2.tail())
print(surveydf2['cleaned_column0'].unique())
print('*'*30)
print(surveydf2['numerical_final_sleep'].unique())

## Extract wanted surveydf columns

In [None]:
# List the columns available before trimming surveydf1 down.
surveydf1.columns

In [None]:
# Keep only the question text, the session key and the cleaned RPE value.
surveydf1 = surveydf1.loc[:, ['description', 'key', 'numerical_final_workout']]

In [None]:
# List the columns available before trimming surveydf2 down.
surveydf2.columns

In [None]:
# Keep only the question text, the session key and the cleaned sleep value.
# .copy() because later cells write into this frame with .loc; an explicit copy
# avoids SettingWithCopyWarning on those writes.
surveydf2 = surveydf2[['description', 'key', 'numerical_final_sleep']].copy()

In [None]:
# Where the survey holds a numeric sleep answer, use it in place of the value parsed
# from the free text. Rows are matched on the original `survey` index labels.
sleep_data = survey[survey.description ==  'Numeric answer for number of hours of sleep for participant on night before the run']
temp_sleep = sleep_data['numerical_ans'].dropna()
rows_select=temp_sleep.index

# A single index-aligned assignment does the whole job; the original wrapped this same
# statement in a loop over temp_sleep, repeating identical work once per value.
surveydf2.loc[rows_select,'numerical_final_sleep'] = temp_sleep

In [None]:
# Visual map of missing cells in the sleep table (one row per survey answer).
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(surveydf2.isnull(), cbar=False, ax=ax)

# The rows that still have at least one missing value.
surveydf2.loc[surveydf2.isna().any(axis=1)]

In [None]:
import itertools  # kept from the original cell; not used by the visible code


def _mean_known_sleep(key_fragment):
    """Mean of the non-missing ``numerical_final_sleep`` values of rows whose key contains ``key_fragment``.

    The sleep column is selected by name. The original took ``DataFrame.mean()`` over the
    whole frame (string columns included) and picked element ``[1]`` by position, which
    raises on pandas >= 2.0 and depends on which columns survive the reduction.
    """
    matching_rows = surveydf2['key'].str.contains(key_fragment)
    return surveydf2.loc[matching_rows, 'numerical_final_sleep'].mean()


# Replacement values, listed in the order the missing rows appear in surveydf2.
# NOTE(review): the fragments match anywhere in the key, not only the SubjectID part.
missing_sleep_1991 = [_mean_known_sleep(r'1991')]
missing_sleep_1995 =[5.5,7] # typo
# NOTE(review): 30 is a hard-coded count of the missing rows for this subject -- confirm.
missing_sleep_2064 = [_mean_known_sleep(r'2064')]*30

sleep_values_to_add = missing_sleep_1991+missing_sleep_1995+missing_sleep_2064

In [None]:
# Rows that still contain a missing value, in frame order.
temp_rows = surveydf2[surveydf2.isna().any(axis=1)].index

if len(temp_rows) == 0:
    # Nothing left to fill (e.g. the cell was re-run after a successful fill).
    pass
elif len(temp_rows) != len(sleep_values_to_add):
    # The replacement list is positional, so a length mismatch means the hard-coded
    # values no longer line up with the data; fail with an explicit message.
    raise ValueError(
        'expected {} replacement sleep values, got {}'.format(
            len(temp_rows), len(sleep_values_to_add)))
else:
    # One positional assignment; the original repeated this statement inside a loop
    # over sleep_values_to_add without using the loop variable.
    surveydf2.loc[temp_rows,'numerical_final_sleep'] = sleep_values_to_add

In [None]:
# Re-draw the missing-value map to confirm the fill left no gaps.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(surveydf2.isnull(), cbar=False, ax=ax)

## Read in Garmin Data

In [None]:
# Load the raw Garmin export (path is relative to the notebook's working directory).
garmin = pd.read_csv('garmindata.csv')

# Number of non-null SubjectID entries, i.e. how many records there are.
print(garmin['SubjectID'].count())
# Number of distinct subjects.
print(len(garmin['SubjectID'].unique()))

garmin.head()

In [None]:
#Lets explore this data

# Remove the first two columns by position.
# NOTE: positional, so re-running this cell removes two more columns.
garmin = garmin.drop(columns=garmin.columns[[0, 1]])

In [None]:
#Create key variables

# key: SubjectID_Year_Date, built exactly like the survey key so the tables can be matched.
cols = ['SubjectID', 'Year', 'Date'] #This key variable might help to find same observation in the survey database
garmin['key'] = garmin[cols].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)

# Number of distinct keys.
n_keys = len(pd.unique(garmin['key']))
print(n_keys)
# Average number of keys per runner. The runner count is taken from the data instead of
# the hard-coded 37 used originally, so it stays right if the input file changes.
print(n_keys / garmin['SubjectID'].nunique())
# Number of records per key.
garmin.groupby('key')['SubjectID'].count()


In [None]:
# Average number of records per key. The per-key counts are computed once and averaged
# by pandas, instead of running the groupby twice and summing with Python's built-in sum().
garmin.groupby('key')['SubjectID'].count().mean()


In [None]:
# Column dtypes and non-null counts of the Garmin table.
garmin.info()

In [None]:
# List the columns available before selecting the ones used in the analysis.
garmin.columns

In [None]:
# Identification columns plus the sensor channels used later in the notebook.
garmin_columns = [
    'SubjectID', 'Year', 'Date', 'key',
    'record.altitude[m]', 'record.distance[m]',
    'record.heart_rate[bpm]',
    'record.position_lat[semicircles]',
    'record.position_long[semicircles]',
    'record.speed[m/s]',
    'record.timestamp[s]',
]
garmin = garmin.loc[:, garmin_columns]

## Join garmin and surveydf tables - left join on key followed by `dropna()`, which keeps only Garmin records that have a matching survey answer and no missing values in any column

In [None]:
# Attach the RPE answer to every Garmin record with the same key, then discard rows
# with a missing value in any column (unmatched keys as well as missing sensor readings).
combined_workout = pd.merge(garmin, surveydf1, how='left', on='key').dropna()
combined_workout.head()

In [None]:
# Attach the sleep answer to every Garmin record with the same key, then discard rows
# with a missing value in any column (unmatched keys as well as missing sensor readings).
combined_sleep = pd.merge(garmin, surveydf2, how='left', on='key').dropna()
combined_sleep.head()

### If we look at combined descriptions, match on same SubjectIDs and Keys to find Subjects that have entries for both descriptions (Sleep and Workout)

In [None]:
# Keep the first record of each (key, RPE) pair so every pair appears once.
repeated_pair = combined_workout.duplicated(subset=['key', 'numerical_final_workout'])
combined_workout2 = combined_workout.loc[~repeated_pair]

In [None]:
# The key -> RPE lookup that is merged onto the sleep table in the next cell.
combined_workout2[['key', 'numerical_final_workout']]

In [None]:
# Inner join on key: only sessions that have both a sleep answer and an RPE answer remain.
rpe_by_key = combined_workout2[['key', 'numerical_final_workout']]
total_combined = combined_sleep.merge(rpe_by_key, on='key')
total_combined['key'].unique()

In [None]:
# Visual check for missing cells in the combined table.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(total_combined.isnull(), cbar=False, ax=ax)


In [None]:
# The GPS coordinates and the question text are not used in the rest of the analysis.
unused_columns = [
    'record.position_lat[semicircles]',
    'record.position_long[semicircles]',
    'description',
]
total_combined = total_combined.drop(columns=unused_columns)

In [None]:
# Preview the first ten rows of the combined table.
total_combined.head(10)

In [None]:
# Column dtypes and non-null counts of the combined table.
total_combined.info()

In [None]:
# total_combined.to_csv('garmin_sub.csv',index=False)

In [None]:
# sns.set_style("whitegrid")
# subject58 = total_combined[total_combined['SubjectID']==58]
# subject58
# sns.displot(data=subject58,y="record.timestamp[s]",x='record.heart_rate[bpm]')

In [None]:
# Subjects that remain after combining Garmin, sleep and RPE data.
total_combined.SubjectID.unique() # 16 runners

In [None]:
# Pace = timestamp / distance, scaled by 16.667 (~1000/60) to convert s/m into min/km.
# A distance of 0 gives inf (or NaN for 0/0); inf is handled in a later cell.
seconds_per_metre = total_combined['record.timestamp[s]'].div(total_combined['record.distance[m]'])
total_combined['pace[min/Km]'] = seconds_per_metre.mul(16.667)
total_combined.head()

In [None]:
# Summary statistics, including the new pace column (inf values show up in mean/max).
total_combined.describe()

In [None]:
# Rows in which any column equals +inf (produced by dividing by a zero distance).
total_combined[(total_combined == np.inf).any(axis=1)]

In [None]:
# Replace +inf pace with 0. The result is assigned back to the column: calling
# replace(..., inplace=True) on a column selection is chained assignment, which does not
# update the frame under pandas Copy-on-Write.
total_combined['pace[min/Km]'] = total_combined['pace[min/Km]'].replace(np.inf, 0)

In [None]:
# Summary statistics again, after the inf pace values were replaced.
total_combined.describe()

In [None]:
# Bin edges; pd.cut builds right-closed intervals, and values outside the outer edges
# (including a value equal to the lowest edge) become NaN.
distance_bins = [0,5137.28,25000]
sleep_bins = [0,5,8,18]

total_combined = total_combined.assign(
    binned_dist=pd.cut(total_combined['record.distance[m]'], distance_bins),
    #bin the sleep survey datatable
    binned_sleep=pd.cut(total_combined['numerical_final_sleep'], sleep_bins),
)
total_combined.head()

In [None]:
# Number of records in each sleep-duration bin.
total_combined['binned_sleep'].value_counts()

In [None]:
# Number of records in each distance bin.
total_combined['binned_dist'].value_counts()

In [None]:
# Column-wise maximum per (SubjectID, Date) group.
# NOTE(review): Year is not part of the grouping, although it is part of `key` -- confirm
# that the same Date never occurs in two different years for one subject.
runners_max = total_combined.groupby(['SubjectID','Date']).max()
runners_max

In [None]:
sns.set(style="darkgrid")
# Integer bin edges. RPE is reported on a 1-10 scale, so the edges run from 1 to 11:
# every score gets its own bin. (Edges 1..9, as before, merged 8 with 9 and dropped 10.)
rpe_bins = np.arange(1,12,1)
slp_bins = np.arange(1,13,1)
fig, axs = plt.subplots(2, 2, figsize=(10, 10))


def _label_integer_bins(ax, edges):
    """Put one tick at the centre of each bin, labelled with the bin's lower edge."""
    ax.set_xticks((edges[:-1] + edges[1:]) / 2)
    ax.set_xticklabels(((edges[:-1] + edges[1:]) // 2).astype(int))


# Top-left: maximum distance per group in runners_max.
sns.histplot(data=runners_max,x='record.distance[m]',kde=True,ax=axs[0,0],color='#fb8500')
axs[0,0].set_xlabel("Distance Ran (m)")

# Top-right: maximum heart rate per group, restricted to groups with a maximum of at least 100 bpm.
sns.histplot(data=runners_max[runners_max['record.heart_rate[bpm]']>=100],x='record.heart_rate[bpm]',kde=True,
             ax=axs[0,1],color="#023047")
axs[0,1].set_xlabel("Heart Rate (bpm)")
axs[0,1].set(ylabel=None)

# Bottom-left: hours of sleep.
sns.histplot(data=runners_max,x='numerical_final_sleep',ax=axs[1,0],color="#219ebc",bins=slp_bins)
_label_integer_bins(axs[1,0], slp_bins)
axs[1,0].set_xlim(1,13)
axs[1,0].set_xlabel("Hours of sleep (Hr)")

# Bottom-right: RPE.
sns.histplot(data=runners_max,x='numerical_final_workout',ax=axs[1,1],bins=rpe_bins,color="#e63946")
_label_integer_bins(axs[1,1], rpe_bins)
axs[1,1].set_xlim(rpe_bins[0], rpe_bins[-1])
axs[1,1].set_xlabel("RPE")
axs[1,1].set(ylabel=None)

plt.show()


In [None]:
# Records from sessions preceded by at most 6 hours of sleep.
low_sleep = total_combined.loc[total_combined['numerical_final_sleep'].le(6)]
# low_sleep[low_sleep['numerical_final_sleep']==4]['SubjectID'].unique()
# 514 1990 2065

# Subjects with at least one such session.
low_sleep['SubjectID'].unique()

In [None]:
# All records belonging to subject 514.
subject514 = total_combined.loc[total_combined['SubjectID'].eq(514)]

subject514.describe() # 34 runs

In [None]:
# All records belonging to subject 1990.
subject1990 = total_combined.loc[total_combined['SubjectID'].eq(1990)]

subject1990.describe() # 17 runs

In [None]:
# All records belonging to subject 2065.
subject2065 = total_combined.loc[total_combined['SubjectID'].eq(2065)]

subject2065.describe() # 68

In [None]:
# Hand-entered summary for subject 514, one (metric, average, maximum) triple per row.
# NOTE(review): the figures appear to be copied from the describe() output -- confirm.
# The two Time values are given in seconds and divided by 60.
_subject514_rows = [
    ('Distance',   9231.60,    42299.55),
    ('Speed',      2.21,       6.55),
    ('Pace',       7.47,       28.49),
    ('Heart Rate', 132.61,     197),
    ('Time',       4125.83/60, 18274/60),
    ('Sleep',      7.21,       9),
    ('RPE',        3.77,       7),
]
subject514_stats = pd.DataFrame(_subject514_rows, columns=['Stats', 'Average', 'Max'])
# Average as a fraction of the maximum, plus a constant 1 used as the full-width bar.
subject514_stats['proportion'] = subject514_stats['Average'].div(subject514_stats['Max'])
subject514_stats['total'] = 1
subject514_stats

In [None]:
# Bullet-style chart: a grey full-width bar per metric with the average/max share on top.
f, ax = plt.subplots()
sns.barplot(data=subject514_stats, x='total', y='Stats', label="Max", color='#d6d6d6', ax=ax)
sns.barplot(data=subject514_stats, x='proportion', y="Stats", color='#57cc99', ax=ax)
sns.despine(left=True, bottom=True)
ax.set(xlim=(0, 1), ylabel="", title='Patient514')
# The x axis only carries the 0-1 proportion, so it is hidden.
ax.xaxis.set_visible(False)

In [None]:
# Per-date maxima for subject 1990, used to pick one low-sleep and one normal-sleep run.
subject1990.groupby('Date').max() # 602 low sleep & 512 = 8 sleep

In [None]:
# The two runs compared below (dates 602 and 512).
subject1990_sub = subject1990.loc[subject1990['Date'].isin([602, 512])]

In [None]:
# Timestamp in minutes. The column is derived from subject1990_sub itself (the original
# read it from the larger subject1990 frame and relied on index alignment), and assign()
# returns a new frame instead of writing into a slice (avoids SettingWithCopyWarning).
subject1990_sub = subject1990_sub.assign(minute=subject1990_sub['record.timestamp[s]'] / 60)

In [None]:
# Heart rate against elapsed minutes for the two selected runs, one line per Date.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(
    data=subject1990_sub, x="minute", y='record.heart_rate[bpm]', hue='Date',
    palette=['#ff6d00', '#9b5de5'], ax=axs,
)

# Shade the area under each curve in its line colour (second line first, then the first).
for line_index, shade in ((1, "#9b5de5"), (0, "#ff6d00")):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=shade, alpha=0.3)

ax.set(ylabel="Heart Rate (bpm)", xlabel='Time(Minute)')
ax.set_title('Heart Rate Changes During the Run (Runner ID: 1990)', loc='left', fontsize=15)
ax.set_ylim(70, 200)

In [None]:
# Pace against elapsed minutes for the two selected runs, one line per Date.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(
    data=subject1990_sub, x="minute", y='pace[min/Km]', hue='Date',
    palette=['#f72585', '#2c7da0'], ax=axs,
)

# Shade the area under each curve in its line colour (second line first, then the first).
for line_index, shade in ((1, "#2c7da0"), (0, "#f72585")):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=shade, alpha=0.3)

ax.set(ylabel="Pace [min/Km]", xlabel='Time(minutes)')
ax.set_title('Pace During the Run (Runner ID: 1990)', loc='left', fontsize=15)
# Legend entries are relabelled with the hours of sleep before each run.
ax.legend(["8 Hours", "4 Hours"])

In [None]:
# Altitude against timestamp (seconds) for the two selected runs of runner 1990.
# The y label and title now describe what is plotted: the original labelled this altitude
# chart as pace and as subject 514, although it draws subject1990_sub's altitude.
fig, axs = plt.subplots(figsize=(15,5))
ax = sns.lineplot(data=subject1990_sub,x="record.timestamp[s]",y='record.altitude[m]',hue='Date',
             palette=['#f72585','#2c7da0'], ax=axs)

# Shade the area under each curve in its line colour.
for line_index, shade in ((1, "#2c7da0"), (0, "#f72585")):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=shade, alpha=0.3)

ax.set(ylabel="Altitude (m)",xlabel='Time(seconds)')
plt.title('Altitude During the Run (Runner ID: 1990)',loc='left',fontsize=20)
# Legend entries are relabelled with the hours of sleep before each run.
plt.legend(["8 Hours", "4 Hours"])

In [None]:
# Per-date maxima for subject 514, used to pick the runs compared below.
subject514.groupby('Date').max() # 616 609 low 526 804

In [None]:
# The two runs compared below (dates 609 and 804).
subject514_sub = subject514.loc[subject514['Date'].isin([609, 804])]

In [None]:
# Timestamp in minutes. assign() returns a new frame instead of writing into a slice of
# subject514, which avoids SettingWithCopyWarning.
subject514_sub = subject514_sub.assign(minute=subject514_sub['record.timestamp[s]'] / 60)

In [None]:
# Heart rate against elapsed minutes for the two selected runs of subject 514.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(
    data=subject514_sub, x="minute", y='record.heart_rate[bpm]', hue='Date',
    palette=['#9b5de5', '#ff6d00'], ax=axs,
)

# Shade the area under each curve in its line colour (second line first, then the first).
for line_index, shade in ((1, "#ff6d00"), (0, "#9b5de5")):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=shade, alpha=0.3)

ax.set(ylabel="Heart Rate (bpm)", xlabel='Time(minutes)')
ax.set_title('Heart Rate Changes During the Run (Subject ID: 514)', loc='left', fontsize=20)
ax.set_ylim(40, 180)

# Show the legend entries in reverse order.
handles, labels = ax.get_legend_handles_labels()
order = [1,0]
ax.legend([handles[position] for position in order], [labels[position] for position in order])

In [None]:
# Pace against elapsed minutes for the two selected runs of subject 514.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(
    data=subject514_sub, x="minute", y='pace[min/Km]', hue='Date',
    palette=['#2c7da0', '#f72585'], ax=axs,
)

# Shade the area under each curve in its line colour (second line first, then the first).
for line_index, shade in ((1, "#f72585"), (0, "#2c7da0")):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=shade, alpha=0.3)

ax.set(ylabel="Pace [min/Km]", xlabel='Time(minutes)')

# Show the legend entries in reverse order.
handles, labels = ax.get_legend_handles_labels()
order = [1,0]
ax.legend([handles[position] for position in order], [labels[position] for position in order])

In [None]:
# Per-date maxima for subject 2065 (first 60 dates), to pick runs for comparison.
subject2065.groupby('Date').max().head(60)

In [None]:
# Hand-picked run dates for subject 2065.
selected_dates = [602, 616, 611, 705, 922, 929, 824, 1015, 1017]
subject2065_sub = subject2065.loc[subject2065['Date'].isin(selected_dates)]

In [None]:
# Per-date maxima of the hand-picked runs.
subject2065_sub.groupby('Date').max()

In [None]:
# Runs of subject 2065 preceded by at most 6 hours of sleep, reduced to per-date maxima.
low_sleep_records = subject2065.loc[subject2065['numerical_final_sleep'].le(6)]
subject2065_low_sleep = low_sleep_records.groupby('Date').max()
subject2065_low_sleep

In [None]:
# Low-sleep runs whose maximum distance lies between 8 km and 15 km (both inclusive).
in_distance_range = subject2065_low_sleep['record.distance[m]'].between(8000, 15000)
subject2065_low_sleep_long_run = subject2065_low_sleep.loc[in_distance_range]
subject2065_low_sleep_long_run # 2.8

In [None]:
# Records of the hand-picked low-sleep runs.
low_sleep_dates = [510, 526, 609, 728, 1020, 1030]
subject2065_filter = subject2065.loc[subject2065['Date'].isin(low_sleep_dates)]
# Missing values per column.
subject2065_filter.isna().sum()

In [None]:
# Timestamp in minutes. assign() returns a new frame instead of writing into a slice of
# subject2065, which avoids SettingWithCopyWarning.
subject2065_filter = subject2065_filter.assign(minute=subject2065_filter['record.timestamp[s]'] / 60)

In [None]:
# Average every numeric column over the selected runs at each timestamp value.
# numeric_only=True leaves out the non-numeric columns (the string `key` and the
# categorical bin columns); pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them as older versions did.
test = subject2065_filter.groupby('record.timestamp[s]').mean(numeric_only=True)

In [None]:
# 7-row rolling *sum* of the averaged heart rate (the first 6 rows are NaN).
# NOTE(review): the visible plotting cells do not use this column, and a rolling mean
# may have been intended -- confirm.
test['rolling'] = test['record.heart_rate[bpm]'].rolling(7).sum()

In [None]:
# Tag every row of the low-sleep average with its sleep category.
test['sleep_cat'] = 'Low'

In [None]:
# Averaged heart rate over the low-sleep runs of subject 2065.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(data=test, x="minute", y='record.heart_rate[bpm]', color='#f72585', ax=axs)
ax.set(ylabel="Heart Rate (bpm)", xlabel='Time(Minute)')
ax.set_title('Average Heart Rate Changes over 6 Runs (Subject ID: 2065)', loc='left', fontsize=20)


In [None]:
# Runs of subject 2065 preceded by 7 to 8 hours of sleep (both inclusive), reduced to
# per-date maxima.
normal_sleep_records = subject2065.loc[subject2065['numerical_final_sleep'].between(7, 8)]
subject2065_norm_sleep = normal_sleep_records.groupby('Date').max()

In [None]:
# Normal-sleep runs whose maximum distance lies between 8 km and 15 km (both inclusive).
in_distance_range = subject2065_norm_sleep['record.distance[m]'].between(8000, 15000)
subject2065_norm_sleep_long_run = subject2065_norm_sleep.loc[in_distance_range]

subject2065_norm_sleep_long_run.groupby('Date').max() # 12 runs 3.333 rpe

In [None]:
# Records of the hand-picked normal-sleep runs.
normal_sleep_dates = [503, 519, 521, 524, 607, 614, 705, 718, 815, 822, 901, 926]
subject2065_filter_norm = subject2065.loc[subject2065['Date'].isin(normal_sleep_dates)]
# Missing values per column.
subject2065_filter_norm.isna().sum()

In [None]:
# Timestamp in minutes. assign() returns a new frame instead of writing into a slice of
# subject2065, which avoids SettingWithCopyWarning.
subject2065_filter_norm = subject2065_filter_norm.assign(minute=subject2065_filter_norm['record.timestamp[s]'] / 60)

In [None]:
# Average every numeric column over the selected runs at each timestamp value.
# numeric_only=True leaves out the non-numeric columns (the string `key` and the
# categorical bin columns); pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them as older versions did.
test2 = subject2065_filter_norm.groupby('record.timestamp[s]').mean(numeric_only=True)

In [None]:
# Tag every row of the normal-sleep average with its sleep category.
test2['sleep_cat'] = 'Normal'

In [None]:
# Display the averaged normal-sleep table.
test2

In [None]:
# Averaged heart rate over the normal-sleep runs of subject 2065.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(data=test2, x="minute", y='record.heart_rate[bpm]', color='#f72585', ax=axs)
ax.set(ylabel="Heart Rate (bpm)", xlabel='Time(Minute)')
ax.set_title('Average Heart Rate Changes over 12 Runs (Subject ID: 2065)', loc='left', fontsize=20)

In [None]:
# Runs of subject 2065 preceded by more than 8 hours of sleep, reduced to per-date maxima.
high_sleep_records = subject2065.loc[subject2065['numerical_final_sleep'].gt(8)]
subject2065_high_sleep = high_sleep_records.groupby('Date').max()

In [None]:
# High-sleep runs whose maximum distance is at least 9 km.
is_long_run = subject2065_high_sleep['record.distance[m]'].ge(9000)
subject2065_high_sleep_long_run = subject2065_high_sleep.loc[is_long_run]
subject2065_high_sleep_long_run.groupby('Date').max() # 6 runs RPE =3

In [None]:
# Records of the hand-picked high-sleep runs.
high_sleep_dates = [711, 725, 801, 912, 1017, 1024]
subject2065_filter_high = subject2065.loc[subject2065['Date'].isin(high_sleep_dates)]
# Missing values per column.
subject2065_filter_high.isna().sum()

In [None]:
# Timestamp in minutes. assign() returns a new frame instead of writing into a slice of
# subject2065, which avoids SettingWithCopyWarning.
subject2065_filter_high = subject2065_filter_high.assign(minute=subject2065_filter_high['record.timestamp[s]'] / 60)

In [None]:
# Average every numeric column over the selected runs at each timestamp value.
# numeric_only=True leaves out the non-numeric columns (the string `key` and the
# categorical bin columns); pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them as older versions did.
test3 = subject2065_filter_high.groupby('record.timestamp[s]').mean(numeric_only=True)

In [None]:
# Tag every row of the high-sleep average with its sleep category.
test3['sleep_cat'] = 'High'

In [None]:
# Averaged heart rate over the high-sleep runs of subject 2065.
fig, axs = plt.subplots(figsize=(10, 5))
ax = sns.lineplot(data=test3, x="minute", y='record.heart_rate[bpm]', color='#f72585', ax=axs)
ax.set(ylabel="Heart Rate (bpm)", xlabel='Time(Minute)')
ax.set_title('Average Heart Rate Changes over 6 Runs (Subject ID: 2065)', loc='left', fontsize=20)

In [None]:
# Averaged heart-rate curves of the three sleep groups on one set of axes.
fig, axs = plt.subplots(figsize=(10, 5))
sleep_groups = [
    (test,  '#2c7da0', "<6"),
    (test2, '#f72585', '7-8'),
    (test3, '#ff6200', '>8'),
]
for frame, colour, sleep_label in sleep_groups:
    ax = sns.lineplot(data=frame, x="minute", y='record.heart_rate[bpm]',
                      color=colour, label=sleep_label, ax=axs)

# Shade under each curve in its own colour, last-drawn group first.
for line_index in (2, 1, 0):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=sleep_groups[line_index][1], alpha=0.3)

ax.set_ylim(60, 150)
ax.set(ylabel="Average Heart Rate (bpm)", xlabel='Time(Minute)')
ax.set_title('Average Heart Rate Changes During a Run (Runner ID: 2065)', loc='left', fontsize=15)
ax.legend(title="Hours of Sleep")

In [None]:
# Replace missing pace values with 0. fillna with a {column: value} mapping returns a new
# frame, so nothing is written into a slice of subject2065 (avoids SettingWithCopyWarning).
# NOTE(review): test and test2 were aggregated from these frames in earlier cells, so
# this fill does not change them or the plots drawn from them -- confirm this is intended.
subject2065_filter = subject2065_filter.fillna({'pace[min/Km]': 0})
subject2065_filter_norm = subject2065_filter_norm.fillna({'pace[min/Km]': 0})

In [None]:
# Averaged pace curves of the three sleep groups on one set of axes.
fig, axs = plt.subplots(figsize=(10, 5))
sleep_groups = [
    (test,  '#2c7da0', "<6"),
    (test2, '#f72585', '7-8'),
    (test3, '#ff6200', '>8'),
]
for frame, colour, sleep_label in sleep_groups:
    ax = sns.lineplot(data=frame, x="minute", y='pace[min/Km]',
                      color=colour, label=sleep_label, ax=axs)

# Shade under each curve in its own colour, in drawing order.
for line_index in (0, 1, 2):
    xy = ax.lines[line_index].get_xydata()
    ax.fill_between(xy[:, 0], xy[:, 1], color=sleep_groups[line_index][1], alpha=0.3)

ax.set(ylabel="Average Pace (min/Km)", xlabel='Time(Minute)')
ax.set_title('Average Changes in Pace During a Run (Runner ID: 2065)', loc='left', fontsize=15)
ax.legend(title="Hours of Sleep")

In [None]:
fig, axs = plt.subplots(figsize=(10,5))
ax = sns.scatterplot(data=test,x="minute", y='record.heart_rate[bpm]',color='#2c7da0',label="<6",alpha=0.5)
ax1 = sns.scatterplot(data=test2,x="minute", y='record.heart_rate[bpm]',color='#f72585',label='7-8',alpha=0.5)
ax2 = sns.scatterplot(data=test3,x="minute", y='record.heart_rate[bpm]',color='#ff6200',label='>8',alpha=0.5)