# Description

Import, transform, and analysis (mean, mode, SD) of Google fit data.

# Import Python Libraries

## Visualization / Heatmap Libraries

In [2]:
#%matplotlib notebook
#%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

## Dataframe and Math Function Libraries

In [3]:
from __future__ import print_function
import numpy as np
import pandas as pd
#!pip install --upgrade pip
#!pip install xlrd

# Load Data And Explore

## Google Download (eventually API)

This code uses a single file fromGoogle's Fit downlad, a summary file that is aggegated from the other files. Download yor Google Fit data from the following link, https://takeout.google.com/, and then reference the summary file in the Import section.

## Import

In [4]:
import os
os_name = os.sys.platform

fileName = '2021-11-20 Daily Summaries.csv'
#Mac Path
if os_name == 'darwin':
    filePath = '/Users/igoeja/Documents/GitHub/DataAnalytics/Data/' + fileName
else:
    filePath = 'c:\\Users\\igoej\\OneDrive\\Documents\\GitHub\\DataAnalytics\\Data\\' + fileName

df_fit = pd.read_csv(filePath)
df_fit.tail()
df_fit.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3047 entries, 0 to 3046
Data columns (total 56 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Date                                     3047 non-null   object 
 1   Move Minutes count                       1350 non-null   float64
 2   Average blood glucose (mmol/L)           581 non-null    float64
 3   Max blood glucose (mmol/L)               581 non-null    float64
 4   Min blood glucose (mmol/L)               581 non-null    float64
 5   Temporal relation to meal                0 non-null      float64
 6   Meal type                                0 non-null      float64
 7   Temporal relation to sleep               0 non-null      float64
 8   Blood glucose specimen source            581 non-null    object 
 9   Average systolic blood pressure (mmHg)   149 non-null    float64
 10  Max systolic blood pressure (mmHg)       149 non

# Shared Functions

## Basic

In [5]:
def MedianFunction(df, roundValue):
    import statistics as stat
    cleaned = df.dropna()
    return round(stat.median(cleaned), roundValue)

In [6]:
def AverageFunction(df, roundValue):
    cleaned = df.dropna()
    return round(sum(cleaned)/len(cleaned), roundValue)

# Data Clean and Prep

In [7]:
df_fit.dropna(axis=1, how='all', inplace=True)

# Google Specific Fixes

In [8]:
df_fit.set_index('Date')

Unnamed: 0_level_0,Move Minutes count,Average blood glucose (mmol/L),Max blood glucose (mmol/L),Min blood glucose (mmol/L),Blood glucose specimen source,Average systolic blood pressure (mmHg),Max systolic blood pressure (mmHg),Min systolic blood pressure (mmHg),Average diastolic blood pressure (mmHg),Max diastolic blood pressure (mmHg),...,Cross-country skiing duration (ms),Sleep duration (ms),Stair climbing machine duration (ms),Strength training duration (ms),Ergometer duration (ms),Other duration (ms),Light sleeping duration (ms),Deep sleeping duration (ms),Awake mid-sleeping duration (ms),Guided breathing duration (ms)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-09-18,,,,,,,,,,,...,,,,,,,,,,
2012-09-19,,,,,,,,,,,...,,,,,,,,,,
2012-09-20,,,,,,,,,,,...,,,,,,,,,,
2012-09-21,,,,,,,,,,,...,,,,,,,,,,
2012-09-22,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-16,28.0,,,,,,,,,,...,,,,,,,,,,
2021-11-17,86.0,,,,,,,,,,...,,,,,,,,,,
2021-11-18,117.0,,,,,,,,,,...,,,,,,,,,,
2021-11-19,64.0,,,,,,,,,,...,,,,,,,,,,


## Set Aggregate Values

In [9]:
df_fit['SleepSum'] = df_fit['Deep sleeping duration (ms)'] + df_fit['Light sleeping duration (ms)'] + df_fit['Sleep duration (ms)']
df_fit['sleepSumHours'] = df_fit['SleepSum'] / 3600000

In [10]:
df_fit['basketballMinutes'] = df_fit['Basketball duration (ms)'] / 60000
df_fit['rowingMachineMinutes'] = df_fit['Rowing machine duration (ms)']/ 60000

## Lag/Shift Values

In [11]:
df_fit['rowingMachineMinutesShifted'] = df_fit['rowingMachineMinutes'].shift()
df_fit['moveMinutesCountShifted'] = df_fit['Move Minutes count'].shift()
df_fit['heartPointsShifted'] = df_fit['Heart Points'].shift()
df_fit['distanceShifted'] = df_fit['Distance (m)'].shift()
df_fit['basketballMinutesShifted'] = df_fit['basketballMinutes'].shift()


In [12]:
#df_fit.columns

## Filter Data Set

In [13]:
#set data set for analysis
dfi = df_fit[df_fit['Date'] >= '2021-01-01']

df = dfi[['Date', 'Average heart rate (bpm)','Average weight (kg)','Average systolic blood pressure (mmHg)','Average diastolic blood pressure (mmHg)','Average blood glucose (mmol/L)','Heart Points','SleepSum', 'sleepSumHours','basketballMinutes', 'basketballMinutesShifted', 'rowingMachineMinutes','rowingMachineMinutesShifted', 'moveMinutesCountShifted','heartPointsShifted','Distance (m)', 'distanceShifted', 'Deep sleeping duration (ms)', 'Light sleeping duration (ms)', 'Max heart rate (bpm)','Min heart rate (bpm)']]

# Data Review

In [14]:
df.describe()

Unnamed: 0,Average heart rate (bpm),Average weight (kg),Average systolic blood pressure (mmHg),Average diastolic blood pressure (mmHg),Average blood glucose (mmol/L),Heart Points,SleepSum,sleepSumHours,basketballMinutes,basketballMinutesShifted,rowingMachineMinutes,rowingMachineMinutesShifted,moveMinutesCountShifted,heartPointsShifted,Distance (m),distanceShifted,Deep sleeping duration (ms),Light sleeping duration (ms),Max heart rate (bpm),Min heart rate (bpm)
count,296.0,87.0,54.0,54.0,249.0,267.0,62.0,62.0,3.0,3.0,88.0,88.0,324.0,267.0,324.0,324.0,121.0,121.0,296.0,296.0
mean,79.022016,107.036178,126.943885,80.18993,8.005302,59.797753,23412580.0,6.503495,22.194956,22.194956,34.994261,34.994261,82.240741,59.797753,3793.591678,3803.277431,7207934.0,11189260.0,136.826068,55.714487
std,12.253411,0.730802,8.212215,4.861625,1.059735,36.875174,4172271.0,1.158964,14.097672,14.097672,14.065926,14.065926,45.761504,36.875174,2777.477245,2770.22462,3360473.0,4895268.0,21.823367,9.734844
min,56.0,104.741997,103.0,68.0,3.130262,1.0,10920000.0,3.033333,8.1248,8.1248,0.123517,0.123517,2.0,1.0,15.58225,15.58225,300000.0,1140000.0,56.0,40.0
25%,72.459851,106.467999,123.125,78.0,7.336126,36.0,21390000.0,5.941667,15.132442,15.132442,24.852313,24.852313,49.75,36.0,1703.618018,1706.952044,4680000.0,8280000.0,125.147823,50.0
50%,75.165212,107.001999,128.0,80.0,8.032692,58.0,24120000.0,6.7,22.140083,22.140083,34.351033,34.351033,80.0,58.0,2824.264244,2834.207464,7320000.0,11640000.0,139.063301,53.0
75%,78.81457,107.5145,131.0,83.0,8.686735,80.5,25290000.0,7.025,29.230033,29.230033,42.258396,42.258396,106.25,80.5,5139.880964,5139.880964,9780000.0,14040000.0,153.0,56.141647
max,133.280962,108.709,147.0,91.0,11.059582,207.0,35520000.0,9.866667,36.319983,36.319983,74.8671,74.8671,224.0,207.0,12522.099504,12522.099504,14400000.0,36420000.0,190.0,98.0


# Measures

## Blood Pressure


In [15]:
dia = df['Average diastolic blood pressure (mmHg)'].dropna()
sys = df['Average systolic blood pressure (mmHg)'].dropna()
print('BP (avg):', AverageFunction(sys,1), '/', AverageFunction(dia,1))
print('BP (median):',MedianFunction(sys,1), '/', MedianFunction(dia,1))
print('BP (std):', round(sys.std(),1), '/', round(dia.std(),1))

BP (avg): 126.9 / 80.2
BP (median): 128.0 / 80.0
BP (std): 8.2 / 4.9


## Glucose

In [16]:
bg = df['Average blood glucose (mmol/L)']
bg2 = df['Average blood glucose (mmol/L)'] * 18
print('BG (mmol/L):', AverageFunction(bg, 1))
print('BG (mmoL/L, std):', round(bg.std(),5))
print('BG (mmg/dl):', round(AverageFunction(bg2, 1)))
print('BG (mmg/dl, std):', round(bg2.std(),5))

BG (mmol/L): 8.0
BG (mmoL/L, std): 1.05974
BG (mmg/dl): 144
BG (mmg/dl, std): 19.07523


## Weight

In [17]:
w = df['Average weight (kg)']
print('Weight (kg):', AverageFunction(w, 1))
print('Weight (lbs):', round(AverageFunction(w, 1) * 2.2,1))

Weight (kg): 107.0
Weight (lbs): 235.4


## Sleep

In [18]:
print('Sleep:', AverageFunction(df['sleepSumHours'],2))
print('Sleep (Median):', MedianFunction(df['sleepSumHours'], 2))

Sleep: 6.5
Sleep (Median): 6.7


## Activity

In [19]:
print('Distance (m):', AverageFunction(df['Distance (m)'],2))
print('Distance (m) (Median):', MedianFunction(df['Distance (m)'], 2))
print()
print('Heart Points:', AverageFunction(df['Heart Points'],2))
print('Heart Points (Median):', MedianFunction(df['Heart Points'], 2))
print()
print('Move Minutes:', AverageFunction(df['moveMinutesCountShifted'],2))
print('Move Minutes (Median):', MedianFunction(df['moveMinutesCountShifted'], 2))


Distance (m): 3793.59
Distance (m) (Median): 2824.26

Heart Points: 59.8
Heart Points (Median): 58.0

Move Minutes: 82.24
Move Minutes (Median): 80.0


## Heart Rate

In [20]:
print('Average heart rate (bpm):', AverageFunction(df['Average heart rate (bpm)'],2))
print('Max heart rate (bpm):', AverageFunction(df['Max heart rate (bpm)'], 2))
print('Min heart rate (bpm):', AverageFunction(df['Min heart rate (bpm)'], 2))

Average heart rate (bpm): 79.02
Max heart rate (bpm): 136.83
Min heart rate (bpm): 55.71
