In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from format_the_data import format_mi_band_data
from feature_engineering import get_wake_up_info, get_heartrate_data_for_interval, calculate_test_statistics_heartrate
from load_the_data import process_fitness_data
import json
from pprint import pprint
pd.options.mode.chained_assignment = None

In [2]:
### set os dir to data, same director but in folder data
os.chdir('data')
person1 = pd.read_csv('20231030_8210796956_MiFitness_hlth_center_fitness_data.csv')
person2 = pd.read_csv('20231030_8211531339_MiFitness_hlth_center_fitness_data.csv')
person3 = pd.read_csv('20231031_8210564343_MiFitness_hlth_center_fitness_data.csv')
person4 = pd.read_csv('20231110_8210586841_MiFitness_hlth_center_fitness_data.csv')

person1['Person ID'] = 1
person2['Person ID'] = 2
person3['Person ID'] = 3
person4['Person ID'] = 4

master_frame = pd.concat([person1, person2, person3, person4], ignore_index=True)

behaviour_tracking_data = pd.read_csv('Behavioural data app.csv')

In [3]:
aggregated_df = behaviour_tracking_data.copy()

In [4]:
unique_keys = ['pai',
               'valid_stand', 
               'calories',
               'steps',
               'heart_rate',
               'intensity',
               'dynamic',
               'single_heart_rate',
               'single_spo2',
               'training_load',
               'single_stress',
               'stress',
               'watch_night_sleep',
               'resting_heart_rate',
               'watch_daytime_sleep',
               'weight']
key_dataframes = format_mi_band_data(unique_keys, master_frame)


# separate dataframes:
pai_df = key_dataframes['pai']
valid_stand_df = key_dataframes['valid_stand']
calories_df = key_dataframes['calories']
steps_df = key_dataframes['steps']
heart_rate_df = key_dataframes['heart_rate']
intensity_df = key_dataframes['intensity']
dynamic_df = key_dataframes['dynamic']
single_heart_rate_df = key_dataframes['single_heart_rate']
single_spo2_df = key_dataframes['single_spo2']
training_load_df = key_dataframes['training_load']
single_stress_df = key_dataframes['single_stress']
stress_df = key_dataframes['stress']
watch_night_sleep_df = key_dataframes['watch_night_sleep']
resting_heart_rate_df = key_dataframes['resting_heart_rate']
watch_daytime_sleep_df = key_dataframes['watch_daytime_sleep']
weight_df = key_dataframes['weight']

# Intuition

I think we need to include control variables such as the number of hours sleep and the quality of sleep as this significantly influences your mental state after awaking.

I guess we can check whether someone has slept after their alarm right?

We must include variable for smart alarm or not. Can we find this in the settings from the mi band?

We are going to add features from our watch data to the behavioural data.

Features for our analysis:
- Average of the first 20 minutes after waking
- Whether the smart alarm actually woke us up in light sleep (not in deep or REM)

Hence we need:
- Waking time
- Heartrate data
- State when awaking

In [5]:
aggregated_df = get_wake_up_info(watch_night_sleep_df, aggregated_df)
aggregated_df

Unnamed: 0,Person ID,Drinks,Fastfood,Sports,Food 23 before sleep,Medication,Date created,Woke up by (smart) alarm,Woke up by external factors,Yesterday,Slept again after alarm,Smart alarm,Date,time_of_awakening,state_before_awakening
0,1,False,True,True,False,False,2023-10-01T08:41:41.000Z,True,False,False,False,False,2023-10-01,05:14:00,3.0
1,1,False,True,False,False,False,2023-10-02T07:00:32.000Z,False,False,False,False,False,2023-10-02,05:04:00,3.0
2,1,False,False,False,False,False,2023-10-03T05:31:28.000Z,True,False,False,False,False,2023-10-03,05:32:00,3.0
3,1,False,False,False,False,False,2023-10-04T03:12:57.000Z,True,False,False,False,False,2023-10-04,03:12:00,3.0
4,1,False,False,False,False,False,2023-10-05T07:00:29.000Z,True,False,False,False,False,2023-10-05,06:00:00,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2,False,False,True,False,False,2023-10-24T08:35:00.000Z,True,False,False,False,False,2023-10-24,04:56:00,3.0
115,2,True,False,True,True,False,2023-10-25T07:12:00.000Z,True,False,False,False,False,2023-10-25,04:34:00,3.0
116,2,False,True,True,False,False,2023-10-26T05:30:00.000Z,True,False,False,True,False,2023-10-26,04:37:00,3.0
117,2,False,False,False,False,False,2023-10-27T08:03:00.000Z,True,False,False,False,False,2023-10-27,05:00:00,3.0


In [6]:
print(len(aggregated_df))
# filter out the rows where the time_of_awakening is NaN
aggregated_df = aggregated_df[~aggregated_df['time_of_awakening'].isna()].reset_index(drop=True)

119


In [7]:
time_interval = 60  # Assuming a 60 minute interval

# Add new columns for the statistics in behaviour_tracking_data
aggregated_df['Number of Measurements'] = pd.NA
aggregated_df['Average Heart Rate'] = pd.NA
aggregated_df['Average Lowest Three obs'] = pd.NA
aggregated_df['Average First Ten min'] = pd.NA
aggregated_df['Average First Thirty min'] = pd.NA

for index, row in aggregated_df.iterrows():
    # Get heart rate data for the interval
    heart_rate_data = get_heartrate_data_for_interval(heart_rate_df, row['Person ID'], row['Date'], row['time_of_awakening'], time_interval)
    
    # Calculate the test statistics
    num_measurements, avg_hr, avg_lowest_three, avg_first_ten, avg_first_thirty = calculate_test_statistics_heartrate(heart_rate_data)

    # Update the aggregated_df DataFrame with the new statistics
    aggregated_df.at[index, 'Number of Measurements'] = num_measurements
    aggregated_df.at[index, 'Average Heart Rate'] = avg_hr
    aggregated_df.at[index, 'Average Lowest Three obs'] = avg_lowest_three
    aggregated_df.at[index, 'Average First Ten min'] = avg_first_ten
    aggregated_df.at[index, 'Average First Thirty min'] = avg_first_thirty

In [8]:
aggregated_df.head()

Unnamed: 0,Person ID,Drinks,Fastfood,Sports,Food 23 before sleep,Medication,Date created,Woke up by (smart) alarm,Woke up by external factors,Yesterday,Slept again after alarm,Smart alarm,Date,time_of_awakening,state_before_awakening,Number of Measurements,Average Heart Rate,Average Lowest Three obs,Average First Ten min,Average First Thirty min
0,1,False,True,True,False,False,2023-10-01T08:41:41.000Z,True,False,False,False,False,2023-10-01,05:14:00,3.0,13,77.307692,63.333333,71.625,76.9
1,1,False,True,False,False,False,2023-10-02T07:00:32.000Z,False,False,False,False,False,2023-10-02,05:04:00,3.0,17,72.705882,67.333333,73.8,72.357143
2,1,False,False,False,False,False,2023-10-03T05:31:28.000Z,True,False,False,False,False,2023-10-03,05:32:00,3.0,15,77.4,64.0,70.0,71.0
3,1,False,False,False,False,False,2023-10-04T03:12:57.000Z,True,False,False,False,False,2023-10-04,03:12:00,3.0,17,73.882353,60.666667,71.2,69.928571
4,1,False,False,False,False,False,2023-10-05T07:00:29.000Z,True,False,False,False,False,2023-10-05,06:00:00,3.0,15,82.8,76.0,84.333333,84.090909


In [9]:
# replace all False values with 0 and True values with 1 in aggregated_df
aggregated_df = aggregated_df.replace(False, 0)
aggregated_df = aggregated_df.replace(True, 1)


In [10]:
# Save the aggregated_df DataFrame to a csv file
aggregated_df.to_csv('aggregated_df.csv', index=False)

# Until here

In [11]:
aggregated_df['number_of_measurements'] = split_results_df['number_of_measurements']
aggregated_df['average_heart_rate'] = split_results_df['average_heart_rate']
aggregated_df['average_lowest_three'] = split_results_df['average_lowest_three']
behaviour_tracking_data

NameError: name 'split_results_df' is not defined

In [None]:
# CHECK FOR OUMAIMAS DATA AFTER 2023-10-01
temp = pd.DataFrame()
temp['DateTime'] = pd.to_datetime(watch_night_sleep_df['Time'], unit='s')
watch_night_sleep_df['obs_date'] = temp['DateTime'].dt.date
watch_night_sleep_df['obs_time'] = temp['DateTime'].dt.time

date = pd.to_datetime('2023-10-01').date()
filtered_df = watch_night_sleep_df[(watch_night_sleep_df['Person'] == 'oumaima') & (watch_night_sleep_df['obs_date'] >= date)]
len(filtered_df)

In [None]:
behaviour_tracking_data[behaviour_tracking_data['Name'] == 'tom'].head()


In [None]:
# SAVE total_df to csv
behaviour_tracking_data.to_csv('PREPROCESSED_DATA.csv', index=False)

In [None]:
# plot the average heartrate for each person over time
total_df = behaviour_tracking_data
plt.figure(figsize=(20,10))
plt.plot(total_df[total_df['Name'] == 'andy']['Date'], total_df[total_df['Name'] == 'andy']['average_heart_rate'], label='andy')
plt.plot(total_df[total_df['Name'] == 'tom']['Date'], total_df[total_df['Name'] == 'tom']['average_heart_rate'], label='tom')
plt.plot(total_df[total_df['Name'] == 'huub']['Date'], total_df[total_df['Name'] == 'huub']['average_heart_rate'], label='huub')
plt.plot(total_df[total_df['Name'] == 'oumaima']['Date'], total_df[total_df['Name'] == 'oumaima']['average_heart_rate'], label='oumaima')
plt.legend()
plt.show()


In [None]:
# CHECK IF ALL DATA IS MATCHED WITH THE FORM DATA
print('behaviour_tracking_data has {} rows'.format(len(behaviour_tracking_data)))
print('')

# print the number of observations for each name in behaviour_tracking_data
print(behaviour_tracking_data['Name'].value_counts())
print('')
print(behaviour_tracking_data['Smart alarm'].value_counts())
print('')
# split the dataframe for smart alarm true and false
smart_alarm_true = behaviour_tracking_data[behaviour_tracking_data['Smart alarm'] == True]
smart_alarm_false = behaviour_tracking_data[behaviour_tracking_data['Smart alarm'] == False]
print('')
# print state of awakening for smart alarm true and false
print(smart_alarm_true['state_before_awakening'].value_counts())
print('')
print(smart_alarm_false['state_before_awakening'].value_counts())

# display the rows in behaviour_tracking_data where state_before_awakening is NaN
behaviour_tracking_data[behaviour_tracking_data['state_before_awakening'].isna()]

## Conclusie

Als we smartwake gebruiken worden we wakker in light sleep en als we een normaal alarm gebruiken ook.
Dit kan een gevolg zijn van het geval dat het smart alarm van ons horloge niet beter werkt dan normaal.
Systematic failure. -> naja we worden juist wel wakker in light sleep, dus t werkt wel.

Mochten we geen significante resultaten vinden, zou dit een oorzaak kunnen zijn.

Now we need to get save the wake time into the behavioural dataframe