# Desafios posteriores:

- OK - Agrupar os valores de bpm de 5 em 5 minutos
- OK - Selecionar os bpms que fazem parte do sleep time
- OK - Combinar os dados awake no sleep time
- OK - Preencher os gaps
- Remover os dias em que há uma soneca durante o dia
- OK - Otimizar a função que preenche os gaps
- Agrupar dados da Letônia e do Brasil
- Entender como passar dados nulos para o modelo
- Separar dia-a-dia
- Fazer um grande subplot de cada dia

# Imports

In [49]:
import requests
import datetime
import collections

import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

pio.renderers.default = 'iframe'

# Requests

In [50]:
def data_request(start, end, timeout=60):
    """
    Description
    -----------
    This function requests the sleep and heart routes of the Oura Cloud API for a specific date range

    Parameters
    ----------
    start : <string>
        Beginning of the interval. The day is in the format YYYY-MM-DD

    end : <string>
        End of the interval. The day is in the format YYYY-MM-DD

    timeout : <int or float>, optional
        Seconds to wait for each request before giving up (default: 60).
        Without a timeout a stalled connection would block the call forever.

    Returns
    -------
    sleep_response : <class 'requests.models.Response'>
        Response from the sleep route

    heart_response : <class 'requests.models.Response'>
        Response from the heart route

    Raises
    ------
    requests.exceptions.Timeout
        If a route does not answer within `timeout` seconds
    """

    # Personal Access Token used to access the user data via the Oura Cloud API
    # NOTE(review): this secret is hard-coded in the source; consider revoking it and
    # loading the token from an environment variable or a secrets store instead.
    headers = {'Authorization': 'Bearer Q3E2ETZRM4AKZULORX6LJNQOKSIOWOYG'}

    # Parent route
    url = 'https://api.ouraring.com/v2/usercollection/'

    # Define the parameters of each request
    # (the heart route takes datetimes; they are sent with a fixed +03:00 offset)
    sleep_params = {'start_date': start,
                    'end_date': end}
    heart_params = {'start_datetime': f'{start}T00:00:01+03:00',
                    'end_datetime': f'{end}T23:59:59+03:00'}

    # Request Sleep data and Heart data
    sleep_response = requests.get(url + 'sleep', headers=headers,
                                  params=sleep_params, timeout=timeout)
    heart_response = requests.get(url + 'heartrate', headers=headers,
                                  params=heart_params, timeout=timeout)

    return sleep_response, heart_response

In [51]:
# Date range (YYYY-MM-DD) used for the API requests and, later, for the gap filling
START = '2023-07-25'
END = '2023-08-23'

# Fetch the raw responses of the sleep and heart routes for that range
sleep_response, heart_response = data_request(START, END)

# Heart Preprocessing

In [52]:
def heart_route_preprocessing(response):
    """
    Description
    -----------
    Builds a table with the timestamp and the bpm of every entry found in the heart route JSON

    Parameters
    ----------
    response : <class 'requests.models.Response'>
        Response from the heart route

    Returns
    -------
    heart_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame with integer index and ['time', 'bpm'] columns
    """

    # Decode the payload once and keep only the list of measurements
    records = response.json()['data']

    # One list per output column, in the order the measurements were received
    timestamps = [entry['timestamp'] for entry in records]
    rates = [entry['bpm'] for entry in records]

    # Start from an empty frame with the expected columns, then fill them in
    frame = pd.DataFrame(columns=["time", "bpm"])
    frame["time"] = timestamps
    frame["bpm"] = rates

    return frame

In [82]:
# Turn the heart route response into a DataFrame with ['time', 'bpm'] columns
heart_data = heart_route_preprocessing(heart_response)

In [83]:
def time_preprocessing(time):
    """
    Description
    -----------
    Parses a timestamp string and expresses it in the Latvian (UTC+3) or the Brazilian (UTC-3) timezone.
    Timestamps up to 2023-08-24 00:00 UTC (inclusive) are converted to the Latvian offset, later ones to
    the Brazilian offset. OBS: These timezones were chosen according to the countries in which Igor lived.

    Parameters
    ----------
    time : <str>
        Time in the format YYYY-MM-DDThh:mm:ss+00:00 (Example: 2023-07-24T21:04:37+00:00)

    Returns
    -------
    new_time : <class 'datetime.datetime'>
        Time in the format YYYY-MM-DD hh:mm:ss±03:00 (Example: 2023-07-25 00:04:37+03:00)
    """

    # Last instant that still counts as "in Latvia" (date of the return to Brazil)
    return_to_brazil = datetime.datetime(2023, 8, 24, tzinfo=datetime.timezone.utc)

    # Convert the string into a timezone-aware datetime
    parsed = datetime.datetime.strptime(time, "%Y-%m-%dT%H:%M:%S%z")

    # UTC+3 up to the return date, UTC-3 afterwards
    offset_hours = 3 if parsed <= return_to_brazil else -3
    target_zone = datetime.timezone(offset=datetime.timedelta(hours=offset_hours))

    return parsed.astimezone(target_zone)

In [84]:
# Parse every timestamp string and convert it to the Latvian/Brazilian timezone
heart_data['time'] = heart_data['time'].apply(time_preprocessing)

In [95]:
def groups_5min(data):
    """
    Description
    -----------
    Every 5 minutes, the Oura Ring measures the heart rate for 60 consecutive seconds. However, only the reliable measures are stored in the API.
    Therefore, this function groups all the heart rates that were collected in the same 60s batch.

    Parameters
    ----------
    data : <class 'pandas.core.frame.DataFrame'>
        Heart data. DataFrame with integer index and ['time', 'bpm'] columns, ordered by time

    Returns
    -------
    new_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame with one row per batch, indexed by the batch number (0, 1, 2, ...), and
        ['time', 'bpm', 'state'] columns: the time of the first measure of the batch, the
        mean bpm rounded to one decimal, and the constant state 'awake'
    """

    # Elapsed seconds between each row and the previous one.
    # total_seconds() is used on purpose: `.dt.seconds` only holds the seconds component,
    # so a gap of "1 day and 9 seconds" would look like a 9-second gap.
    gap_seconds = data['time'].diff().dt.total_seconds()

    # Every time that a gap is greater than 60s a new batch starts, so add +1 to the label
    # (the first row has no previous row: its NaN gap compares False and it gets label 0)
    batch_label = gap_seconds.gt(60).cumsum()

    # Group the batches according to the label, maintaining the time of the first measure, and the mean of the bpm
    new_data = (
        data.groupby(batch_label, as_index=True)[['time', 'bpm']]
        .agg({'time': 'first', 'bpm': 'mean'})
        .round(1)
    )

    # Set the awake state (will be important during the 4-stages classification)
    new_data['state'] = 'awake'

    return new_data

In [98]:
# Collapse every 60-second measurement batch into one averaged row labelled 'awake'
heart_data = groups_5min(heart_data)

# Sleep Preprocessing

In [126]:
def heart_rate_extractor(day_data):
    """
    Description
    -----------
    This function extracts the heart rate from the sleep route and gives back the DataFrame in the same format as the heart route Dataframe.
    The samples are placed on a 5-minute grid that starts at the bedtime start. The input is not modified.

    Parameters
    ----------
    day_data : <dict>
        JSON with data of one-night sleep time. The keys read are 'bedtime_start' and 'bedtime_end'
        (format YYYY-MM-DDThh:mm:ss±hh:mm) and 'heart_rate' -> 'items' (list of bpm samples)

    Returns
    -------
    new_data : <class 'pandas.core.frame.DataFrame'>
        Heart data during the sleep time. DataFrame with integer index and ['time', 'bpm','state'] columns
    """

    step = datetime.timedelta(minutes=5)

    # Copy the samples so the padding below does not alter the caller's JSON
    bpm = list(day_data['heart_rate']['items'])

    # Extract the start and end of the sleep time
    start = datetime.datetime.strptime(day_data['bedtime_start'], "%Y-%m-%dT%H:%M:%S%z")
    end = datetime.datetime.strptime(day_data['bedtime_end'], "%Y-%m-%dT%H:%M:%S%z")

    # Create a spaced timelist within the sleep time interval. If there are more samples
    # than slots in the interval, the grid keeps growing so that no sample is left
    # without a timestamp (the previous padding loop never finished in that case).
    time = []
    current = start
    while current < end or len(time) < len(bpm):
        time.append(current)
        current += step

    # Padding of the bpm length according to time length
    bpm.extend([None] * (len(time) - len(bpm)))

    # Every row of this route belongs to the sleep state
    state = ['sleep'] * len(time)

    # Create a DataFrame with the sleep data of a unique day
    new_data = pd.DataFrame({"time": time, "bpm": bpm, "state": state})

    return new_data

In [124]:
# One DataFrame per sleep record that carries enough heart-rate samples
append_list = []

for day_data in sleep_response.json()['data']:

    heart_rate = day_data['heart_rate']

    # Skip records without heart-rate data and records with 40 samples or fewer
    # (presumably short sleeps such as naps -- TODO confirm the intent of the threshold)
    if heart_rate is not None and len(heart_rate['items']) > 40:
        append_list.append(heart_rate_extractor(day_data))

# Stack all the nights; every night keeps its own 0-based index
sleep_data = pd.concat(append_list)

In [128]:
# Largest index label in sleep_data. The nights were concatenated without renumbering,
# so this is the last row label of the longest night, not the total number of rows.
max(sleep_data.index)

162

# Filling the gaps

In [11]:
def time_rounder(full_data):
    """
    Description
    -----------
    This function snaps every timestamp to the nearest 5-minute mark and tries to untangle the rows
    that end up sharing the same mark. For each shared mark, only the first two rows are considered:
      1. the first row is moved to its floor mark, unless the row just before it already holds that mark;
      2. otherwise the second row is moved to its ceil mark, unless the row just after it already holds that mark;
      3. otherwise the first row is discarded.
    A row with no neighbour on the side being checked (first or last row of the frame) is treated as
    having that side free. The input DataFrame is not modified.

    Parameters
    ----------
    full_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame with a datetime 'time' column (plus any other columns, e.g. 'bpm' and 'state')

    Returns
    -------
    full_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame with integer index in which 'time' was replaced by the rounded 'new_time' column
    """

    # Work on a renumbered copy: row labels and row positions coincide, and the
    # caller's frame does not receive the helper column
    full_data = full_data.reset_index(drop=True)
    full_data['new_time'] = full_data['time'].round('5min')

    # 5-minute marks that are shared by more than one row
    slot_count = collections.Counter(full_data['new_time'])
    duplicate = [slot for slot, count in slot_count.items() if count > 1]

    for dupl in duplicate:

        idx = full_data.index[full_data['new_time'] == dupl]

        # A previous iteration may already have moved/dropped a row of this mark
        if len(idx) < 2:
            continue

        first, second = idx[0], idx[1]

        floor_round = full_data.at[first, 'time'].floor('5min')
        ceil_round = full_data.at[second, 'time'].ceil('5min')

        # Neighbours exist only inside the frame: no wrap-around to the last row and
        # no lookup past the end
        has_previous = first > 0
        has_next = second + 1 < len(full_data)

        floor_is_free = (not has_previous) or floor_round != full_data.at[first - 1, 'new_time']
        ceil_is_free = (not has_next) or ceil_round != full_data.at[second + 1, 'new_time']

        if floor_is_free:
            full_data.at[first, 'new_time'] = floor_round

        elif ceil_is_free:
            full_data.at[second, 'new_time'] = ceil_round

        else:
            # Both neighbouring marks are taken: keep only the second row
            full_data = full_data.drop(axis=0, index=first).reset_index(drop=True)

    full_data = full_data.drop('time', axis=1)

    return full_data

In [12]:
def gap_filler(full_data, start, end, utc_offset="+03:00"):
    """
    Description
    -----------
    This function builds the complete 5-minute grid between `start` 00:00:00 and `end` 23:55:00 and adds
    an empty row (bpm and state missing) for every slot of the grid that is absent from the data.
    Afterwards the rows are indexed and sorted by time and the missing states are forward-filled,
    so a gap inherits the state of the last known row before it.

    Parameters
    ----------
    full_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame with ['bpm', 'state', 'new_time'] columns, 'new_time' being timezone-aware

    start : <string>
        First day of the grid. The day is in the format YYYY-MM-DD

    end : <string>
        Last day of the grid. The day is in the format YYYY-MM-DD

    utc_offset : <string>, optional
        UTC offset of the grid in the format ±hh:mm (default: "+03:00")

    Returns
    -------
    full_data : <class 'pandas.core.frame.DataFrame'>
        DataFrame indexed by 'new_time' (sorted) with the remaining columns of the input
    """

    time_format = "%Y-%m-%dT%H:%M:%S%z"
    step = datetime.timedelta(minutes=5)

    start_time = datetime.datetime.strptime(f"{start}T00:00:00{utc_offset}", time_format)
    end_time = datetime.datetime.strptime(f"{end}T23:55:00{utc_offset}", time_format)

    # Every 5-minute slot of the range, both ends included
    time_list = set()
    aux = start_time
    while aux <= end_time:
        time_list.add(aux)
        aux += step

    # Slots without any row in the data (sorted so the inserted rows have a stable order)
    gaps = sorted(time_list.difference(full_data['new_time']))
    new_rows = pd.DataFrame({'bpm': [None] * len(gaps),
                             'state': [None] * len(gaps),
                             'new_time': gaps})

    full_data = pd.concat([full_data, new_rows], ignore_index=True)
    full_data = full_data.set_index('new_time').sort_index()

    # A gap keeps the state of the last row before it; gaps before the first known row stay empty
    full_data['state'] = full_data['state'].ffill()

    return full_data

# Day Separator

Rule: A day ends at its last sleep label, and the next day starts 5 minutes after that last sleep label.

In [13]:
def day_batcher(df):
    """
    Description
    -----------
    This function splits the time series into one DataFrame per day. A day ends at its last sleep row,
    i.e. a sleep row whose next sleep row is more than 3 hours later, and the next day starts on the
    row right after it. The rows after the last detected day end are not returned, because that day
    is not known to be complete.

    Parameters
    ----------
    df : <class 'pandas.core.frame.DataFrame'>
        DataFrame ordered by time with (at least) ['new_time', 'state'] columns

    Returns
    -------
    day_batch : <list of pandas.core.frame.DataFrame>
        Consecutive, non-overlapping slices of `df`, one per detected day. Each slice ends
        with the last sleep row of its day. The list is empty if no day end is found.
    """

    # Renumber the rows so that labels are positions usable with iloc, whatever index df has
    positional = df.reset_index(drop=True)

    # Time elapsed between every sleep row and the following sleep row
    sleep_times = positional.loc[positional['state'] == 'sleep', 'new_time']
    gap_to_next_sleep = sleep_times.shift(-1) - sleep_times

    # A long pause before the next sleep row marks the last sleep row of a day
    # (the very last sleep row has no successor: its gap is missing and compares False)
    day_end = gap_to_next_sleep[gap_to_next_sleep > datetime.timedelta(hours=3)].index

    day_batch = []
    batch_start = 0

    for last_sleep_row in day_end:

        # The last sleep row belongs to the day it closes, so the slice end is inclusive
        batch_stop = int(last_sleep_row) + 1
        day_batch.append(df.iloc[batch_start:batch_stop])
        batch_start = batch_stop

    return day_batch

# Main Function

In [14]:
# Stack the sleep-route and heart-route rows, drop the rows with missing values and renumber the index
full_data = pd.concat([sleep_data, heart_data]).dropna(ignore_index=True)
# Snap every timestamp to a 5-minute mark ('new_time' replaces 'time')
full_data = time_rounder(full_data)
# Add an empty row for every 5-minute slot between START and END that has no measurement
full_data = gap_filler(full_data,START,END)
# Move 'new_time' from the index back to a column, leaving an integer index
full_data = full_data.reset_index()

In [15]:
# generate color list
# NOTE(review): `colors` is not used by the figure below; kept in case another cell reads it
colors = ['red' if val == 'awake' else 'blue' for val in full_data['state']]

# Filter each state once instead of once per axis
awake_rows = full_data[full_data['state'] == 'awake']
sleep_rows = full_data[full_data['state'] == 'sleep']

# Gray line through every row (gaps included), plotted against the row number
fig = go.Figure(go.Scatter(
    x=full_data.index,
    y=full_data['bpm'],
    mode='lines',
    line={'color': 'gray'},
    name="Combined"
))

# Markers take their color from `marker`, not from `line`
fig.add_trace(go.Scatter(
    x=awake_rows.index,
    y=awake_rows['bpm'],
    mode='markers',
    marker={'color': 'green'},
    name="Awake"
))

fig.add_trace(go.Scatter(
    x=sleep_rows.index,
    y=sleep_rows['bpm'],
    mode='markers',
    marker={'color': 'blue'},
    name="Sleep"
))

fig.update_layout(title='BPM Time Series')
fig.show()