In [None]:
from src.data.data_loader import load_data
from src.data.data_cleaner import clean_data
import pandas as pd
import numpy as np
import datetime
from datetime import datetime  # noqa: F811
from scipy.stats import norm

In [64]:
# Load the dataset with load_data() and pass it through clean_data, selecting
# the "kaggle_brisT1D" data source by name.
data = clean_data(data=load_data(), data_source_name="kaggle_brisT1D")

  return pd.read_csv(file_path, usecols=keep_columns)


In [None]:
def calculate_sleep_probability(row: pd.Series) -> float:
    """Estimate the probability that the subject is asleep for one observation.

    The score blends a time-of-day prior with three activity signals read at
    the current timestamp (heart rate, steps and calories).

    Args:
        row (pd.Series): One observation. Must provide:
            - "time": clock time as an "%H:%M:%S" string,
            - "hr-0:00": current heart rate,
            - "steps-0:00": current step count,
            - "cals-0:00": current calories.

    Returns:
        float: The sleep probability, clipped to the range [0, 1]. Missing
        (NaN) activity values are not special-cased and propagate as NaN.

    Raises:
        ValueError: If ``row["time"]`` does not match "%H:%M:%S".

    Notes:
        - The time prior is a Gaussian bump (sigma = 2 h) centred on 03:00 and
          measured with circular clock distance, so it is highest between
          0:00-6:00 and stays continuous across midnight.
        - Each activity signal decays as exp(-value / 100): higher activity
          means lower sleep probability, and zero activity scores exactly 1.
        - Steps carry the largest weight as the strongest indicator of sleep.
    """
    peak_hour = 3.0  # centre of the assumed sleep window
    time_sigma = 2.0  # spread of the time prior, in hours
    decay_scale = 100.0  # shared normalisation for the activity signals

    # Parse the timestamp once; seconds are intentionally ignored.
    parsed = datetime.strptime(row["time"], "%H:%M:%S")
    hour = parsed.hour + parsed.minute / 60.0

    # Circular distance on a 24 h clock so that e.g. 23:00 is treated as 4 h
    # before the 03:00 peak rather than 20 h after it.
    offset = abs(hour - peak_hour)
    offset = min(offset, 24.0 - offset)

    # Gaussian normalised to 1 at its peak (pdf(x) / pdf(peak)).
    time_prob = np.exp(-0.5 * (offset / time_sigma) ** 2)

    # Activity-based probabilities: exponential decay with the signal level.
    hr_prob = np.exp(-row["hr-0:00"] / decay_scale)
    steps_prob = np.exp(-row["steps-0:00"] / decay_scale)
    cals_prob = np.exp(-row["cals-0:00"] / decay_scale)

    # Weighted blend of the activity signals, steps weighted highest.
    activity_prob = 0.5 * steps_prob + 0.3 * hr_prob + 0.2 * cals_prob

    # Activity dominates; the time of day acts as a prior.
    final_prob = 0.7 * activity_prob + 0.3 * time_prob

    # Clip to a valid probability and return a built-in float as annotated.
    return float(np.clip(final_prob, 0, 1))


def analyze_sleep_patterns(data: pd.DataFrame) -> pd.DataFrame:
    """Score every observation in ``data`` with a sleep probability.

    Args:
        data (pd.DataFrame): Observations holding the time, heart rate, steps
            and calories columns read by ``calculate_sleep_probability``.

    Returns:
        pd.DataFrame: The same DataFrame object that was passed in, now
        carrying a 'sleep_probability' column (the frame is modified in place).
    """
    # Evaluate the per-row scorer across the frame, one row at a time.
    row_scores = data.apply(calculate_sleep_probability, axis="columns")

    # Attach the scores to the caller's frame and hand it back.
    data["sleep_probability"] = row_scores
    return data


def main(data: pd.DataFrame) -> pd.DataFrame:
    """Run the sleep-probability workflow on ``data``.

    Args:
        data (pd.DataFrame): Observations with time, heart rate, steps and
            calories columns.

    Returns:
        pd.DataFrame: Whatever ``analyze_sleep_patterns`` returns for ``data``,
        i.e. the frame with an added 'sleep_probability' column.
    """
    # The workflow currently consists of a single step: scoring each row.
    return analyze_sleep_patterns(data)


# Example usage: run the workflow on the cleaned data loaded above.
# `main` returns the frame produced by `analyze_sleep_patterns`, which adds the
# "sleep_probability" column to `data` itself, so `results` and `data` refer to
# the same DataFrame.
results = main(data)

In [69]:
# Preview the first rows (up to 20), including the new sleep_probability column.
results.head(20)

Unnamed: 0,id,p_num,time,bg-0:00,insulin-0:00,carbs-0:00,hr-0:00,steps-0:00,cals-0:00,sleep_probability
0,p01_0,p01,06:10:00,15.1,0.0417,48.01897,79.335216,53.052685,9.36896,0.514024
1,p01_1,p01,06:25:00,14.4,0.0417,48.01897,79.335216,53.052685,9.36896,0.498097
2,p01_2,p01,06:40:00,13.9,0.0417,48.01897,79.335216,53.052685,9.36896,0.484252
3,p01_3,p01,06:55:00,13.8,0.0417,48.01897,79.335216,53.052685,9.36896,0.472461
4,p01_4,p01,07:10:00,13.4,0.0417,48.01897,79.335216,53.052685,9.36896,0.462619
5,p01_5,p01,07:25:00,12.8,0.0417,48.01897,79.335216,53.052685,9.36896,0.454562
6,p01_6,p01,07:40:00,15.5,0.0417,20.0,79.335216,53.052685,9.36896,0.448089
7,p01_7,p01,07:55:00,14.8,0.0417,48.01897,79.335216,53.052685,9.36896,0.442986
8,p01_8,p01,08:10:00,12.7,0.0583,48.01897,79.335216,53.052685,9.36896,0.439036
9,p01_9,p01,08:25:00,11.4,0.0583,48.01897,79.335216,53.052685,9.36896,0.436033


1.0