In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Collect every CSV file in the demo-data folder. Each file name is expected to
# begin with a number, which becomes that file's key in data_map.
folder_path = 'LBG Reboot 2025 LDN - Health Demo Data'
csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.csv'))


def _file_number(file_name):
    """Return the integer formed by the leading digits of file_name.

    Raises ValueError when file_name does not start with a digit.
    """
    # Everything lstrip removes is the leading run of digits
    digit_count = len(file_name) - len(file_name.lstrip('0123456789'))
    return int(file_name[:digit_count])


# Read each CSV into a dataframe keyed by its full numeric prefix, so that
# multi-digit prefixes (10, 11, ...) do not collide with single-digit ones
data_map = {_file_number(file): pd.read_csv(os.path.join(folder_path, file)) for file in csv_files}

display(csv_files)

# 1. Sleep Data

In [None]:
# Work with the dataframe stored under key 1 of data_map in this section
df = data_map[1]

In [None]:
# Show the column labels of the selected dataframe
df.columns.tolist()

In [None]:
# Select the sleep-related columns; copying after the selection duplicates only
# the columns that are needed and keeps df_sleep independent of df.
sleep_columns = ['Date', 'Time', 'Sleep Duration (hours)', 'Time Awake (minutes)', 'REM Sleep (%)', 'Core Sleep (%)', 'Deep Sleep (%)']
df_sleep = df[sleep_columns].copy()

# Combine the date and time strings into one timestamp and use it as the index
df_sleep['Datetime'] = pd.to_datetime(df_sleep['Date'] + ' ' + df_sleep['Time'], format='%d-%b-%Y %H:%M:%S')
df_sleep = df_sleep.set_index('Datetime')
df_sleep['asleep'] = df_sleep['Sleep Duration (hours)'] > 0

# Convert 'Date' with the same explicit day-month-year format used for the
# timestamp above, rather than relying on pandas' format inference
df_sleep['Date'] = pd.to_datetime(df_sleep['Date'], format='%d-%b-%Y')
df_sleep['Hour'] = df_sleep.index.hour  # hour taken from the timestamp index
df_sleep['DayOfWeek'] = df_sleep.index.day_name()  # weekday name taken from the timestamp index

# Create a new column for the time of day morning, afternoon, evening, night
def time_of_day(hour):
    """Map an hour of the day to a coarse part-of-day label.

    Hours below 6 are 'night', below 12 'morning', below 18 'afternoon';
    anything else is 'evening'.
    """
    upper_bounds = ((6, 'night'), (12, 'morning'), (18, 'afternoon'))
    for limit, label in upper_bounds:
        if hour < limit:
            return label
    return 'evening'

# Label every row with the part of the day its timestamp hour falls in
hour_of_timestamp = df_sleep.index.hour
df_sleep['time_of_day'] = hour_of_timestamp.map(time_of_day)

df_sleep.head()

## 1.1 Data Quality Check

In [None]:
# Missing and zero values – count missing entries per column and, for the
# numeric columns, how many entries are exactly zero.
missing_values = df_sleep.isnull().sum()
zero_values = df_sleep.select_dtypes(include='number').eq(0).sum()

# Show both counts side by side; 'zero' is NaN for the non-numeric columns
pd.concat([missing_values.rename('missing'), zero_values.rename('zero')], axis=1)

In [None]:
# Data consistency – Ensure sleep percentages (REM, Core, Deep) sum to ~100% or make sense relative to sleep duration for each row.
# Check if the sum of sleep percentages is close to 100%
def check_sleep_percentages(row, tolerance=1.0):
    """Return whether a row's sleep-phase percentages add up to roughly 100%.

    Args:
    - row: Mapping or Series with 'Sleep Duration (hours)', 'REM Sleep (%)',
      'Core Sleep (%)' and 'Deep Sleep (%)' entries
    - tolerance (float): Largest accepted absolute deviation of the sum
      from 100, in percentage points

    Returns:
    - bool: True when the sleep duration is not positive, or when the three
      percentages sum to 100 within the tolerance
    """
    if row['Sleep Duration (hours)'] > 0:
        total_percentage = row['REM Sleep (%)'] + row['Core Sleep (%)'] + row['Deep Sleep (%)']
        # Exact equality would reject rows that differ only by rounding or
        # floating-point error; a NaN total still compares as invalid.
        return bool(abs(total_percentage - 100) <= tolerance)
    return True  # Nothing to validate when no sleep was recorded

# Flag each row by whether its sleep-phase percentages pass the check
df_sleep['valid_sleep'] = df_sleep.apply(check_sleep_percentages, axis=1)

# Keep the rows that failed the check and preview them
invalid_sleep_entries = df_sleep.loc[~df_sleep['valid_sleep']]
invalid_sleep_entries.head()

In [None]:
# Show every row whose timestamp occurs more than once in the index
# (keep=False marks all occurrences, not just the repeats)
is_repeated_timestamp = df_sleep.index.duplicated(keep=False)
df_sleep.loc[is_repeated_timestamp]

## 1.2 EDA

In [None]:
# Distribution of sleep duration for each hour of the day
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df_sleep, x='Hour', y='Sleep Duration (hours)', ax=ax)
ax.set_title('Sleep Duration by Hour of Day')
plt.show()

In [None]:
# Distribution of sleep duration for each weekday, shown Monday through Sunday
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df_sleep, x='DayOfWeek', y='Sleep Duration (hours)', order=weekday_order, ax=ax)
ax.set_title('Sleep Duration by Day of Week')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
def plot_sleep_phases_for_night(date_str, df):
    """
    Draw stacked sleep-phase bars for the 24 hours starting at 12PM of a date.

    Args:
    - date_str (str): Day on which the window opens, in 'YYYY-MM-DD' format
    - df (pd.DataFrame): Datetime-indexed sleep data with the REM/Core/Deep % columns

    Prints a notice and returns without plotting when the window has no rows.
    """
    window_start = pd.to_datetime(date_str) + pd.Timedelta(hours=12)
    window_end = window_start + pd.Timedelta(hours=24)

    # Half-open window: noon of the chosen day up to, but excluding, noon of the next
    in_window = (df.index >= window_start) & (df.index < window_end)
    night = df.loc[in_window]
    if night.empty:
        print(f"No data found for night of {date_str}")
        return

    night = night.sort_index()
    tick_labels = night.index.strftime('%m-%d %H:%M')

    # (column, legend label, bar colour) in stacking order, bottom to top
    phases = (
        ('REM Sleep (%)', 'REM Sleep', 'royalblue'),
        ('Core Sleep (%)', 'Core Sleep', 'mediumseagreen'),
        ('Deep Sleep (%)', 'Deep Sleep', 'slateblue'),
    )
    # Pull all columns before creating the figure so a missing column fails early
    shares = [night[column] for column, _, _ in phases]

    fig, ax = plt.subplots(figsize=(14, 6))
    stacked_so_far = 0
    for (_, legend_label, colour), share in zip(phases, shares):
        ax.bar(tick_labels, share, bottom=stacked_so_far, label=legend_label, color=colour)
        stacked_so_far = stacked_so_far + share

    ax.set_title(f'Sleep Phases from {window_start.strftime("%Y-%m-%d %H:%M")} to {window_end.strftime("%Y-%m-%d %H:%M")}')
    ax.set_xlabel('Time (Hour)')
    ax.set_ylabel('Sleep Phase Percentage (%)')
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_ylim(0, 100)
    ax.legend()
    fig.tight_layout()
    plt.show()


# Example: chart the 24 hours starting at 12PM on 2025-03-20
plot_sleep_phases_for_night('2025-03-20', df_sleep)