# Sleep Grid

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import pickle
import numpy as np

## Load Datasets

In [3]:
df_awake = pd.read_csv('data/awake_sleep.csv')
df_light = pd.read_csv('data/light_sleep.csv')
df_deep = pd.read_csv('data/deep_sleep.csv')
df_deep.columns

Index(['userId', 'summaryId', 'startTimeInSeconds', 'endTimeInSeconds'], dtype='object')

## Data Preprocessing

In [4]:
# add total seconds to each df
df_deep['totalSeconds'] = df_deep['endTimeInSeconds'] - df_deep['startTimeInSeconds']
df_light['totalSeconds'] = df_light['endTimeInSeconds'] - df_light['startTimeInSeconds']
df_awake['totalSeconds'] = df_awake['endTimeInSeconds'] - df_awake['startTimeInSeconds']

In [5]:
# convert startTimeInSeconds to a date + time
df_deep['startTimeDate'] = pd.to_datetime(df_deep['startTimeInSeconds'], unit='s')
df_light['startTimeDate'] = pd.to_datetime(df_light['startTimeInSeconds'], unit='s')
df_awake['startTimeDate'] = pd.to_datetime(df_awake['startTimeInSeconds'], unit='s')

In [6]:
# get the ids of the soldiers
df_heartrate = pd.read_csv('data/heart_rate_daily.csv')
unique_ids = df_heartrate['userId'].unique()

In [7]:
def generate_stats(df):
    # Assuming your DataFrame is named df and the column is 'timestamp_column'
    # Convert the 'timestamp_column' to pandas datetime format
    df['startTimeDate'] = pd.to_datetime(df['startTimeDate'])
    date_counts = df['startTimeDate'].dt.date.value_counts()
    unique_days = len(date_counts)
    data_points_per_day = df.groupby(df['startTimeDate'].dt.date)['startTimeDate'].nunique()
    # Calculate the average number of timings per day
    average_data_points_per_day = data_points_per_day.mean()
    return unique_days, average_data_points_per_day

def unify_sleep_data(light_sleep, awake_sleep, deep_sleep):
    # Create a new DataFrame for the unified data
    unified_sleep = pd.DataFrame(columns=['Date', 'Hour', 'Minute', 'SleepState'])

    # Process each sleep DataFrame
    unified_sleep = process_sleep_df(light_sleep, 'Light Sleep', unified_sleep)
    unified_sleep = process_sleep_df(awake_sleep, 'Awake', unified_sleep)
    unified_sleep = process_sleep_df(deep_sleep, 'Deep Sleep', unified_sleep)

    # Fill in missing minutes
    unique_dates = unified_sleep['Date'].unique()
    for date in unique_dates:
        for hour in range(24):
            for minute in range(60):
                if not ((unified_sleep['Date'] == date) & (unified_sleep['Hour'] == hour) & (
                        unified_sleep['Minute'] == minute)).any():
                    unified_sleep = pd.concat([unified_sleep, pd.DataFrame([{'Date': date, 'Hour': hour, 'Minute': minute, 'SleepState': '?'}])], ignore_index=True)
                    # append is deprecated
                    # unified_sleep = unified_sleep.append(
                    #     {'Date': date, 'Hour': hour, 'Minute': minute, 'SleepState': '?'}, ignore_index=True)

    # Sort the DataFrame
    unified_sleep = unified_sleep.sort_values(by=['Date', 'Hour', 'Minute']).reset_index(drop=True)

    return unified_sleep

# Function to process each sleep DataFrame
def process_sleep_df(df, sleep_state, unified_df):
    for index, row in df.iterrows():
        start_minute = row['startTimeInSeconds'] // 60
        end_minute = row['endTimeInSeconds'] // 60
        # print(start_minute, end_minute)
        date = row['startTimeDate']
        for minute in range(start_minute, end_minute):
            minute_of_hour = minute % 60
            unified_df = pd.concat([unified_df, pd.DataFrame([{'Date': str(date).split(" ")[0], 'Hour': date.hour, 'Minute': minute_of_hour,
                'SleepState': sleep_state}])], ignore_index=True)
            
            # append is deprecated
            # unified_df = unified_df.append(
            #     {'Date': str(date).split(" ")[0], 'Hour': date.hour, 'Minute': minute_of_hour,
            #     'SleepState': sleep_state}, ignore_index=True)

    return unified_df

def sleep_by_weeks(df):
    df['Date'] = pd.to_datetime(df['Date'])

    # Define a custom function to calculate the week number with Sunday as the first day
    def get_week_number(date):
        # Adjust the date so that Sunday is the first day of the week
        adjusted_date = date - pd.Timedelta(days=(date.weekday() + 1) % 7)
        # Return the ISO week number of the adjusted date
        return adjusted_date.isocalendar()[1]

    # Apply the custom function to calculate the week number for each row
    df['WeekNumber'] = df['Date'].apply(get_week_number)
    # Initialize an empty dictionary to hold the DataFrames for each week
    weekly_dataframes = {}
    # Group by 'WeekNumber' and create a DataFrame for each week
    for week_number, group in df.groupby('WeekNumber'):
        weekly_dataframes[week_number] = group
    return weekly_dataframes



In [19]:
users_weekly_sleep = {}
# Code takes a very long time to run (over 2 days)
count = 0
for id in unique_ids:
    filtered_light_sleep = df_light[df_light['userId'] == id].reset_index().drop(columns=['index', 'summaryId'], inplace=False)
    filtered_awake_sleep = df_awake[df_awake['userId'] == id].reset_index().drop(columns=['index', 'summaryId'], inplace=False)
    filtered_deep_sleep = df_deep[df_deep['userId'] == id].reset_index().drop(columns=['index', 'summaryId'], inplace=False)

    unified_sleep = unify_sleep_data(filtered_light_sleep, filtered_awake_sleep, filtered_deep_sleep)
    users_weekly_sleep[id] = sleep_by_weeks(unified_sleep)
    
    count += 1
    print(count)
    

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209


In [20]:
with open('sleeping.pkl', 'wb') as file:
    pickle.dump(users_weekly_sleep, file)

In [28]:
len(users_weekly_sleep)

210

In [35]:
users_weekly_sleep['b1f802a7-b049-47db-8bd9-21d77cfbbd69'][51]

Unnamed: 0,Date,Hour,Minute,SleepState,WeekNumber
109834,2022-12-25,0,0,Light Sleep,51
109835,2022-12-25,0,0,Deep Sleep,51
109836,2022-12-25,0,0,Deep Sleep,51
109837,2022-12-25,0,1,Light Sleep,51
109838,2022-12-25,0,1,Deep Sleep,51
...,...,...,...,...,...
121537,2022-12-31,23,55,Light Sleep,51
121538,2022-12-31,23,56,Light Sleep,51
121539,2022-12-31,23,57,Light Sleep,51
121540,2022-12-31,23,58,Light Sleep,51


In [28]:
len(users_weekly_sleep.keys())
users_weekly_sleep['0fa9bebd-47a8-4979-9ae4-54f8288dc150'][3]['SleepState'].value_counts()

SleepState
?              8085
Light Sleep    2132
Deep Sleep     1261
Awake           407
Name: count, dtype: int64