In [None]:
import pandas as pd
import numpy as np

# Load sleeps.csv; its 'index_date' column is referred to as 'timestamp' from here on.
df = pd.read_csv("sleeps.csv").rename(columns={'index_date':'timestamp'})

In [None]:
# Lookup table with one 'end_date' per 'patient_id'; it is merged into the sleep records below.
df_enddate = pd.read_csv(filepath_or_buffer="patient_end_date.csv")

In [None]:
import datetime

def fill_enddate(value):
    """Return ``value`` unchanged, or a far-future sentinel when it is missing.

    Missing means anything ``pd.isna`` reports as null (None, NaN, NaT); in that
    case ``datetime.datetime(9999, 12, 31)`` is returned.
    """
    return datetime.datetime(9999,12,31) if pd.isna(value) else value

# Attach each patient's end date (records are keyed by 'fitbit_id', the lookup by 'patient_id').
df = pd.merge(df, df_enddate, left_on='fitbit_id', right_on='patient_id', how='left')

# Parse the date/time columns that the comparisons below rely on.
for date_col in ('timestamp', 'end_date', 'start_time'):
    df[date_col] = pd.to_datetime(df[date_col])

# Patients without an end date get the far-future sentinel, so none of their rows are cut off.
df['end_date'] = df['end_date'].apply(fill_enddate)

# Keep a single id column named 'patient_id' (taken from 'fitbit_id').
df = df.drop(columns='patient_id').rename(columns={'fitbit_id':'patient_id'})

# Discard records dated after the patient's end date.
not_after_end = df['timestamp'] <= df['end_date']
df = df[not_after_end]

In [None]:
# Keep only the records whose patient_id is listed in all_patient.csv.
df_patient = pd.read_csv("all_patient.csv")
is_listed_patient = df['patient_id'].isin(df_patient['patient_id'])
df = df[is_listed_patient]

In [None]:
# Attach sunrise and sunset times to every record. The times are read from a
# lookup spreadsheet (presumably Korean local time - TODO confirm) and joined
# on the record's date; records without a matching date are dropped by the
# default inner merge.

df_suntime = pd.read_excel("sun_2015_2023_with_wake_up.xlsx")
for sun_col in ('양력(일)', '일출몰(출)', '일출몰(몰)'):
    df_suntime[sun_col] = pd.to_datetime(df_suntime[sun_col])

df = (
    pd.merge(df, df_suntime, left_on='timestamp', right_on='양력(일)')
    .drop(columns=['양력(일)', 'recommend_wake_up'])
    .rename(columns={'일출몰(출)':'sun_time', '일출몰(몰)':'night_time'})
)

In [None]:
# Order the records by patient, then by date.
df = df.sort_values(['patient_id', 'timestamp'])

In [None]:
import datetime

def make_correct_date(row):
    """Return the calendar day a sleep record is attributed to.

    A record whose start clock time is at or before that row's sunrise clock
    time is attributed to the previous calendar day; any other record keeps
    the calendar day of its start time. Only hours and minutes are compared.

    Parameters
    ----------
    row : mapping
        Must provide 'start_time' and 'sun_time' values exposing ``year``,
        ``month``, ``day``, ``hour`` and ``minute`` (datetime / Timestamp).

    Returns
    -------
    datetime.datetime
        Midnight of the attributed day.
    """
    started = row['start_time']
    sun_time = row['sun_time'].hour + row['sun_time'].minute / 60
    current_time = started.hour + started.minute / 60

    new_date = datetime.datetime(year=started.year, month=started.month, day=started.day)
    if current_time <= sun_time:
        # Started before (or exactly at) sunrise: belongs to the previous day.
        new_date = new_date - datetime.timedelta(days=1)
    return new_date

# Replace each record's date with the day it is attributed to, then order the
# records by patient, attributed day and start time.
df = (
    df.assign(timestamp=df.apply(make_correct_date, axis=1))
    .sort_values(['patient_id', 'timestamp', 'start_time'])
)

In [None]:
# my_dict will hold, per patient and per day, the lists of values of that day's records.
my_dict = dict()
# Plain array copy of the records; the loop that fills my_dict reads it by column position.
arr = df.to_numpy()

In [None]:
from tqdm import tqdm

# Group the records by patient and day. Columns are read by position:
# 0 = patient, 1 = day, 2..5 = efficiency / minutes asleep / minutes awake /
# time in bed, 6 = sleep onset.
for row_idx in tqdm(range(len(arr)), miniters=1, mininterval=1):
    record = arr[row_idx]
    patient_days = my_dict.setdefault(record[0], {})

    night = datetime.datetime(record[1].year, record[1].month, record[1].day)
    if night not in patient_days:
        patient_days[night] = {
            field: []
            for field in ('efficiency', 'minutes_sleep', 'minutes_awake',
                          'time_in_bed', 'sleep_onset', 'sleep_offset')
        }
    bucket = patient_days[night]

    bucket['efficiency'].append(record[2])
    bucket['minutes_sleep'].append(record[3])
    bucket['minutes_awake'].append(record[4])
    bucket['time_in_bed'].append(record[5])
    bucket['sleep_onset'].append(record[6])
    # The offset is derived: onset plus the time in bed (minutes).
    bucket['sleep_offset'].append(record[6]+datetime.timedelta(minutes=record[5]))

In [None]:
# For every patient, create one entry per calendar day between the first and
# the last day with data (inclusive). Every field starts as NaN so that days
# without any record stay empty.
my_dict_all_day = {}
empty_day_fields = ('efficiency', 'minutes_sleep', 'minutes_awake', 'time_in_bed',
                    'main_sleep_onset', 'main_sleep_offset', 'main_midsleep', 'main_sleep_time')

for p, recorded_days in my_dict.items():
    first_day = min(recorded_days)
    last_day = max(recorded_days)
    day_count = (last_day - first_day).days + 1

    my_dict_all_day[p] = {
        first_day + datetime.timedelta(days=step): dict.fromkeys(empty_day_fields, np.nan)
        for step in range(day_count)
    }

In [None]:
def _clock_hours(moment):
    """Return the time of day of ``moment`` as fractional hours (seconds are ignored)."""
    return moment.hour + moment.minute / 60


def _sleep_time_bucket(onset_hour):
    """Map the clock hour at which sleep started to the 1-4 code stored in 'main_sleep_time'."""
    if onset_hour <= 6:
        return 1
    if onset_hour <= 12:
        return 2
    if onset_hour <= 18:
        return 3
    if onset_hour <= 24:
        return 4
    return np.nan  # not reachable for a real hour; keeps the NaN placeholder


def _merge_fragments(onsets, offsets, max_gap=datetime.timedelta(hours=1)):
    """Join sleep fragments separated by at most ``max_gap`` into continuous periods.

    ``onsets`` and ``offsets`` are parallel sequences ordered by onset.
    Returns a list of ``[onset, offset]`` pairs (pandas Timestamps).
    """
    periods = []
    for raw_onset, raw_offset in zip(onsets, offsets):
        onset = pd.to_datetime(raw_onset)
        offset = pd.to_datetime(raw_offset)
        if periods and onset - periods[-1][1] <= max_gap:
            # Extend the current period. The gap of the NEXT fragment is measured
            # from this (possibly extended) end, and a fragment that ends before
            # the current end never shortens the period.
            periods[-1][1] = max(periods[-1][1], offset)
        else:
            periods.append([onset, offset])
    return periods


# Collapse each patient-day into one summary row and identify its main sleep period.
for p in my_dict:
    for day in sorted(my_dict[p]):
        fragments = my_dict[p][day]
        summary = my_dict_all_day[p][day]

        # Efficiency is averaged over the day's records; the minute counts are summed.
        summary['efficiency'] = np.mean(fragments['efficiency'])
        summary['minutes_sleep'] = np.sum(fragments['minutes_sleep'])
        summary['minutes_awake'] = np.sum(fragments['minutes_awake'])
        summary['time_in_bed'] = np.sum(fragments['time_in_bed'])

        # Combine any fragments of sleep into continuous periods.
        periods = _merge_fragments(fragments['sleep_onset'], fragments['sleep_offset'])
        if not periods:
            continue

        # Main sleep = the longest period by real elapsed time. Subtracting clock
        # hours instead would go negative for sleep that crosses midnight.
        # Iterating in reverse makes the latest period win a tie.
        main_sleep_onset, main_sleep_offset = max(
            reversed(periods), key=lambda span: span[1] - span[0]
        )

        summary['main_sleep_onset'] = main_sleep_onset
        summary['main_sleep_offset'] = main_sleep_offset

        start = _clock_hours(main_sleep_onset)
        end = _clock_hours(main_sleep_offset)
        # NOTE(review): this clock-hour formula always yields a value in [0, 12);
        # e.g. a 13:00-15:00 episode maps to 2.0. It is left unchanged because the
        # DLMO estimate is computed from this value - confirm this is intended.
        summary['main_midsleep'] = ((start + end) % 24) / 2

        summary['main_sleep_time'] = _sleep_time_bucket(main_sleep_onset.hour)

In [None]:
# Flatten the per-patient / per-day summaries into one table, one row per patient-day.
summary_fields = ('efficiency', 'minutes_sleep', 'minutes_awake', 'time_in_bed',
                  'main_sleep_onset', 'main_sleep_offset', 'main_midsleep', 'main_sleep_time')

my_list = [
    [patient, day.date(), *(per_day[day][field] for field in summary_fields)]
    for patient, per_day in my_dict_all_day.items()
    for day in sorted(per_day)
]

output_columns = ["patient_id","timestamp","sleep_efficiency","sleep_minutes","sleep_minutes_awake",
                  "sleep_time_in_bed", "main_sleep_onset", "main_sleep_offset", "main_midsleep", "main_sleep_time"]
df = pd.DataFrame(my_list, columns=output_columns)

In [None]:
# Keep only the columns needed for the DLMO estimate.
df_midsleep = df.loc[:, ['patient_id', 'timestamp', 'main_midsleep']]

In [None]:
def _wrap_below_24(hours):
    """Subtract 24 repeatedly until ``hours`` is below 24."""
    while hours >= 24:
        hours = hours - 24
    return hours


# For every patient-day, estimate DLMO with two formulas from the mean
# mid-sleep over a trailing window (the current day and up to 14 days before),
# together with a wake-up time 9.5 h after each estimate. Rows are produced in
# the same order as df_midsleep; days without enough data get NaNs.
my_dlmo = []
for p in my_dict_all_day:
    patient_rows = df_midsleep[df_midsleep['patient_id']==p].values

    for row_idx in range(len(patient_rows)):
        estimate = [np.nan, np.nan, np.nan, np.nan]

        if row_idx >= 2:
            window = patient_rows[max(row_idx - 14, 0):row_idx+1, 2]
            window = window[~pd.isna(window)]

            # At least three recorded mid-sleep values are required.
            if len(window) >= 3:
                midsleep = np.mean(window) + 24

                dlmo_method1 = midsleep - 6.27
                wakup1 = _wrap_below_24(dlmo_method1 + 9.5)

                dlmo_method2 = 0.72 * midsleep + 1.5
                wakup2 = _wrap_below_24(dlmo_method2 + 9.5)

                estimate = [dlmo_method1, wakup1, dlmo_method2, wakup2]

        my_dlmo.append(estimate)

my_dlmo = np.array(my_dlmo)

In [None]:
# Append the four DLMO columns to the mid-sleep table, row by row.
# The columns are added as a float DataFrame instead of going through
# np.concatenate, which would turn every column (ids, dates and floats alike)
# into object dtype.
dlmo_columns = ['dlmo_fixed_interval', 'dlmo_fixed_interval_wakeup_time',
                'dlmo_linear_regression', 'dlmo_linear_regression_wakeup_time']
dlmo_values = np.asarray(my_dlmo, dtype=float).reshape(-1, len(dlmo_columns))

# pd.concat would silently pad a shorter side with NaN, so check the lengths explicitly.
if len(dlmo_values) != len(df_midsleep):
    raise ValueError("my_dlmo must contain exactly one row per row of df_midsleep")

df_midsleep = pd.concat(
    [df_midsleep.reset_index(drop=True), pd.DataFrame(dlmo_values, columns=dlmo_columns)],
    axis=1,
)

In [None]:
# 'main_midsleep' already exists in df; drop it here so the merge does not duplicate it.
df_midsleep = df_midsleep.drop(columns='main_midsleep')

In [None]:
def make_dlmo1_diff(row):
    """Return the fixed-interval wake-up estimate minus the clock time of 'main_sleep_offset'.

    Both are in fractional hours (seconds of the offset are ignored). Returns
    NaN when 'dlmo_fixed_interval_wakeup_time' is missing.
    """
    predicted_wakeup = row['dlmo_fixed_interval_wakeup_time']
    if pd.isna(predicted_wakeup):
        return np.nan

    woke_at = row['main_sleep_offset']
    return predicted_wakeup - (woke_at.hour + woke_at.minute/60)

def make_dlmo2_diff(row):
    """Return the linear-regression wake-up estimate minus the clock time of 'main_sleep_offset'.

    Both are in fractional hours (seconds of the offset are ignored). Returns
    NaN when 'dlmo_linear_regression_wakeup_time' is missing.
    """
    predicted_wakeup = row['dlmo_linear_regression_wakeup_time']
    if pd.isna(predicted_wakeup):
        return np.nan

    woke_at = row['main_sleep_offset']
    return predicted_wakeup - (woke_at.hour + woke_at.minute/60)
    
# Bring the DLMO columns into the daily table (every daily row is kept).
df = df.merge(df_midsleep, on=["patient_id","timestamp"], how='left')

In [None]:
# The diff helpers read .hour / .minute from the offset, so make sure it is a datetime column.
df['main_sleep_offset'] = pd.to_datetime(df['main_sleep_offset'])

# Difference between each estimated wake-up time and the clock time of the main sleep offset.
for diff_column, diff_function in (
    ('diff_wakeup_to_fixed_interval_wakeup_time', make_dlmo1_diff),
    ('diff_wakeup_to_linear_regression_wakeup_time', make_dlmo2_diff),
):
    df[diff_column] = df.apply(diff_function, axis=1)

In [None]:
# Write the final feature table, without the index, as UTF-8 with a byte-order mark.
df.to_csv(path_or_buf="feature_sleep_DLMO.csv", index=False, encoding="utf-8-sig")