In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm


def make_index_date(value):
    """Truncate a timestamp to midnight of the same calendar day.

    Parameters
    ----------
    value : pandas.Timestamp or datetime.datetime
        Any object exposing ``year``, ``month`` and ``day`` attributes.
        Missing values (``pd.NaT``, ``None``, NaN) are accepted.

    Returns
    -------
    pandas.Timestamp
        A timezone-naive timestamp at 00:00:00 built from the input's
        year/month/day, or ``pd.NaT`` when ``value`` is missing.
    """
    # NaT reports NaN for year/month/day, which the Timestamp constructor
    # rejects; hand missing values back unchanged instead of raising.
    if pd.isna(value):
        return pd.NaT
    return pd.Timestamp(year=value.year, month=value.month, day=value.day)

# Register tqdm with pandas so Series.progress_apply is available below.
tqdm.pandas()

# header=None: the first line of the file is data, so column names are given here.
df = pd.read_csv(
    "lights(id).csv",
    header=None,
    names=["id", "fitbit_id", "timestamp", "lux"],
)
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Calendar day (midnight) of every reading; used later as a join key.
df['index_date'] = df['timestamp'].progress_apply(make_index_date)

# Drop the 'id' column and flip the sign of fitbit_id
# (presumably to match the patient_id values merged in later — confirm).
df = df.drop(columns=['id'])
df['fitbit_id'] = df['fitbit_id'].mul(-1)

In [None]:
# Load the sunrise/sunset lookup table and parse its date/time columns.
# '양력(일)' is the date used as the join key; '일출몰(출)' and '일출몰(몰)' are the
# columns later renamed to sun_time and night_time.
# (Times are presumably Korean local time — confirm against the data source.)

df_suntime = pd.read_excel("sun_2015_2023_with_wake_up.xlsx")
for column in ('양력(일)', '일출몰(출)', '일출몰(몰)'):
    df_suntime[column] = pd.to_datetime(df_suntime[column])

In [None]:
# Attach sunrise/sunset to every reading by calendar day. This is an inner
# join, so readings whose day is missing from the sun table are dropped.
# The join key and the unused 'recommend_wake_up' column are then removed and
# the sun columns get English names.
df = (
    df.merge(df_suntime, left_on='index_date', right_on='양력(일)')
    .drop(columns=['양력(일)', 'recommend_wake_up'])
    .rename(columns={'일출몰(출)': 'sun_time', '일출몰(몰)': 'night_time'})
)

In [None]:
# Per-patient end dates; the merge further down reads its 'patient_id' and
# 'end_date' columns.
df_enddate = pd.read_csv("patient_end_date.csv")

In [None]:
import datetime

def fill_enddate(value):
    """Substitute a far-future sentinel for a missing end date.

    Returns ``datetime.datetime(9999, 12, 31)`` when ``pd.isna(value)`` is
    true; any other value is handed back untouched.
    """
    return datetime.datetime(9999, 12, 31) if pd.isna(value) else value

# Attach each patient's end date; the left join keeps patients that have none.
df = pd.merge(df, df_enddate, left_on='fitbit_id', right_on='patient_id', how='left')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['end_date'] = pd.to_datetime(df['end_date'])

# The right-hand join key duplicates fitbit_id: drop it, then expose the id
# under the name patient_id for the rest of the notebook.
df = df.drop(columns=['patient_id']).rename(columns={'fitbit_id': 'patient_id'})

# Keep readings taken on or before the patient's end date; a missing end date
# (NaT) means "no cut-off". This is written as a boolean mask rather than by
# filling NaT with a 9999-12-31 sentinel: that date is outside the range a
# datetime64[ns] column can represent, so the fill forced end_date out of its
# datetime dtype and made the comparison run element by element.
# end_date therefore stays NaT for patients without a cut-off.
# Rows with a missing timestamp are excluded, exactly as the comparison
# against the sentinel excluded them.
has_timestamp = df['timestamp'].notna()
within_study = df['end_date'].isna() | (df['timestamp'] <= df['end_date'])
df = df[has_timestamp & within_study]

In [None]:
# Keep only readings whose patient_id appears in all_patient.csv.
df_patient = pd.read_csv("all_patient.csv")
is_listed_patient = df['patient_id'].isin(df_patient['patient_id'])
df = df[is_listed_patient]

In [None]:
# Chronological order within each patient. The per-day loop further down
# records the first qualifying reading it meets, so row order matters.
df = df.sort_values(by=['patient_id', 'timestamp'])

In [None]:
# Row-wise snapshot of the readings. The loop below addresses fields by
# position: 0 = patient_id, 1 = timestamp, 2 = lux, -3 / -2 = sunrise / sunset
# (positions depend on the columns of the merged files — confirm if those change).
arr = df.to_numpy()
# patient_id -> {day -> {'lux': [...], 'wakeup_guess_time': float}}
my_dict = dict()

In [None]:
from tqdm import tqdm

# Single pass over every reading, collecting per patient and calendar day:
#   'lux'               - readings taken between sunrise and the morning cut-off
#   'wakeup_guess_time' - decimal hour of the first reading at/after sunrise
#                         with lux >= 140 (NaN until one is seen)
for record in tqdm(arr, miniters=1, mininterval=1):
    patient, stamp, lux_value = record[0], record[1], record[2]
    sunrise, sunset = record[-3], record[-2]

    per_day = my_dict.setdefault(patient, {})
    day_key = datetime.datetime(stamp.year, stamp.month, stamp.day)
    if day_key not in per_day:
        per_day[day_key] = {'lux': [], 'wakeup_guess_time': np.nan}
    entry = per_day[day_key]

    # Clock times as decimal hours; seconds are ignored.
    reading_hour = stamp.hour + stamp.minute / 60
    sunrise_hour = sunrise.hour + sunrise.minute / 60
    sunset_hour = sunset.hour + sunset.minute / 60
    # Midpoint between sunrise and the sunrise/sunset midpoint,
    # i.e. the end of the first quarter of the daylight span.
    morning_cutoff = (sunrise_hour + (sunrise_hour + sunset_hour) / 2) / 2

    if sunrise_hour <= reading_hour <= morning_cutoff:
        entry['lux'].append(lux_value)

    # Only the first qualifying reading of the day is kept; later ones are
    # ignored because the stored value is no longer NaN.
    if (
        sunrise_hour <= reading_hour
        and lux_value >= 140
        and np.isnan(entry['wakeup_guess_time'])
    ):
        entry['wakeup_guess_time'] = reading_hour

In [None]:
# Build a gap-free calendar per patient: one all-NaN placeholder for every day
# from the patient's first to last recorded day, inclusive.
my_dict_all_day = {}

for patient, per_day in my_dict.items():
    first_day = min(per_day)
    last_day = max(per_day)
    # Keys are midnight datetimes, so the difference is a whole number of days.
    span_days = (last_day - first_day).days

    my_dict_all_day[patient] = {
        first_day + datetime.timedelta(days=offset): {
            'lux_mean': np.nan,
            'lux_median': np.nan,
            'wakeup_guess_time': np.nan,
        }
        for offset in range(span_days + 1)
    }

In [None]:
# Copy each recorded day's statistics into the gap-free calendar. Days without
# any morning lux samples are left as NaN placeholders.
# NOTE(review): those days also keep wakeup_guess_time = NaN even when a guess
# was recorded for them — confirm this is intended.
for patient, per_day in my_dict.items():
    for day in sorted(per_day):
        samples = per_day[day]['lux']
        if samples:
            my_dict_all_day[patient][day].update(
                lux_mean=np.mean(samples),
                lux_median=np.quantile(samples, 0.5),
                wakeup_guess_time=per_day[day]['wakeup_guess_time'],
            )

In [None]:
# Flatten the nested calendar into one row per (patient, day), days ascending.
stat_fields = ('lux_mean', 'lux_median', 'wakeup_guess_time')
my_list = [
    [patient, day.date(), *(calendar[day][field] for field in stat_fields)]
    for patient, calendar in my_dict_all_day.items()
    for day in sorted(calendar)
]

# Note: the "timestamp" column holds a calendar date, not a time of day.
df = pd.DataFrame(
    my_list,
    columns=[
        "patient_id",
        "timestamp",
        "lux_morning_mean",
        "lux_morning_median",
        "wakeup_guess_time_based_on_lux",
    ],
)

In [None]:
# Write the per-patient, per-day feature table to CSV without the index column.
# The 'utf-8-sig' codec prepends a UTF-8 byte-order mark to the file.
df.to_csv("feature_light.csv", index=False, encoding="utf-8-sig")