In [None]:
# from google.colab import drive
# drive.mount('/gdrive')

In [None]:
import os
from os import listdir, makedirs
from os.path import join, isfile, isdir
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from tqdm import tqdm

In [None]:
# Input/output locations. Everything lives under one project folder; the
# metadata sub-folder holds both the sleep-time workbook that is read and the
# combined availability table that is written at the end of the notebook.
root_folder = 'drive/My Drive/LaTrobe/Projects/Accelerometer/OA activity data_La Trobe'
metadata_subdir = 'Analytics_Scripts/Rashmika_analysis/metadata'

# Workbook with the 'HOA-KOA' and 'OA' sheets.
sleep_df_file = join(root_folder, metadata_subdir + '/sleep_time_modified.xlsx')
# Destination of the combined per-interval table.
outout_filename = join(root_folder, metadata_subdir + '/record_availability_final.csv')

In [None]:
# Load both cohort sheets from the sleep-time workbook in a single call;
# passing a list of sheet names makes read_excel return a {sheet name: DataFrame} dict.
sleep_sheets = pd.read_excel(sleep_df_file, sheet_name=['HOA-KOA', 'OA'])
hkoa_df = sleep_sheets['HOA-KOA']
oa_df = sleep_sheets['OA']

### Process HKOA datasets

In [None]:
# Expand every HOA-KOA participant into one row per out-of-bed interval.
#
# This sheet is read as having a single in-bed / out-of-bed clock time per
# participant (only the first row of each participant is consulted), so the
# same two clock times are applied to every day between the recording start
# and end. The result, hkoa_out_df, carries the participant's remaining
# columns plus 'begin_time' and 'end_time'; it stays None when the sheet has
# no participants.
hkoa_frames = []  # one frame per participant, concatenated once after the loop

for user_id in tqdm(list(hkoa_df['Participant'].unique())):

  user_df = hkoa_df.loc[hkoa_df['Participant']==user_id]
  first_row = user_df.iloc[0]

  # Clock times are split on ':' into hour / minute / second integers.
  # NOTE(review): this assumes the cell renders as 'H:M:S' with no fractional
  # seconds -- confirm against the workbook.
  i_h, i_m, i_s = (int(part) for part in str(first_row['av_inbedtime']).split(':'))
  o_h, o_m, o_s = (int(part) for part in str(first_row['av_outofbed_time']).split(':'))

  d1 = first_row['actigraph_Start_time']
  d2 = first_row['actigraph_End_time']
  # One timestamp per day starting at d1; the "+ 2" adds a final entry that is
  # paired with the recording end below.
  dd = [d1 + timedelta(days=x) for x in range((d2-d1).days + 2)]

  begin_times = []
  end_times = []
  last_index = len(dd) - 1
  for i, d in enumerate(dd):

    if i == 0:
      # First day: from the start of the recording until bed time.
      begin_time = d1
      end_time = d.replace(hour=i_h, minute=i_m, second=i_s)
    elif i == last_index:
      # Last day: from getting up until the end of the recording.
      begin_time = d.replace(hour=o_h, minute=o_m, second=o_s)
      end_time = d2
    else:
      # Full day: from getting up until bed time.
      # NOTE(review): this branch is not clamped to d2, so the interval of the
      # second-to-last entry can extend past the recording end when d2's clock
      # time is later than d1's -- confirm whether that is intended.
      begin_time = d.replace(hour=o_h, minute=o_m, second=o_s)
      end_time = d.replace(hour=i_h, minute=i_m, second=i_s)

    # Inverted intervals are reported and dropped rather than stored.
    if begin_time > end_time:
      print("Error in {} \n {} to {}".format(user_id, begin_time, end_time))
      continue

    begin_times.append(begin_time)
    end_times.append(end_time)

  # Repeat only the participant's first row, once per interval. Repeating all
  # of user_df would yield rows * intervals lines and make the column
  # assignments below fail whenever a participant has more than one row.
  newdf = pd.DataFrame(
      np.repeat(user_df.iloc[[0]].values, len(begin_times), axis=0),
      columns=user_df.columns,
  )
  newdf['begin_time'] = begin_times
  newdf['end_time'] = end_times

  # The clock-time columns are superseded by the explicit interval columns.
  newdf = newdf.drop(columns=['av_inbedtime', 'av_outofbed_time'])

  hkoa_frames.append(newdf)

# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration; a single concat is the supported equivalent.
hkoa_out_df = pd.concat(hkoa_frames, ignore_index=True) if hkoa_frames else None

In [None]:
# Intermediate dump of the HOA-KOA intervals into the working directory,
# written without the row index.
hkoa_out_df.to_csv('hkoa.csv', index=False)

### Process OA data

In [None]:
# Expand every OA participant into one row per awake interval.
#
# Each row of a participant in this sheet supplies one in-bed / out-of-bed
# timestamp pair. The gaps between consecutive pairs, bounded by the recording
# start and end, become 'begin_time' / 'end_time' rows in oa_out_df. Gaps
# shorter than one hour are discarded. oa_out_df stays None when the sheet has
# no participants.
OA_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
MIN_AWAKE_SECONDS = 3600  # intervals shorter than this are dropped

oa_frames = []  # one frame per participant, concatenated once after the loop

for user_id in tqdm(list(oa_df['Participant'].unique())):

  user_df = oa_df.loc[oa_df['Participant']==user_id]

  all_start_time = user_df.iloc[0]['actigraph_Start_time']
  all_end_time = user_df.iloc[0]['actigraph_End_time']
  sleep_start_times = user_df['av_inbedtime'].tolist()
  sleep_end_times = user_df['av_outofbed_time'].tolist()

  # Offset the two lists by one so that zip() pairs
  #   recording start -> first in-bed time,
  #   out-of-bed time k -> in-bed time k+1,
  #   last out-of-bed time -> recording end.
  sleep_end_times.insert(0, str(all_start_time))
  sleep_start_times.append(str(all_end_time))

  start_processed = []
  end_processed = []
  for b, e in zip(sleep_end_times, sleep_start_times):

    # str() lets strptime accept the value whether it arrived as text or as a
    # timestamp object, the same way the recording bounds are handled above.
    b = datetime.strptime(str(b), OA_TIME_FORMAT)
    e = datetime.strptime(str(e), OA_TIME_FORMAT)

    # Compare the full signed duration. timedelta.seconds holds only the
    # seconds component (0..86399): a negative gap wraps to roughly 23 h and
    # used to be kept, while a gap of N days plus a few minutes used to be
    # dropped.
    if (e - b).total_seconds() < MIN_AWAKE_SECONDS:
      continue

    start_processed.append(b)
    end_processed.append(e)

  # Repeat the participant's first row once per retained interval.
  newdf = pd.DataFrame(
      np.repeat(user_df[0:1].values, len(start_processed), axis=0),
      columns=user_df.columns,
  )
  newdf['begin_time'] = start_processed
  newdf['end_time'] = end_processed

  # The raw sleep columns are superseded by the explicit interval columns.
  newdf = newdf.drop(columns=['av_inbedtime', 'av_outofbed_time'])

  oa_frames.append(newdf)

# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration; a single concat is the supported equivalent.
oa_out_df = pd.concat(oa_frames, ignore_index=True) if oa_frames else None

100%|██████████| 82/82 [00:00<00:00, 106.92it/s]


In [None]:
# Intermediate dump of the OA intervals into the working directory,
# written without the row index.
oa_out_df.to_csv('oa.csv', index=False)

### Combine both types of users

In [None]:
# Stack the HOA-KOA and OA interval tables and write the combined
# record-availability file. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0; columns present in only one table are kept.
final_df = pd.concat([hkoa_out_df, oa_out_df], ignore_index=True)
final_df.to_csv(outout_filename, index=False)