In [3]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from utils.tfrecord_utils import create_dataset
from utils.helpers import list_all_subject_ids

# Stuff - 1

In [71]:
# Participation dates for raster plots
#
# For every subject returned by list_all_subject_ids(pred_path), read that
# subject's prediction CSV and record the earliest and latest `epoch_ts`,
# each moved to 12:00:00 on its own calendar day. The result is written to
# `<output_path>/participation_dates.csv`, one row per subject.

pred_path = 'Results/Predictions/Wave-2/ds_best_on_all'
output_path = 'data/Wave-2'

# Fixed column order; also keeps the frame well-formed when no subject is found.
date_columns = ['subject_id', 'start_timestamp', 'end_timestamp']
# Zero every sub-hour field (including micro/nanoseconds) so the result is exactly noon.
noon = dict(hour=12, minute=0, second=0, microsecond=0, nanosecond=0)

participation_dates = {}
# `subject_id` rather than `id`, so the builtin id() is not shadowed for the rest of the session.
for subject_id in list_all_subject_ids(pred_path):
    pred_df = pd.read_csv(f"{pred_path}/sub_{subject_id}.csv")
    pred_df['epoch_ts'] = pd.to_datetime(pred_df['epoch_ts'])

    participation_dates[subject_id] = {
        'subject_id': subject_id,
        'start_timestamp': pred_df['epoch_ts'].min().replace(**noon),
        'end_timestamp': pred_df['epoch_ts'].max().replace(**noon),
    }

# Building from the record list with explicit columns gives a fresh RangeIndex
# and avoids a KeyError in sort_values() when `participation_dates` is empty.
dates_df = pd.DataFrame(list(participation_dates.values()), columns=date_columns)

# to_csv does not create missing directories.
os.makedirs(output_path, exist_ok=True)
dates_df.sort_values('subject_id').to_csv(f"{output_path}/participation_dates.csv", index=False)

In [74]:
# Copy every 'Sleep profile*' file from the PSG label folder into its
# 'processed' subfolder, dropping the first 7 lines of each file
# (presumably an export header - TODO confirm against a raw file).
psg_label_path = 'data/Wave-2/PSG-Labels'
out_path = f"{psg_label_path}/processed"

# Collect the candidate file names before the output folder is created.
files = []
for entry in os.listdir(psg_label_path):
    if entry.startswith('Sleep profile'):
        files.append(entry)
os.makedirs(out_path, exist_ok=True)

for fn in files:
    source_file = f"{psg_label_path}/{fn}"
    target_file = f"{out_path}/{fn}"

    # Read everything, keep only what follows the first 7 lines.
    with open(source_file) as src:
        lines = src.readlines()[7:]

    # Write the remaining lines under the same file name.
    with open(target_file, 'w') as dst:
        dst.writelines(lines)


In [93]:
# Concatenate the biobank output files for each subject
#
# For every subject id found in `path`, read all of that subject's gzipped CSV
# files, stack them, sort the rows by the `time` column and write the result
# to `<outpath>/biobank_<subject_id>.csv`.

path = 'data/Wave-2/Toolbox Outputs/raw'
outpath = 'data/Wave-2/Toolbox Outputs'
subject_ids = list_all_subject_ids(path, file_type='csv.gz')

# List the directory once (it does not change inside the loop) and keep only
# '.csv.gz' files: every file is read with compression='gzip', so a plain
# '.csv' would fail to decompress. Sorted so files are read in a reproducible order.
raw_files = sorted(fn for fn in os.listdir(path) if fn.endswith('.csv.gz'))

for subject_id in subject_ids:
    # NOTE(review): substring match - an id that is a prefix of another id
    # (e.g. 'D001' vs 'D0010') would also pick up the other subject's files.
    # Tighten this once the file-name pattern is confirmed.
    subject_files = [fn for fn in raw_files if subject_id in fn]

    subject_dfs = [
        pd.read_csv(f"{path}/{fn}", compression='gzip')
        for fn in subject_files
    ]

    subject_df = pd.concat(subject_dfs).sort_values('time')

    subject_df.to_csv(f'{outpath}/biobank_{subject_id}.csv', index=False)

# 2

Unnamed: 0,subject_id,lights_off,lights_on
0,D001,2023-03-08 23:00:00,2023-03-09 05:15:00
1,D001,2023-03-09 21:15:00,2023-03-10 05:15:00
2,D001,2023-03-10 22:10:00,2023-03-11 05:44:00
3,D001,2023-03-11 22:15:00,2023-03-12 06:14:00
4,D001,2023-03-12 22:14:00,2023-03-13 05:30:00
...,...,...,...
580,P014,2024-04-19 00:15:00,2024-04-19 07:00:00
581,P014,2024-04-20 00:00:00,2024-04-20 08:00:00
582,P014,2024-04-21 00:00:00,2024-04-21 07:00:00
583,P014,2024-04-22 00:00:00,2024-04-22 07:00:00
