# UMEBOSHI dataset

The following notebook generates synthetic data that stand in for recordings acquired during the fictional "Umeboshi" study. This dataset comprises 6 files containing synthetic data and
is meant to illustrate the various issues encountered during
actigraphy data acquisition.

* UMEBOSHI_01.AWD: regular acquisition;
* UMEBOSHI_02.AWD: delayed acquisition ("false start");
* UMEBOSHI_03.AWD: missing data ("no wear period (short&recurrent)");
* UMEBOSHI_04.AWD: regular acquisition;
* UMEBOSHI_05.AWD: delayed acquisition ("false stop");
* UMEBOSHI_06.AWD: missing data ("no wear period (long)").

## Import packages

In [None]:
import numpy as np
import pandas as pd

In [None]:
import pyActigraphy

In [None]:
from pyActigraphy.tests.generate_dataset import generate_series, generate_squarewave

In [None]:
import plotly.graph_objects as go

## Synthetic data generation

These synthetic data are generated from a square wave, corrupted by white Gaussian noise.

### Settings

In [None]:
N=10800  # Number of points to generate: 7.5 days at a sampling period of 60 s (7.5 * 24 * 60 = 10800).

In [None]:
T=86400  # Period (in seconds) of the square wave signal: 24 h (24 * 60 * 60 s).

In [None]:
Ts=60  # Sampling period (in seconds): one point per minute.

In [None]:
A=1000  # Amplitude of the signal (arbitrary units)

In [None]:
add_noise=True  # Add noise to the square wave
noise_power=(A/10)  # Amplitude of the white Gaussian noise: one tenth of the signal amplitude A

In [None]:
offset=True  # If True, the signal lies in [0, 2A]; otherwise it lies in [-A, A].

In [None]:
# Set seed for reproducibility.
# NOTE(review): in the visible part of this notebook, `rng` is only used to
# draw the recording start days; whether the noise added inside
# generate_squarewave is reproducible as well depends on that helper's own
# random source - TODO confirm.
rng = np.random.default_rng(seed=0)

### Helper function

In [None]:
def create_awd_header(subject_id, start_time):
    """Build the header lines of an AWD file.

    Parameters
    ----------
    subject_id :
        Identifier written on the first header line.
    start_time :
        Start of the recording. It is parsed by ``pd.to_datetime`` with
        ``dayfirst=True``, so ``'05/01/2000 08:00:00'`` is read as
        5 January 2000.

    Returns
    -------
    list of str
        Seven header lines: the subject ID, the start date formatted as
        ``%d-%b-%Y``, the start time formatted as ``%H:%M``, followed by
        the fixed fields ``'4'``, ``'0'``, ``'Vff1234'`` and ``'X'``.
    """
    # Parse the start time once and reuse it for both the date and time lines
    start = pd.to_datetime(start_time, dayfirst=True)

    return [
        f'{subject_id}',
        start.strftime("%d-%b-%Y"),
        start.strftime("%H:%M"),
        '4',
        '0',
        'Vff1234',  # V: Actiwatch Plus, AW4 or AW64.
        'X',
    ]

In [None]:
def create_awd_data(start_time, N, T, Ts, A, add_noise, noise_power, offset):
    """Generate one synthetic recording with integer values.

    The signal settings are forwarded unchanged to ``generate_squarewave``.
    Its output is rectified with ``np.abs``, handed to ``generate_series``
    together with the start time and the sampling period, and the result is
    finally cast to ``int``.

    Parameters
    ----------
    start_time :
        Passed to ``generate_series`` as ``start``.
    N :
        Number of samples.
    T :
        Period of the square wave, in seconds (e.g. 24*60*60).
    Ts :
        Sampling period, in seconds.
    A :
        Oscillation amplitude.
    add_noise :
        Whether Gaussian noise is added to the square wave.
    noise_power :
        Noise setting forwarded to ``generate_squarewave``.
    offset :
        If True, offset the oscillations between 0 and +2A.

    Returns
    -------
    The object returned by ``generate_series(...).astype(int)``
    (presumably a pandas Series indexed by time - confirm against
    ``pyActigraphy.tests.generate_dataset``).
    """
    squarewave_settings = dict(
        N=N,
        T=T,
        Ts=Ts,
        A=A,
        add_noise=add_noise,
        noise_power=noise_power,
        offset=offset,
    )

    # Rectify the square wave so that no sample is negative
    rectified_wave = np.abs(generate_squarewave(**squarewave_settings))

    series = generate_series(
        rectified_wave,
        start=start_time,
        sampling_period=Ts,
    )

    return series.astype(int)

In [None]:
def create_awd_file(subject_id, header, data):
    """Write an AWD file to disk.

    The file is named ``<subject_id>.AWD`` and is created relative to the
    current working directory (an existing file is overwritten). It
    contains the header lines, one per line, followed by the text returned
    by ``data.to_csv(index=False)``.

    Parameters
    ----------
    subject_id :
        Used to build the file name.
    header : iterable of str
        Header lines, without trailing newlines.
    data :
        Object exposing ``to_csv(index=False)`` (e.g. a pandas Series).
    """
    with open('{}.AWD'.format(subject_id), 'w') as f:
        # The joined header is a single string, so write() is the right
        # call: writelines() would iterate over it character by character.
        f.write('\n'.join(header))
        f.write('\n')
        f.write(data.to_csv(index=False))

### UMEBOSHI file headers

In [None]:
# Map each subject ID (UMEBOSHI_01 ... UMEBOSHI_06) to its start time:
# 08:00:00 on a randomly drawn day (1 to 30) of January 2000
start_times = {
    f'UMEBOSHI_{subject_no:02d}': f'{start_day:02d}/01/2000 08:00:00'
    for subject_no, start_day in enumerate(
        rng.integers(1, high=31, size=6, dtype=int), start=1
    )
}

In [None]:
# Display the subject ID -> start time mapping
start_times

In [None]:
# Build the AWD header lines of each subject from its ID and start time
headers = {
    subject_id: create_awd_header(subject_id, start_time)
    for subject_id, start_time in start_times.items()
}

### UMEBOSHI data

In [None]:
# Generate one synthetic recording per subject; all recordings share the
# same signal settings and differ only in their start time
data = {
    subject_id: create_awd_data(
        start_time, N, T, Ts, A, add_noise, noise_power, offset
    )
    for subject_id, start_time in start_times.items()
}

### UMEBOSHI 01 ("regular acquisition")

In [None]:
# Write data file: header and unmodified data go to 'UMEBOSHI_01.AWD'
# (path relative to the current working directory)
create_awd_file('UMEBOSHI_01', headers['UMEBOSHI_01'], data['UMEBOSHI_01'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_01 = pyActigraphy.io.read_raw_awd('UMEBOSHI_01.AWD')

In [None]:
# Visualize data: values of raw_01.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_01.data.index.astype(str),y=raw_01.data))

### UMEBOSHI 02 ("delayed acquisition: false start")

In [None]:
# Modify the original data:
# False start (3h): zero everything from the first sample up to 3 h later.
# Label-based .loc slicing includes its end bound, so a sample stamped
# exactly 3 h after the first one is zeroed too.
data['UMEBOSHI_02'].loc[:data['UMEBOSHI_02'].index[0]+pd.Timedelta("3h")] = 0

In [None]:
# Write data file: header and modified data (false start) go to
# 'UMEBOSHI_02.AWD' (path relative to the current working directory)
create_awd_file('UMEBOSHI_02', headers['UMEBOSHI_02'], data['UMEBOSHI_02'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_02 = pyActigraphy.io.read_raw_awd('UMEBOSHI_02.AWD')

In [None]:
# Visualize data: values of raw_02.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_02.data.index.astype(str),y=raw_02.data))

### UMEBOSHI 03 ("missing data: no wear period (short&recurrent)")

In [None]:
# Bounds (times of day) of the recurrent daily no-wear window
shower_start_time = '09:10:00'
shower_stop_time = '09:25:00'

In [None]:
# Modify the original data:
# no-wear period of 15 min every day at the same time of day.
# between_time() keeps both bounds by default, so samples stamped exactly
# at shower_start_time and at shower_stop_time are zeroed as well.
data['UMEBOSHI_03'].loc[
    data['UMEBOSHI_03'].between_time(shower_start_time,shower_stop_time).index
] = 0

In [None]:
# Write data file: header and modified data (daily no-wear window) go to
# 'UMEBOSHI_03.AWD' (path relative to the current working directory)
create_awd_file('UMEBOSHI_03', headers['UMEBOSHI_03'], data['UMEBOSHI_03'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_03 = pyActigraphy.io.read_raw_awd('UMEBOSHI_03.AWD')

In [None]:
# Visualize data: values of raw_03.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_03.data.index.astype(str),y=raw_03.data))

### UMEBOSHI 04 ("regular acquisition")

In [None]:
# Write data file: header and unmodified data go to 'UMEBOSHI_04.AWD'
# (path relative to the current working directory)
create_awd_file('UMEBOSHI_04', headers['UMEBOSHI_04'], data['UMEBOSHI_04'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_04 = pyActigraphy.io.read_raw_awd('UMEBOSHI_04.AWD')

In [None]:
# Visualize data: values of raw_04.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_04.data.index.astype(str),y=raw_04.data))

### UMEBOSHI 05 ("delayed acquisition: false stop")

In [None]:
# Modify the original data:
# False stop (3h): zero everything from 3 h before the last sample onwards.
# Label-based .loc slicing includes its start bound, so a sample stamped
# exactly 3 h before the last one is zeroed too.
data['UMEBOSHI_05'].loc[data['UMEBOSHI_05'].index[-1]-pd.Timedelta("3h"):] = 0

In [None]:
# Write data file: header and modified data (false stop) go to
# 'UMEBOSHI_05.AWD' (path relative to the current working directory)
create_awd_file('UMEBOSHI_05', headers['UMEBOSHI_05'], data['UMEBOSHI_05'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_05 = pyActigraphy.io.read_raw_awd('UMEBOSHI_05.AWD')

In [None]:
# Visualize data: values of raw_05.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_05.data.index.astype(str),y=raw_05.data))

### UMEBOSHI 06 ("missing data: no wear period (long)")

In [None]:
# Modify the original data:
# Create a long no-wear period at the middle of the recording.
# middle_time is the index label of the sample at position len(data)//2.
middle_time = data['UMEBOSHI_06'].index[len(data['UMEBOSHI_06'])//2]

In [None]:
# Zero 16 h of data starting at middle_time
# (label-based .loc slicing includes both bounds)
data['UMEBOSHI_06'].loc[middle_time:middle_time+pd.Timedelta("16h")] = 0

In [None]:
# Write data file: header and modified data (long no-wear period) go to
# 'UMEBOSHI_06.AWD' (path relative to the current working directory)
create_awd_file('UMEBOSHI_06', headers['UMEBOSHI_06'], data['UMEBOSHI_06'])

In [None]:
# Read created file with pyActigraphy (check if file is a valid AWD file);
# the resulting object is reused for the plot below
raw_06 = pyActigraphy.io.read_raw_awd('UMEBOSHI_06.AWD')

In [None]:
# Visualize data: values of raw_06.data against their index
# (the index is converted to strings before being passed to plotly)
go.Figure(go.Scatter(x=raw_06.data.index.astype(str),y=raw_06.data))