# Testing for the readers module

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  class WereableData:


In [None]:
import pandas as pd
from fastcore.test import *
from circadian.readers import *

# Pandas Accessor

In [None]:
#| hide
# Tests for the WereableData accessor: column/metadata validation, column
# renaming, is_valid and add_metadata.

# test WereableData's column validation
# a frame with only timestamps carries no wereable stream, so validation must fail
df_only_datetime = pd.DataFrame({'datetime': pd.date_range('2020-01-01', periods=10)})
test_fail(lambda: WereableData._validate_columns(df_only_datetime),
          contains="DataFrame must have at least one wereable data column from: ['steps',")
# a frame with a stream but no 'datetime' column must fail as well
df_only_wereable = pd.DataFrame({'steps': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
test_fail(lambda: WereableData._validate_columns(df_only_wereable),
          contains="DataFrame must have 'datetime' column")
# test WereableData's metadata validation
# non-string value (data_id is an int)
test_fail(lambda: WereableData._validate_metadata({'data_id': 1, 'subject_id': 'test'}),
          contains="Metadata values must be strings")
# neither of the required keys is present
test_fail(lambda: WereableData._validate_metadata({'test': 'test'}),
          contains="Metadata must have at least one of the following keys: data_id, subject_id")
# test column renaming
df = pd.DataFrame({'DateTime': pd.date_range('2020-01-01', periods=10),
                     'Steps': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
# The non-inplace call runs first, while the columns are still capitalised;
# calling it after the in-place rename would pass even if it did nothing.
new_df = WereableData.rename_columns(df)
test_eq(new_df.columns.tolist(), ['datetime', 'steps'])
# without inplace=True the input frame is expected to keep its original names
test_eq(df.columns.tolist(), ['DateTime', 'Steps'])
WereableData.rename_columns(df, inplace=True)
test_eq(df.columns.tolist(), ['datetime', 'steps'])
# test is_valid
df = pd.DataFrame({'datetime': pd.date_range('2020-01-01', periods=10),
                   'steps': [0, 10, 0, 0, 0, 0, 20, 28, 0, 0],
                   'heartrate': [120, 90, 100, 80, 120, 100, 140, 130, 120, 110]})
test_eq(df.wereable.is_valid(), True)
# capitalised column names are not accepted by the accessor
df = pd.DataFrame({'Datetime': pd.date_range('2020-01-01', periods=10),
                   'Steps': [0, 10, 0, 0, 0, 0, 20, 28, 0, 0],
                   'Heartrate': [120, 90, 100, 80, 120, 100, 140, 130, 120, 110]})
test_fail(lambda: df.wereable.is_valid(),
          contains="DataFrame must have 'datetime'")
# test add_metadata
df = pd.DataFrame({'datetime': pd.date_range('2020-01-01', '2020-01-02', periods=10),
                   'steps': [0, 10, 0, 0, 0, 0, 20, 28, 0, 0],
                   'heartrate': [120, 90, 100, 80, 120, 100, 140, 130, 120, 110]})
df.wereable.add_metadata({'data_id': 'test', 'subject_id': 'test'}, inplace=True)
# with inplace=True the metadata is read back from the frame's attrs
test_eq(df.attrs['data_id'], 'test')
test_eq(df.attrs['subject_id'], 'test')

# Load files

In [None]:
#| hide
# load_json: argument validation first, then loading the bundled sample file
json_path = '../../circadian/sample_data/sample_data.json'
# each entry pairs a call that must raise with a fragment of the expected message
failing_calls = (
    (lambda: load_json(1), "Filepath must be a string."),
    (lambda: load_json(json_path, metadata={'data_id': 1, 'subject_id': 'test'}),
     "Metadata values must be strings."),
)
for bad_call, message in failing_calls:
    test_fail(bad_call, contains=message)
# TODO: test when all keys are not valid
# TODO: test that excluded message is printed
# loading without metadata only has to succeed, so the result is not kept
load_json(json_path)
streams = load_json(json_path,
                    metadata={'data_id': 'sample_data', 'subject_id': 'sample_subject'})
# the metadata passed in should be readable from the 'steps' frame's attrs
for key, value in (('data_id', 'sample_data'), ('subject_id', 'sample_subject')):
    test_eq(streams['steps'].attrs[key], value)
# time columns: 'datetime' on the heartrate frame, 'start' on the steps frame
for stream_name, column in (('heartrate', 'datetime'), ('steps', 'start')):
    test_eq(column in streams[stream_name].columns, True)

In [None]:
#| hide
# load_csv: successful loads of the bundled sample, then every rejected call
csv_path = '../../circadian/sample_data/sample_data.csv'
# the time column is passed explicitly; load once without and once with metadata
df = load_csv(csv_path, timestamp_col='timestamp')
df = load_csv(csv_path,
              timestamp_col='timestamp',
              metadata={'data_id': 'sample_data', 'subject_id': 'sample_subject'})
# message expected whenever no timestamp column is given for this file
no_datetime_message = "CSV file must have a column named 'datetime' or a timestamp column must be provided."
rejected_calls = (
    # no datetime column, with and without metadata
    (lambda: load_csv(csv_path), no_datetime_message),
    (lambda: load_csv(csv_path, metadata={'data_id': '1', 'subject_id': 'sample_subject'}),
     no_datetime_message),
    # wrong argument types
    (lambda: load_csv(1), "Filepath must be a string."),
    (lambda: load_csv(csv_path, timestamp_col=1), "Timestamp column must be a string."),
    (lambda: load_csv(csv_path, metadata=1), "Metadata must be a dictionary."),
)
for rejected, message in rejected_calls:
    test_fail(rejected, contains=message)

In [None]:
#| hide
# load_actiwatch: the bundled sample must load and bad argument types must be rejected
actiwatch_path = '../../circadian/sample_data/sample_actiwatch.csv'
df = load_actiwatch(actiwatch_path)
# (call that must raise, fragment of the expected error message)
for rejected, message in (
    (lambda: load_actiwatch(1), "Filepath must be a string."),
    (lambda: load_actiwatch(actiwatch_path, metadata=1), "Metadata must be a dictionary."),
):
    test_fail(rejected, contains=message)

# Resampling

In [None]:
#| hide
# test resampling wereable dataframes
def _assert_regular_spacing(frame, freq):
    "Check that every pair of consecutive `datetime` values in `frame` is exactly `freq` apart"
    # diff() gives the gap to the previous row; the first row has none, hence dropna()
    gaps = frame['datetime'].diff().dropna()
    # need at least one gap, otherwise the comparison below would pass vacuously
    test_eq(len(gaps) > 0, True)
    # comparing against a Timedelta avoids depending on the element type of `Series.unique()`
    test_eq(bool((gaps == pd.Timedelta(freq)).all()), True)

# interval data: each row covers [start, end] and carries a step count
df = pd.DataFrame({
    'start': [
        pd.to_datetime('2020-01-01 00:00:00'),
        pd.to_datetime('2020-01-01 00:05:00'),
        pd.to_datetime('2020-01-01 00:10:00'),
        pd.to_datetime('2020-01-01 00:20:00'),
        pd.to_datetime('2020-01-01 00:50:00'),
    ],
    'end': [
        pd.to_datetime('2020-01-01 00:04:00'),
        pd.to_datetime('2020-01-01 00:10:00'),
        pd.to_datetime('2020-01-01 00:13:00'),
        pd.to_datetime('2020-01-01 01:00:00'),
        pd.to_datetime('2020-01-01 01:05:00'),
    ],
    'steps': [4, 10, 9, 22, 15],
})
name = 'steps'
method = 'sum'
# 20 min freq
twenty_min_ground_truth = pd.DataFrame({
    'datetime': [
        pd.to_datetime('2020-01-01 00:00:00'),
        pd.to_datetime('2020-01-01 00:20:00'),
        pd.to_datetime('2020-01-01 00:40:00'),
        pd.to_datetime('2020-01-01 01:00:00'),
    ],
    'steps': [6.55, 0.55, 1.55, 1],
})
freq = '20min'
new_df = resample_df(df, name, freq, method)
_assert_regular_spacing(new_df, freq)
test_eq(new_df, twenty_min_ground_truth)
# 1 min freq (only the spacing is checked, not the values)
freq = '1min'
new_df = resample_df(df, name, freq, method)
_assert_regular_spacing(new_df, freq)
# per datetime data: one heartrate reading per timestamp
df = pd.DataFrame({
    'datetime': [
        pd.to_datetime('2020-01-01 00:00:00'),
        pd.to_datetime('2020-01-01 00:15:00'),
        pd.to_datetime('2020-01-01 00:22:00'),
        pd.to_datetime('2020-01-01 00:40:00'),
        pd.to_datetime('2020-01-01 01:02:00'),
    ],
    'heartrate': [90, 110, 80, 90, 100],
})
twenty_min_ground_truth = pd.DataFrame({
    'datetime': [
        pd.to_datetime('2020-01-01 00:00:00'),
        pd.to_datetime('2020-01-01 00:20:00'),
        pd.to_datetime('2020-01-01 00:40:00'),
        pd.to_datetime('2020-01-01 01:00:00'),
    ],
    'heartrate': [100.0, 85.0, 90.0, 100.0],
})
name = 'heartrate'
method = 'mean'
# 20 min freq
freq = '20min'
new_df = resample_df(df, name, freq, method)
_assert_regular_spacing(new_df, freq)
test_eq(new_df, twenty_min_ground_truth)

# Combine

In [None]:
#| hide
# combine_wereable_dataframes: merge an interval-based steps frame and a
# per-timestamp heartrate frame, then compare with the expected 10 min table
day = '2020-01-01'

# steps are recorded over [start, end] intervals
steps_df = pd.DataFrame({
    'start': [pd.to_datetime(f'{day} {clock}') for clock in
              ('00:01:00', '00:11:00', '00:15:00', '00:20:00', '00:50:00')],
    'end': [pd.to_datetime(f'{day} {clock}') for clock in
            ('00:11:00', '00:15:00', '00:25:00', '00:30:00', '01:00:00')],
    'steps': [9, 18, 15, 30, 10],
})
# heartrate has a single reading per timestamp
heartrate_df = pd.DataFrame({
    'datetime': [pd.to_datetime(f'{day} {clock}') for clock in
                 ('00:09:00', '00:15:00', '00:22:00', '00:40:00', '00:58:00')],
    'heartrate': [90, 110, 80, 90, 100],
})

combined = combine_wereable_dataframes(
    {'steps': steps_df, 'heartrate': heartrate_df},
    {'data_id': 'combined_dataframe', 'subject_id': 'test'},
    '10min',
)

# expected result: one row every 10 minutes, starting at 00:01
expected = pd.DataFrame({
    'datetime': [pd.to_datetime(f'{day} {clock}') for clock in
                 ('00:01:00', '00:11:00', '00:21:00', '00:31:00', '00:41:00', '00:51:00')],
    'steps': [5.4, 9.0, 4.5, 0.0, 1.0, 1.0],
    'heartrate': [90.0, 110.0, 80.0, 90.0, 0.0, 100.0]
})
test_eq(combined, expected)