In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import os

# Installation

# Overview

Currently, the package supports:

- downloading all types of data
- formatting the activity data as shown in the following figure

![alt text](./visualisation/figures/activity_data.png "Title")

The following sections illustrate:

- [How to download weekly data (activity data only)](#weekly_download)

- [How to download arbitrary types of data](#download)

- [How to categorise the data](#formatting)

- [How to pre-process the data](#preprocessing)

- [Other utils](#utils)

# Download weekly data <a id='weekly_download'></a>

### download the data

The following script will download the data for the current week and all previous data.

Note:

- If `reload_weekly` or `reload_all` is `True`, the corresponding data will be downloaded.
- If `reload_weekly` and `reload_all` are both `False`, the code will check for the data and download it only if it does not exist.

In [3]:
# Build the weekly loader and fetch the data.
# NOTE(review): `scirpts` looks like a misspelling of `scripts`; it has to match the
# package directory name, so confirm before renaming.
from scirpts.weekly_loader import Weekly_dataloader
# NOTE(review): the meaning of num_days_extended=3 is not visible here — confirm
# against Weekly_dataloader.
loader = Weekly_dataloader(num_days_extended=3)
# Both reload flags are True; the recorded output below shows the existing export
# request being deleted and a new one created.
loader.load_data(reload_weekly=True, reload_all=True)

Deleting Existing export request
Creating new export request
Exporting the  {'raw_activity_pir': {}, 'raw_door_sensor': {}, 'raw_appliance_use': {}, 'device_types': {}}
Waiting the server to complete the job ...
Waiting the server to complete the job ...
Waiting the server to complete the job ...
Job is completed, start to download the data
Start to export job f56539ea-e66f-4f32-99a0-435988a6ec59
Exporting 1/4        device_types         Success
Exporting 2/4        raw_door_sensor      Success
Exporting 3/4        raw_activity_pir     Success
Exporting 4/4        raw_appliance_use    Success
Processing: raw_door_sensor                       Finished in 0.33 seconds
Processing: raw_appliance_use                     Finished in 0.08 seconds
Processing: device_types                          Finished in 0.00 seconds
Processing: raw_activity_pir                      

KeyboardInterrupt: 

The script will save the data into `./data/weekly_test/**`. To access the data:

In [4]:
# Load the formatted arrays covering all previous activity data.
unlabelled, X, y = (
    np.load(os.path.join(loader.previous_data, filename))
    for filename in ('unlabelled.npy', 'X.npy', 'y.npy')
)

To access the weekly data

In [5]:
# Load the weekly arrays: the unlabelled data plus its patient ids and dates.
weekly_data, p_ids, dates = (
    np.load(os.path.join(loader.weekly_data, filename))
    for filename in ('unlabelled.npy', 'patient_id.npy', 'dates.npy')
)

# Download data <a id='download'></a>

To download specific types of data:

In [18]:
# Create the downloader object used by the export calls in the following cells.
from download.download import Downloader
data_downloader = Downloader()

To download `raw_activity_pir` since `2021-10-10`:

In [19]:
# Request a fresh export of raw_activity_pir data since 2021-10-10 and save it
# under ./data/yesterday/. The recorded output below shows device_types being
# exported alongside the requested category.
data_downloader.export(
    categories=['raw_activity_pir'],
    since='2021-10-10',
    save_path='./data/yesterday/',
    reload=True,
)

Deleting Existing export request
Creating new export request
Exporting the  {'raw_activity_pir': {}, 'device_types': {}}
Waiting the server to complete the job ...
Waiting the server to complete the job ...
Waiting the server to complete the job ...
Job is completed, start to download the data
Start to export job b3da2575-c358-41b0-add6-a91d126ef5bf
Exporting 1/2        device_types         Success
Exporting 2/2        raw_activity_pir     Success


Note: if you want to download data from an export you have already requested, set `reload=False` to avoid creating a duplicate export request.

In [22]:
# Reuse an existing export request instead of creating a new one.
# When several requests exist, the call lists them and asks which one to download
# (see the recorded output below), so it appears to wait for interactive input.
data_downloader.export(reload=False)

Multiple export requests exist, please choose one to download
ID:  883ea5e9-84dc-4604-8b2b-c8b358ee1a6d
Transaction Time 2021-09-06T02:31:42.718Z
Export sensors: homes issue patients encounter procedure raw_light raw_light raw_light raw_light device_types raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_sleep_mat raw_heart_rate raw_behavioural raw_body_weight raw_door_sensor raw_door_sensor raw_sleep_event raw_activity_pir raw_activity_pir raw_activity_pir raw_activity_pir raw_activity_pir raw_activity_pir observation_notes raw_appliance_use raw_blood_pressure raw_total_body_fat raw_body_mass_index raw_total_bone_mass raw_body_muscle_mass raw_body_temperature raw_skin_temperature raw_total_body_water raw_wearable_walking raw_ox

KeyboardInterrupt: 

# Categorise the data <a id='formatting'></a>

The data in the specified directory will be filtered and categorised into the following groups:
- physiological_data
- activity_data
- environmental_data

In [25]:
# Creating the Formatting object processes the raw files under `path`; the
# per-category "Processing: ..." log below is emitted by this cell.
# NOTE(review): categories logged as "TODO" are presumably not handled yet — confirm.
from formatting.formatting import Formatting
formater = Formatting(path='./data/raw_data/')

Processing: raw_sleep_event                       TODO
Processing: raw_body_weight                       Finished in 0.03 seconds
Processing: homes                                 TODO
Processing: raw_body_temperature                  Finished in 0.09 seconds
Processing: raw_skin_temperature                  Finished in 0.06 seconds
Processing: raw_door_sensor                       Finished in 1.57 seconds
Processing: raw_body_muscle_mass                  Finished in 0.02 seconds
Processing: raw_appliance_use                     Finished in 0.33 seconds
Processing: raw_heart_rate                        Finished in 0.05 seconds
Processing: raw_oxygen_saturation                 Finished in 0.03 seconds
Processing: raw_behavioural                       TODO
Processing: raw_total_body_fat                    Finished in 0.02 seconds
Processing: procedure                             TODO
Processing: observation_notes                     TODO
Processing: raw_ambient_temperature               

  


Finished in 3.82 seconds
Processing: device_types                          Finished in 0.00 seconds
Processing: encounter                             TODO
Processing: issue                                 TODO
Processing: raw_light                             Finished in 7.92 seconds
Processing: raw_body_mass_index                   Finished in 0.02 seconds
Processing: raw_wearable_walking                  TODO
Processing: raw_blood_pressure                    Finished in 0.05 seconds
Processing: raw_total_body_water                  Finished in 0.02 seconds
Processing: patients                              Finished in 0.00 seconds
Processing: raw_total_bone_mass                   Finished in 0.02 seconds
Processing: raw_activity_pir                      

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_data['value'] = tmp_data[value]


Finished in 8.27 seconds
Processing: raw_sleep_mat                         TODO


To access the data:

In [26]:
# Bare expression so Jupyter renders the activity DataFrame
# (columns in the recorded output: id, time, location, value).
formater.activity_data

Unnamed: 0,id,time,location,value
0,8rcEsVEy53VBG9eJTMqoK4,2021-04-30T00:04:38.000000Z,front door,1
1,8rcEsVEy53VBG9eJTMqoK4,2021-04-30T00:04:49.000000Z,front door,1
2,8rcEsVEy53VBG9eJTMqoK4,2021-04-30T01:21:13.000000Z,fridge door,1
3,8rcEsVEy53VBG9eJTMqoK4,2021-04-30T01:21:30.000000Z,fridge door,1
4,8rcEsVEy53VBG9eJTMqoK4,2021-04-30T01:31:47.000000Z,fridge door,1
...,...,...,...,...
2695310,JYN9EVX3wyv76VbubFPpUB,2021-10-05T18:06:56.000000Z,kitchen,1
2695311,GUPoZt87k3uxPXtBJX9Q4Y,2021-10-05T18:07:04.000000Z,corridor1,1
2695312,PtSJCv5bDWvZe3f1V7wRgQ,2021-10-05T18:07:08.000000Z,lounge,1
2695313,Mhy2uUxJnCtsEZbToCDDEE,2021-10-05T18:07:11.000000Z,kitchen,1


# Pre-process the data <a id='preprocessing'></a>

### activity data

The data will be aggregated hourly by the following script

In [43]:
# Standardise the categorised activity data; per the accompanying text, this
# aggregates it hourly.
from formatting.standardisation import standardise_activity_data
activity_data = standardise_activity_data(formater.activity_data)

Normalise the data with L2 normalisation and label it (UTI labels):

In [44]:
# Wrap the standardised activity data in the project Dataloader with labelling enabled.
# NOTE(review): the positional argument 3 is unexplained here — presumably the same
# quantity as num_days_extended=3 passed to Weekly_dataloader; confirm against
# Dataloader's signature.
from dataloader.dataloader import Dataloader
dataloader = Dataloader(activity_data, 3, label_data=True)

To access the unlabelled data

In [45]:
# get_unlabelled_data() is unpacked into three values: data, patient ids and dates.
# NOTE(review): `unlablled_dates` is a misspelling of `unlabelled_dates`; the name is
# kept as-is because other cells may reference it.
unlabelled_data, unlabelled_patient_ids, unlablled_dates = dataloader.get_unlabelled_data()

To access the labelled data

In [46]:
# Collect every item yielded by the dataloader into three parallel lists:
# the data, its label, and the patient id it belongs to.
data = []
label = []
patient_ids = []
for sample, target, patient in dataloader.iterate_data():
    data.append(sample)
    label.append(target)
    patient_ids.append(patient)

  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)
  return self._getitem_tuple(key)


# Utilities <a id='utils'></a>

### evaluate

The function will run the model multiple times and:

- split the train/test data by patient IDs
- return a dataframe with the mean and variance of sensitivity, specificity, accuracy and F1 score

Note that the model must implement:

- `re_initialise()`: initialise the model
- `fit`: learn the data
- `predict`: give predictions

In [15]:
# NOTE(review): `model` is never defined in the visible notebook, so this cell raises
# NameError on a fresh kernel unless a model object is created first.
# NOTE(review): X and y are loaded from loader.previous_data while p_ids is loaded from
# loader.weekly_data — confirm these arrays are aligned before relying on the
# patient-based train/test split.
from evaluate.evaluate_models import evaluate
evaluate(model, X, y, p_ids, num_runs=10)