# UCI Daphnet dataset (Freezing of gait for Parkinson's disease patients)

In [1]:
import numpy as np
import pandas as pd
import os
from typing import List
from pathlib import Path
from config import data_raw_folder, data_processed_folder
from timeeval import Datasets
import matplotlib
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (20, 10)

In [13]:
dataset_collection_name = "Daphnet"
source_folder = Path(data_raw_folder) / "UCI ML Repository/Daphnet/dataset"
target_folder = Path(data_processed_folder)

print(f"Looking for source datasets in {source_folder.absolute()} and\nsaving processed datasets in {target_folder.absolute()}")

Looking for source datasets in /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset and
saving processed datasets in /home/projects/akita/data/benchmark-data/data-processed


In [9]:
train_type = "unsupervised"
train_is_normal = False
input_type = "multivariate"
datetime_index = True
dataset_type = "real"

# create target directory
dataset_subfolder = os.path.join(input_type, dataset_collection_name)
target_subfolder = os.path.join(target_folder, dataset_subfolder)
try:
    os.makedirs(target_subfolder)
    print(f"Created directories {target_subfolder}")
except FileExistsError:
    print(f"Directories {target_subfolder} already exist")
    pass

dm = Datasets(target_folder)
experiments = [f for f in source_folder.iterdir()]
experiments

Directories /home/projects/akita/data/benchmark-data/data-processed/multivariate/Daphnet already exist


[PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S02R02.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S03R03.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S10R01.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S01R02.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S09R01.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S07R01.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S06R01.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S03R01.txt'),
 PosixPath('/home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S02R01.txt'),
 PosixPath

In [10]:
columns = ["timestamp", "ankle_horiz_fwd", "ankle_vert", "ankle_horiz_lateral", "leg_horiz_fwd", "leg_vert", "leg_horiz_lateral",
          "trunk_horiz_fwd", "trunk_vert", "trunk_horiz_lateral", "is_anomaly"]

def transform_experiment_file(path: Path) -> List[pd.DataFrame]:
    df = pd.read_csv(path, sep=" ", header=None)
    df.columns = columns
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
    # slice out experiments (0 annotation shows unrelated data points (preparation/briefing/...))
    s_group = df["is_anomaly"].isin([1, 2])
    s_diff = s_group.shift(-1) - s_group

    starts = (df[s_diff == 1].index + 1).values  # first point has annotation 0 --> index + 1
    ends = df[s_diff == -1].index.values
    
    dfs = []
    for start, end in zip(starts, ends):
        df1 = df.iloc[start:end].copy()
        df1["is_anomaly"] = (df1["is_anomaly"] == 2).astype(int)
        dfs.append(df1)
    return dfs

In [11]:
for exp in experiments:
    # transform file to get datasets
    datasets = transform_experiment_file(exp)
    for i, df in enumerate(datasets):
        # get target filenames
        experiment_name = os.path.splitext(exp.name)[0]
        dataset_name = f"{experiment_name}E{i}"
        filename = f"{dataset_name}.test.csv"
        path = os.path.join(dataset_subfolder, filename)
        target_filepath = os.path.join(target_subfolder, filename)

        # calc length and save in file
        dataset_length = len(df)
        df.to_csv(target_filepath, index=False)
        print(f"Processed source dataset {exp} -> {target_filepath}")

        # save metadata
        dm.add_dataset((dataset_collection_name, dataset_name),
            train_path = None,
            test_path = path,
            dataset_type = dataset_type,
            datetime_index = datetime_index,
            split_at = None,
            train_type = train_type,
            train_is_normal = train_is_normal,
            input_type = input_type,
            dataset_length = dataset_length
        )

dm.save()

Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S02R02.txt -> /home/projects/akita/data/benchmark-data/data-processed/multivariate/Daphnet/S02R02E0.test.csv
Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S03R03.txt -> /home/projects/akita/data/benchmark-data/data-processed/multivariate/Daphnet/S03R03E0.test.csv
Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S10R01.txt -> /home/projects/akita/data/benchmark-data/data-processed/multivariate/Daphnet/S10R01E0.test.csv
Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S10R01.txt -> /home/projects/akita/data/benchmark-data/data-processed/multivariate/Daphnet/S10R01E1.test.csv
Processed source dataset /home/projects/akita/data/benchmark-data/data-raw/UCI ML Repository/Daphnet/dataset/S01R02.txt -> /home

In [12]:
dm.refresh()
dm.df().loc[(slice(dataset_collection_name,dataset_collection_name), slice(None))]

Unnamed: 0_level_0,Unnamed: 1_level_0,train_path,test_path,dataset_type,datetime_index,split_at,train_type,train_is_normal,input_type,length
collection_name,dataset_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Daphnet,S01R01E0,,multivariate/Daphnet/S01R01E0.test.csv,real,True,,unsupervised,False,multivariate,19200
Daphnet,S01R01E1,,multivariate/Daphnet/S01R01E1.test.csv,real,True,,unsupervised,False,multivariate,73600
Daphnet,S01R02E0,,multivariate/Daphnet/S01R02E0.test.csv,real,True,,unsupervised,False,multivariate,28800
Daphnet,S02R01E0,,multivariate/Daphnet/S02R01E0.test.csv,real,True,,unsupervised,False,multivariate,25600
Daphnet,S02R02E0,,multivariate/Daphnet/S02R02E0.test.csv,real,True,,unsupervised,False,multivariate,64960
Daphnet,S03R01E0,,multivariate/Daphnet/S03R01E0.test.csv,real,True,,unsupervised,False,multivariate,55040
Daphnet,S03R01E1,,multivariate/Daphnet/S03R01E1.test.csv,real,True,,unsupervised,False,multivariate,35840
Daphnet,S03R02E0,,multivariate/Daphnet/S03R02E0.test.csv,real,True,,unsupervised,False,multivariate,16640
Daphnet,S03R03E0,,multivariate/Daphnet/S03R03E0.test.csv,real,True,,unsupervised,False,multivariate,21120
Daphnet,S04R01E0,,multivariate/Daphnet/S04R01E0.test.csv,real,True,,unsupervised,False,multivariate,99840


## Experimentation

Annotations

- `0`: not part of the experiment.
  For instance the sensors are installed on the user or the user is performing activities unrelated to the experimental protocol, such as debriefing
- `1`: experiment, no freeze (can be any of stand, walk, turn)
- `2`: freeze

In [None]:
columns = ["timestamp", "ankle_horiz_fwd", "ankle_vert", "ankle_horiz_lateral", "leg_horiz_fwd", "leg_vert", "leg_horiz_lateral",
          "trunk_horiz_fwd", "trunk_vert", "trunk_horiz_lateral", "annotation"]
df1 = pd.read_csv(source_folder / "S01R01.txt", sep=' ', header=None)
df1.columns = columns
df1["timestamp"] = pd.to_datetime(df1["timestamp"], unit="ms")
df1

In [None]:
columns = [c for c in columns if c not in ["timestamp", "annotation"]]
df_plot = df1.set_index("timestamp", drop=True)#.loc["1970-01-01 00:15:00":"1970-01-01 00:16:00"]
df_plot.plot(y=columns, figsize=(20,10))
df_plot["annotation"].plot(secondary_y=True)
plt.legend()
plt.show()

In [None]:
s_group = df1["annotation"].isin([1, 2])
s_diff = s_group.shift(-1) - s_group

starts = (df1[s_diff == 1].index + 1).values
ends = df1[s_diff == -1].index.values
starts, ends

In [None]:
dfs = [df1.iloc[start:end] for start, end in zip(starts, ends)]
len(dfs)

In [None]:
columns = [c for c in columns if c not in ["timestamp", "annotation"]]
for df in dfs:
    df = df.set_index("timestamp", drop=True)
    df.plot(y=columns, figsize=(20,10))
    df["annotation"].plot(secondary_y=True)
plt.show()