In [0]:
#@title Download and extract the dataset

URL = "https://ieee-dataport.s3.amazonaws.com/open/11167/Training.zip?response-content-disposition=attachment%3B%20filename%3D%22Training.zip%22&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJOHYI4KJCE6Q7MIQ%2F20200616%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200616T082632Z&X-Amz-SignedHeaders=Host&X-Amz-Expires=86400&X-Amz-Signature=945354b7acb38f2172b8978a73ceffbf5943e91a14442c7314dbb666430fb079" #@param {type : "string"} 
savepath = "Training.zip" #@param {type : 'string'}
extractpath = "/content/dataset/" #@param {type : 'string'}

# NOTE(review): the default URL is a pre-signed S3 link (X-Amz-Date=20200616T082632Z,
# X-Amz-Expires=86400 seconds), so it presumably has to be replaced with a freshly
# generated link before this cell can download anything.

import os
import shutil
from urllib.request import urlretrieve
from zipfile import BadZipFile, ZipFile, is_zipfile

# Reuse the cached archive only when it is a readable zip file. A previous run that
# was interrupted mid-download would otherwise leave a truncated file behind that
# passes a plain "file exists" check and then fails on extraction forever.
if not (os.path.isfile(savepath) and is_zipfile(savepath)):
  partial_savepath = savepath + ".part"
  try:
    # Download under a temporary name so `savepath` never holds a partial archive.
    urlretrieve(URL, partial_savepath)
    if not is_zipfile(partial_savepath):
      raise BadZipFile("Downloaded file is not a valid zip archive: " + savepath)
    os.replace(partial_savepath, savepath)
  finally:
    # Only present here if the download or validation failed.
    if os.path.exists(partial_savepath):
      os.remove(partial_savepath)

with ZipFile(savepath, 'r') as zip_file:
  zip_file.extractall(extractpath)

# Drop Colab's bundled sample data; a missing directory is not an error.
shutil.rmtree("/content/sample_data/", ignore_errors=True)

In [0]:
#@title Install Dependencies
from google.colab import files

in_file = files.upload()

if len(in_file.keys()) == 1:
  for fn in in_file.keys():
    requirement = "/content/" + str(fn)

!pip install -r $requirement

<h1> Import Dependencies </h1>

In [0]:
import numpy as np
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
from dtaidistance import dtw
import matplotlib.pyplot as plt
import math
import datetime
import dateutil
import csv
import array
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from numpy import savetxt

# Loading the dataset

In [0]:
# lab data
def _read_csv_with_utc_datetimes(path, datetime_columns):
  """Read a CSV file and convert the given columns to timezone-aware UTC datetimes.

  Args:
    path: Location of the CSV file.
    datetime_columns: Zero-based positions of the columns holding timestamps.

  Returns:
    A DataFrame in which every listed column has been passed through
    `pd.to_datetime(..., utc=True)`. Missing-value detection is disabled
    (`na_filter=False`), so empty fields stay as empty strings in the other columns.
  """
  frame = pd.read_csv(path, na_filter=False)
  # Convert after loading: the `date_parser` / `infer_datetime_format` arguments of
  # `read_csv` are deprecated since pandas 2.0.
  for position in datetime_columns:
    column = frame.columns[position]
    frame[column] = pd.to_datetime(frame[column], utc=True)
  return frame

lab_accel_dataset = _read_csv_with_utc_datetimes("/content/dataset/Lab/bigact_raw_lab_acc.csv", [1])
lab_label_dataset = _read_csv_with_utc_datetimes("/content/dataset/Lab/labels_lab_2users.csv", [2, 3])

In [0]:
# Show the freshly loaded frames: accelerometer samples first, then the labels.
for loaded_frame in (lab_accel_dataset, lab_label_dataset):
  print(loaded_frame)

# Data Processing

<h3> Sort the values according to datetime </h3>

In [0]:
# Put both frames in chronological order and renumber their rows from 0:
# samples by their timestamp, labels by interval start (ties broken by finish).
lab_accel_dataset = lab_accel_dataset.sort_values(by=['datetime'], ignore_index=True)
lab_label_dataset = lab_label_dataset.sort_values(by=['start', 'finish'], ignore_index=True)

<h3> Truncate rows from the accelerometer dataset whose datetime falls outside the time range covered by the label dataset </h3>

In [0]:
# Time span covered by the labels: earliest interval start to latest interval finish.
# The upper bound must come from 'finish'; bounding by the last 'start' would discard
# every sample recorded during the final labelled activity (and during any interval
# that ends after the last one begins).
lab_label_first_entry = lab_label_dataset['start'].min()
lab_label_last_entry = lab_label_dataset['finish'].max()

# Keep only samples inside that span (both ends inclusive) and renumber rows from 0.
in_label_span = lab_accel_dataset['datetime'].between(lab_label_first_entry, lab_label_last_entry)
trunc_lab_accel_dataset = lab_accel_dataset.loc[in_label_span].reset_index(drop=True)

<h3> Extract accelerometer data that corresponds to label datetime </h3>

In [0]:
# One frame per labelled interval, collected here and concatenated in a later cell.
final_aligned_dataset = []

sample_times = trunc_lab_accel_dataset['datetime']
label_windows = zip(lab_label_dataset['start'], lab_label_dataset['finish'], lab_label_dataset['act_id'])

for window_start, window_end, activity in label_windows:
  # Samples whose timestamp lies inside the interval, both ends inclusive.
  in_window = sample_times.between(window_start, window_end)
  if not in_window.any():
    continue

  # Both pieces are renumbered from 0 so the column-wise concat lines them up row by row.
  window_samples = trunc_lab_accel_dataset.loc[in_window, ['x', 'y', 'z']].reset_index(drop=True)
  activity_column = pd.Series(activity).repeat(len(window_samples)).reset_index(drop=True)

  # ignore_index=True on axis=1 leaves the columns labelled 0..3; they are named later.
  final_aligned_dataset.append(pd.concat([window_samples, activity_column], axis=1, ignore_index=True))

<h3> Generate aligned dataframe </h3>

In [0]:
# `final_aligned_dataset` starts out as a list of per-interval frames and is rebound
# to the concatenated DataFrame here. The type check keeps the cell safe to re-run:
# on a second execution the name already holds the DataFrame and is left as is.
if isinstance(final_aligned_dataset, list):
  if not final_aligned_dataset:
    raise ValueError("No accelerometer samples fell inside any labelled interval; nothing to concatenate.")
  # Row labels are kept as produced per interval, so they restart at 0 for each chunk.
  final_aligned_dataset = pd.concat(final_aligned_dataset)
final_aligned_dataset.columns = ['x', 'y', 'z', 'act_id']

In [0]:
#@title Save aligned data as csv

savepath = "/content/processed" #@param {type : 'string'}
savename = "final_lab_dataset.csv" #@param {type : 'string'}

import os

# Destination file inside the output folder.
complete_savename = f"{savepath}/{savename}"

# Create the output folder if it is missing, then write the frame (row index included).
os.makedirs(savepath, exist_ok=True)
final_aligned_dataset.to_csv(complete_savename)