<a href="https://colab.research.google.com/github/Niloy28/NurseActivityRecognition/blob/master/lab_data_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Download and extract the dataset

URL = "https://ieee-dataport.s3.amazonaws.com/open/11167/Training.zip?response-content-disposition=attachment%3B%20filename%3D%22Training.zip%22&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJOHYI4KJCE6Q7MIQ%2F20200616%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200616T082632Z&X-Amz-SignedHeaders=Host&X-Amz-Expires=86400&X-Amz-Signature=945354b7acb38f2172b8978a73ceffbf5943e91a14442c7314dbb666430fb079" #@param {type : "string"} 
savepath = "Training.zip" #@param {type : 'string'}
extractpath = "/content/dataset/" #@param {type : 'string'}

from zipfile import ZipFile
from urllib.request import urlretrieve
import os

if not os.path.isfile(savepath):
  urlretrieve(URL, savepath)
with ZipFile(savepath, 'r') as zip_file:
  zip_file.extractall(extractpath)

!rm -rf /content/sample_data/

In [None]:
#@title Install Dependencies
from google.colab import files

in_file = files.upload()

if len(in_file.keys()) == 1:
  for fn in in_file.keys():
    requirement = "/content/" + str(fn)

!pip install -r $requirement

<h1> Import Dependencies </h1>

In [1]:
import numpy as np
import pandas as pd
import math
import datetime
import dateutil

In [2]:
#@title Loading the lab dataset
# NOTE(review): both paths point under /content/drive, and no cell visible in
# this notebook mounts Google Drive. Mount it first, or point these fields at
# the files extracted by the download cell.
accel_dataset_file = "/content/drive/My Drive/nurse care data/training/Lab/bigact_raw_lab_acc.csv" #@param {type : "string"}
label_dataset_file = "/content/drive/My Drive/nurse care data/training/Lab/labels_lab_2users.csv" #@param {type : "string"}
ignore_seconds = True #@param {type : "boolean"}

accel_dataset = pd.read_csv(accel_dataset_file)
# Drop the last five characters of every timestamp string before parsing
# (presumably a UTC offset suffix such as "+0900" -- TODO confirm against the
# CSV). The vectorized .str slice replaces a per-row Python loop.
accel_dataset['datetime'] = pd.to_datetime(accel_dataset['datetime'].str[:-5])

# Columns at positions 2 and 3 are parsed as datetimes; na_filter=False keeps
# empty fields as empty strings instead of NaN. `infer_datetime_format` is
# not passed: it is deprecated in pandas 2.x and does not change the parsed
# values.
label_dataset = pd.read_csv(label_dataset_file, na_filter=False, parse_dates=[2, 3])

if ignore_seconds:
    # Truncate every timestamp to the start of its minute. dt.floor is
    # vectorized, also clears sub-microsecond digits, and passes missing
    # timestamps (NaT) through instead of raising.
    accel_dataset['datetime'] = accel_dataset['datetime'].dt.floor('min')
    label_dataset['start'] = label_dataset['start'].dt.floor('min')
    label_dataset['finish'] = label_dataset['finish'].dt.floor('min')


## Data Processing

In [3]:
# Split both tables into one frame per user (ids 1 and 19) and renumber the
# rows of each frame from zero, so that row labels and row positions coincide.
is_accel_user1 = accel_dataset['user_id'] == 1
is_accel_user19 = accel_dataset['user_id'] == 19
is_label_user1 = label_dataset['user_id'] == 1
is_label_user19 = label_dataset['user_id'] == 19

accel_user1 = accel_dataset.loc[is_accel_user1].reset_index(drop=True)
accel_user19 = accel_dataset.loc[is_accel_user19].reset_index(drop=True)

label_user1 = label_dataset.loc[is_label_user1].reset_index(drop=True)
label_user19 = label_dataset.loc[is_label_user19].reset_index(drop=True)

### Extract the accelerometer samples that fall within each label's start–finish interval


In [4]:
def _align_user_chunks(accel_user, label_user):
  """Collect the accelerometer samples that fall inside each labelled interval.

  Args:
    accel_user: accelerometer rows of a single user; must provide the columns
      'datetime', 'x', 'y' and 'z'.
    label_user: label rows of the same user; must provide the columns 'start'
      and 'finish', and hold the activity id in its second column
      (position 1).

  Returns:
    A list with one DataFrame per label row that matched at least one sample,
    in label-row order. Each frame has four positionally numbered columns
    (0-3): x, y, z and the activity id repeated once per sample.
  """
  chunks = []
  # Rows are addressed by position throughout, so the helper does not depend
  # on the label frame carrying a 0..n-1 index.
  for row in range(len(label_user)):
    start_date = label_user['start'].iat[row]
    end_date = label_user['finish'].iat[row]

    # Both ends of the interval are inclusive.
    mask = (accel_user['datetime'] >= start_date) & (accel_user['datetime'] <= end_date)
    masked_dataset = accel_user.loc[mask, ['x', 'y', 'z']]
    if masked_dataset.empty:
      continue

    act_series = pd.Series(label_user.iat[row, 1]).repeat(masked_dataset.shape[0])

    # Both pieces need the same 0..n-1 index so the column-wise concat lines
    # the activity id up with every sample row.
    act_series = act_series.reset_index(drop=True)
    masked_dataset = masked_dataset.reset_index(drop=True)

    chunks.append(pd.concat([masked_dataset, act_series], ignore_index=True, axis=1))
  return chunks


# Chunks of user 1 come first, followed by those of user 19.
final_aligned_dataset = []
final_aligned_dataset.extend(_align_user_chunks(accel_user1, label_user1))
final_aligned_dataset.extend(_align_user_chunks(accel_user19, label_user19))

### Generate aligned dataframe

In [5]:
# Stack the per-label chunks into one frame, discard repeated rows while
# renumbering the index from zero, then give the four columns their names.
final_aligned_dataset = pd.concat(final_aligned_dataset).drop_duplicates(ignore_index=True)
final_aligned_dataset.columns = ['x', 'y', 'z', 'act_id']

In [6]:
#@title Save aligned data as csv

savepath = "/content/processed" #@param {type : 'string'}
savename = "final_lab.csv" #@param {type : 'string'}

import os

# Make sure the output directory exists; an already existing one is fine.
os.makedirs(savepath, exist_ok=True)

# Write the aligned frame without its index column.
complete_savename = f"{savepath}/{savename}"
final_aligned_dataset.to_csv(complete_savename, index=False)