In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

root_dir = '/content/gdrive/MyDrive/Data'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import os
import random
import shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import tensorflow as tf
from tensorflow import keras

In [25]:
from pandas.core.frame import DataFrame
from time import sleep

def PreprocessAWSFile(aws_file, sheet_name):
  aws = pd.read_excel(aws_file, sheet_name)
  aws = aws.drop(['Line', 'Epoch', 'Day', 'Seconds', 'Off-Wrist Status', 'Marker', 
                  'White Light', 'Red Light', 'Green Light', 'Blue Light', 
                  'Sleep/Wake', 'Mobility', 'Interval Status', 'S/W Status'], axis=1)

  aws_complete_minute = pd.DataFrame(aws['Activity'])
  aws_complete_minute['Label'] = aws.apply(
      lambda x: str(x['Date'])[:10] + ' ' + x['Time'].strftime("%H:%M:%S"), axis=1)

  aws_complete_half_minute = pd.DataFrame({'Activity': [np.nan] * aws.shape[0]})
  aws_complete_half_minute['Label'] = aws.apply(
      lambda x: str(x['Date'])[:10] + ' ' + x['Time'].strftime("%H:%M")+':30', axis=1)

  aws = pd.concat([aws_complete_half_minute, aws_complete_minute]).sort_values(
      by=['Label']).reset_index(drop=True)

  aws['Activity'] = aws['Activity'].interpolate().replace(np.nan, 0)
  return aws

def PreprocessAX3File(ax3_file, chunks = 2):
  ax3 = pd.read_csv(ax3_file, header=None, squeeze=True)
  section_size = int(ax3.shape[0] / chunks)
  for i in range(chunks):
    ax3_processed_part = PreprocessAX3Data(ax3[i * section_size: section_size * (i + 1)])
    ax3_processed_part.to_pickle('part' + str(i+1) + '.pkl')
    ax3_processed_part = None
    print(i+1)
    sleep(60) # Waiting for RAM to get emptied
  ax3 = None

def GetProcessedAX3Data(chunks = 2):
  ax3 = None
  for i in range(chunks):
    file_name = 'part' + str(i+1) + '.pkl'
    temp = pd.read_pickle(file_name)
    ax3 = pd.concat([ax3, temp])
    os.remove(file_name)
    temp = 0
  ax3 = ax3.reset_index()
  return ax3

def PreprocessAX3Data(ax3):
  ax3.columns = (['Date_time', 'X', 'Y', 'Z', 'Light', 'Temp'])
  ax3 = ax3.drop(["Light"], axis=1)
  ax3 = ax3[:-1]

  ax3['Label'] = ax3['Date_time'].map(lambda x : 
                x[0:17] + str(int(float(x[17:])>=30)*3) + "0")

  ax3 = ax3.groupby(['Label']).agg({'X': lambda x : list(x), 
                                    'Y': lambda x : list(x), 
                                    'Z': lambda x : list(x), 
                                    'Temp': lambda x : list(x)}).reset_index()
  print(len(ax3.iloc[10,2]))
  return ax3

def FixArraySize(arr):
  arr = arr[:3000]
  arr.extend(max(3000 - len(arr), 0) * [np.nan])
  arr = pd.Series(arr).interpolate()
  return arr.tolist()

def FixAX3ListValueSize(ax3):
  ax3['X'] = ax3['X'].apply(lambda x: FixArraySize(x))
  ax3['Y'] = ax3['Y'].apply(lambda x: FixArraySize(x))
  ax3['Z'] = ax3['Z'].apply(lambda x: FixArraySize(x))
  ax3['Temp'] = ax3['Temp'].apply(lambda x: FixArraySize(x))
  return ax3

Merging AX3 and AWS Data

In [11]:
ax3_extracted_dir = os.path.join(root_dir, "18 participants/AX3/Extracted")
ax3_saving_dir = os.path.join(root_dir, "18 participants/AX3/Preprocessed")
aws_file = os.path.join(root_dir, "18 participants/AWS/SDRI001_AWS_ALL_ALLV4_N1.xlsx")
psg_dir = os.path.join(root_dir, "18 participants/PSG")

participant = 13

aws = PreprocessAWSFile(aws_file, participant - 1)
ax3_file = os.path.join(ax3_extracted_dir, 
                        "AX3_step1_sub" + str(participant) + ".csv")
save_file = os.path.join(ax3_saving_dir, 
                        "AX3_step1_sub" + str(participant) + ".pkl")
PreprocessAX3File(ax3_file, 4)
sleep(60)
ax3 = GetProcessedAX3Data(4)
sleep(60)
ax3 = FixAX3ListValueSize(ax3)
merged = pd.merge(ax3, aws, on="Label")
merged.to_pickle(save_file)
print(merged.head(5))



3064
1
3052
2
3046
3
3028
4
Empty DataFrame
Columns: [index, Label, X, Y, Z, Temp, Activity]
Index: []


In [27]:
aws = PreprocessAWSFile(aws_file, 12)
print(aws.head(5))



   Activity                Label
0       0.0  2020-02-28 14:16:00
1       0.0  2020-02-28 14:16:30
2       0.0  2020-02-28 14:17:00
3       0.0  2020-02-28 14:17:30
4       0.0  2020-02-28 14:18:00


In [28]:
merged = pd.merge(ax3, aws, on="Label")
merged.to_pickle(save_file)
print(merged.head(5))
print(aws.head(5))


   index                Label  \
0     86  2020-02-28 14:16:00   
1     87  2020-02-28 14:16:30   
2     88  2020-02-28 14:17:00   
3     89  2020-02-28 14:17:30   
4     90  2020-02-28 14:18:00   

                                                   X  \
0  [0.9375, 0.9375, 0.9375, 0.9375, 0.9375, 0.937...   
1  [0.46875, 0.4375, 0.4375, 0.4375, 0.453125, 0....   
2  [0.9375, 0.921875, 0.921875, 0.921875, 0.92187...   
3  [0.921875, 0.921875, 0.921875, 0.921875, 0.937...   
4  [0.921875, 0.921875, 0.90625, 0.90625, 0.90625...   

                                                   Y  \
0  [0.078125, 0.078125, 0.078125, 0.078125, 0.078...   
1  [0.734375, 0.765625, 0.78125, 0.78125, 0.76562...   
2  [0.15625, 0.15625, 0.15625, 0.15625, 0.15625, ...   
3  [0.125, 0.125, 0.125, 0.125, 0.109375, 0.10937...   
4  [0.109375, 0.09375, 0.09375, 0.109375, 0.10937...   

                                                   Z  \
0  [0.171875, 0.171875, 0.171875, 0.171875, 0.187...   
1  [0.125, 0.09