In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# local modules are picked up without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import config

from preprocessing import dataPreProcess

In [11]:
def read_files(activity):
    """Load and preprocess every raw recording configured for one activity.

    Each file name listed in ``config.DATA_FILES[activity]`` is resolved under
    ``config.DATA_FOLDER/<activity>/``, read as text lines, and passed to
    ``dataPreProcess``.

    Parameters
    ----------
    activity : str
        Key into ``config.DATA_FILES``; also used as the sub-folder name and
        as the label attached to every result.

    Returns
    -------
    list of tuple
        One ``(dataPreProcess(lines), activity)`` pair per configured file, in
        configuration order. The preprocessing result is stored as-is; the
        notebook output shows it can be ``None`` for some recordings.
    """
    labelled = []
    for file_name in config.DATA_FILES[activity]:
        source = Path(config.DATA_FOLDER, activity, file_name)
        with open(source, 'r') as handle:
            lines = handle.readlines()
        labelled.append((dataPreProcess(lines), activity))
    return labelled

In [10]:
def split_dataframes(df, sample_count):
    """Cut a dataframe into consecutive chunks of at most ``sample_count`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split along its rows.
    sample_count : int
        Number of rows per chunk; must be at least 1.

    Returns
    -------
    list of pandas.DataFrame
        Independent copies of each chunk, in row order, each with a fresh
        ``0..n-1`` index. Every chunk has ``sample_count`` rows except
        possibly the last, which holds the remainder. An empty ``df`` yields
        a single empty chunk.

    Raises
    ------
    ValueError
        If ``sample_count`` is smaller than 1.
    """
    if sample_count < 1:
        raise ValueError(
            f"sample_count must be a positive integer, got {sample_count!r}"
        )

    # Positional slicing instead of np.array_split: on a DataFrame the numpy
    # helper goes through DataFrame.swapaxes, which pandas has deprecated.
    # max(..., 1) preserves the single empty chunk for an empty frame.
    starts = range(0, max(len(df), 1), sample_count)
    return [
        df.iloc[start:start + sample_count].copy().reset_index(drop=True)
        for start in starts
    ]

In [19]:
def preprocess(dfs, sample_count=20):
    """Split labelled recordings into fixed-size samples with matching labels.

    Parameters
    ----------
    dfs : iterable of (pandas.DataFrame or None, label)
        Labelled recordings, e.g. the list returned by ``read_files``.
        Entries whose frame is ``None`` are skipped with a warning instead of
        crashing the whole run.
    sample_count : int, default 20
        Rows per sample, forwarded to ``split_dataframes``.

    Returns
    -------
    tuple of (list, list)
        ``(samples, labels)`` where ``labels[i]`` is the label of
        ``samples[i]``; both lists always have the same length.
    """
    import warnings

    preprocessed_dfs = []
    all_labels = []
    for df, label in dfs:
        if df is None:
            # The notebook output shows read_files can yield (None, label)
            # when a recording has no usable data; there is nothing to split.
            warnings.warn(
                f"Skipping a {label!r} recording that has no data (None)",
                stacklevel=2,
            )
            continue
        chunks = split_dataframes(df, sample_count)
        preprocessed_dfs.extend(chunks)
        # One label per produced chunk keeps the two lists aligned.
        all_labels.extend([label] * len(chunks))

    return preprocessed_dfs, all_labels

In [26]:
def write_dfs(dfs, labels):
    """Write every sample to ``<DESTINATION_FOLDER>/<label>/<label>_<index>.csv``.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Samples to persist.
    labels : sequence of str
        Label of each sample; ``labels[i]`` belongs to ``dfs[i]``.

    Raises
    ------
    ValueError
        If ``dfs`` and ``labels`` differ in length. This is checked before
        anything is written so a mismatch cannot leave a partial export.

    Notes
    -----
    ``index`` in the file name is the sample's position in ``dfs`` as a whole,
    not a per-label counter. The CSV is written without the dataframe index.
    """
    dfs = list(dfs)
    labels = list(labels)
    if len(dfs) != len(labels):
        raise ValueError(
            f"dfs and labels must have the same length, "
            f"got {len(dfs)} and {len(labels)}"
        )

    for index, (df, label) in enumerate(zip(dfs, labels)):
        folder = Path(config.DESTINATION_FOLDER, label)
        # exist_ok makes this a no-op after the first sample of each label.
        folder.mkdir(parents=True, exist_ok=True)
        df.to_csv(Path(folder, f'{label}_{index}.csv'), index=False)

In [20]:
# Load every recording configured under config.DATA_FILES['idle'] as a list of
# (preprocessed frame, 'idle') tuples.
idle_dfs = read_files('idle')

In [21]:
# Load every recording configured under config.DATA_FILES['jogging'] as a list
# of (preprocessed frame, 'jogging') tuples.
jogging_dfs = read_files('jogging')

In [22]:
# Load every recording configured under config.DATA_FILES['after_exercise'] as
# a list of (preprocessed frame, 'after_exercise') tuples.
after_exer_dfs = read_files('after_exercise')

In [31]:
# Load every recording configured under config.DATA_FILES['exercise'].
# NOTE: in the recorded run one file had no valid SPO values, so its entry in
# the resulting list is (None, 'exercise').
exercise_dfs = read_files('exercise')

There are no valid SPO values, returning None


In [34]:
# Inspect the loaded exercise recordings; in the recorded run the first entry
# is (None, 'exercise') and the other two hold HeartRate / SPO_Values frames.
exercise_dfs

[(None, 'exercise'),
 (      HeartRate  SPO_Values
  0         115.0       100.0
  17        117.5       100.0
  42        120.0       100.0
  67        122.5       100.0
  117       125.0       100.0
  ...         ...         ...
  5242      115.0        92.0
  5267      132.5        92.0
  5292      150.0        92.0
  5317      115.0        92.0
  5342      136.0        92.0
  
  [182 rows x 2 columns],
  'exercise'),
 (       HeartRate  SPO_Values
  0     100.000000        89.0
  26    136.000000        89.0
  51    140.666667        89.0
  101   145.333333        89.0
  151   150.000000        89.0
  ...          ...         ...
  4788  136.000000        95.5
  4813  150.000000        96.0
  4838  125.000000        96.5
  4888  125.000000        97.0
  4913  125.000000        97.0
  
  [155 rows x 2 columns],
  'exercise')]

In [32]:
# Load every recording configured under config.DATA_FILES['walking'] as a list
# of (preprocessed frame, 'walking') tuples.
walking_dfs = read_files('walking')

In [35]:
# Row counts of every loaded recording, one list per activity.
(
    [len(frame) for frame, _ in idle_dfs],
    [len(frame) for frame, _ in jogging_dfs],
    [len(frame) for frame, _ in after_exer_dfs],
    # exercise_dfs[0] holds None instead of a frame, so it is left out.
    [len(frame) for frame, _ in exercise_dfs[1:]],
    [len(frame) for frame, _ in walking_dfs],
)

([128, 151, 148], [193, 190, 184, 347], [147, 180], [182, 155], [169, 169])

In [36]:
# Pool the recordings of all activities (leaving out exercise_dfs[0], which
# holds None) and cut them into samples of at most 40 rows each.
preprocessed_dfs, all_labels = preprocess(
    idle_dfs
    + jogging_dfs
    + after_exer_dfs
    + exercise_dfs[1:]
    + walking_dfs,
    sample_count=40,
)

In [37]:
# Persist every sample as <config.DESTINATION_FOLDER>/<label>/<label>_<index>.csv.
write_dfs(preprocessed_dfs, all_labels)