# Data Cleaning

For each IMU file, clean the IMU data, adjust the labels, and write the results out as CSV files.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import pathlib
import matplotlib.pyplot as pyplot
import pandas as pd

import numpy as np
from numpy import mean
from numpy import std

from scipy import signal
from scipy.fft import fft, fftfreq, rfft, rfftfreq

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
import joblib

from data_processing.labels_util import get_labels_data, LabelCol, get_workouts_row_bounds, get_workouts_epoch_bounds, get_workouts_sensor
from data_processing.imu_util import (
    get_sensor_file, ImuCol, load_imu_data, Sensor, fix_epoch, resample_uniformly, time_to_row_range, get_data_chunk,
    normalize_with_bounds, data_to_features, list_imu_abspaths, clean_imu_data
)
from data_processing.data_util import find_nearest, find_nearest_index, shift, low_pass_filter, add_col
from data_processing.enums import Activity
from visualization.visualize import multiplot
from config import CLEAN_DIR, CLEAN_SUFFIX

# import data types
from pandas import DataFrame
from numpy import ndarray
from typing import List, Tuple, Optional

## Clean IMU data

In [2]:
# Paths of the IMU files for the accelerometer sensor, as returned by
# list_imu_abspaths (each entry exposes .name and .stem, i.e. is path-like).
sensor_files = list_imu_abspaths(sensor_type=Sensor.Accelerometer)

# Make sure the output directory exists before writing; otherwise np.savetxt
# raises FileNotFoundError on a fresh checkout / fresh kernel.
# NOTE(review): assumes CLEAN_DIR is a pathlib.Path (it is combined with `/`
# below) -- confirm against config.py.
CLEAN_DIR.mkdir(parents=True, exist_ok=True)

for file in sensor_files:
    print("Cleaning file '%s'..." % file.name)

    raw_imu = load_imu_data(file)
    clean_imu = clean_imu_data(raw_imu)

    # Output file name is the input file's stem followed by CLEAN_SUFFIX.
    # np.savetxt writes comma-delimited values only (no header row by default).
    np.savetxt(CLEAN_DIR / ("%s%s.csv" % (file.stem, CLEAN_SUFFIX)), clean_imu, delimiter=",")

Cleaning file '10L_2020-08-13T09.44.56.359_D592640E9A77_Accelerometer.csv'...
