# Data Cleaning

For each IMU file, clean the IMU data, adjust the labels, and write the cleaned data and adjusted labels out as CSV files.

In [1]:
# Load IPython's autoreload extension and set it to mode 2, which reloads all
# modules before each cell executes, so edits to the imported `src.*` project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Select matplotlib's interactive `notebook` backend for figures in this notebook.
%matplotlib notebook

import pathlib
import matplotlib.pyplot as pyplot
import pandas as pd

import numpy as np
from numpy import mean
from numpy import std

from scipy import signal
from scipy.fft import fft, fftfreq, rfft, rfftfreq

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
import joblib

from src.data.labels_util import (
    get_labels_data, LabelCol, get_workouts_row_bounds, get_workouts_epoch_bounds, get_workouts_sensor
)
from src.data.imu_util import (
    get_sensor_file, ImuCol, load_imu_data, Sensor, fix_epoch, resample_uniformly, time_to_row_range, get_data_chunk,
    normalize_with_bounds, data_to_features, list_imu_abspaths, clean_imu_data
)
from src.data.data_util import find_nearest, find_nearest_index, shift, low_pass_filter, add_col
from src.data.enums import Activity
from src.data.make_dataset import main as make_dataset
from src.visualization.visualize import multiplot
from src.config import CLEAN_DIR, CLEAN_SUFFIX

# import data types
from pandas import DataFrame
from numpy import ndarray
from typing import List, Tuple, Optional

## Clean IMU data

In [2]:
# Run the dataset-building entry point: `make_dataset` is `main` imported from
# `src.data.make_dataset`. As the captured output shows, it prints a
# "Cleaning file '...'" line for each file it processes.
make_dataset()

Cleaning file '10L_2020-08-13T09.44.56.359_D592640E9A77_Accelerometer.csv'...
Cleaning file '17R_2020-08-13T10.34.27.662_F7025F7111F1_Accelerometer.csv'...
Cleaning file '11L_2020-08-13T09.48.23.503_E8E376103A59_Accelerometer.csv'...
Cleaning file '16R_2020-08-13T10.51.24.151_F14724A81B5E_Accelerometer.csv'...
Cleaning file '4R_2020-08-13T09.37.23.452_C47D949C3933_Accelerometer.csv'...
Cleaning file '12L_2020-08-13T14.54.40.903_FD3613415C08_Accelerometer.csv'...
Cleaning file '9L_2020-08-13T09.45.57.395_C9936E243F1B_Accelerometer.csv'...
Cleaning file '1R_2020-08-12T10.42.36.957_E6506ED62C8A_Accelerometer.csv'...
Cleaning file '9R_2020-08-09T15.16.11.857_DC1A3C126A85_Accelerometer.csv'...
Cleaning file '14L_2020-08-13T09.43.57.053_DFA47D398ABD_Accelerometer.csv'...
Cleaning file '7R_2020-08-13T09.40.34.504_D04FC054D444_Accelerometer.csv'...
Cleaning file '1L_2020-08-12T14.17.19.067_DC5656563C03_Accelerometer.csv'...
Cleaning file '15R_2020-08-13T09.48.26.452_E4A89F39693B_Accelerometer.