## Imports

In [1]:
import pickle
from preprocessing import *

## Preprocessing
For data pre-processing, make sure the datasets are extracted into the `datasets` directory in the project root. The structure should be: \
`datasets/1 December 2017 Dataset/*.dat`

In [2]:
# Preprocess every recording found under `dataset_dir` and cache the results
# (one entry per accepted file, plus the matching activity labels) in a single
# pickle file under 'preprocessed_data'.
# NOTE(review): `os`, `time` and `np` are not imported explicitly in this
# notebook's import cell; they presumably arrive via
# `from preprocessing import *` — confirm.
dataset_dir = 'datasets'
datasets_batches = get_dataset_batches(dataset_dir)

# spectrograms_MTI[k] holds the (Data_spec_MTI, velocity_axis, time_axis) tuple
# returned by preprocess_file for the k-th accepted file; labels[k] is the
# activity parsed from that file's name.
spectrograms_MTI = []
labels = []

for batch in datasets_batches:
    files = get_dataset_files(dataset_dir, batch)
    for i, file in enumerate(files):
        # Files whose name contains 'Copy' or '(' are skipped and reported as
        # having an unrecognized file name.
        if 'Copy' in file or '(' in file:
            print(f'{batch} | [{i + 1}/{len(files)}]: {file} ignored for unrecognized file name.')
            continue

        # perf_counter() is a monotonic clock intended for measuring elapsed
        # time; unlike time.time() it cannot jump if the system clock is adjusted.
        t = time.perf_counter()
        dataset_file_path = os.path.join(dataset_dir, batch, file)
        person, activity, repetition = get_labels(file)
        Data_spec_MTI, velocity_axis, time_axis = preprocess_file(dataset_file_path, th_type='triangle', plot_range_mti=False, plot_spec_mti=False)

        spectrograms_MTI.append((Data_spec_MTI, velocity_axis, time_axis))
        # Only interested in activity
        labels.append(activity)

        print(f'{batch} | [{i + 1}/{len(files)}]: {file} in {time.perf_counter() - t:.2f} seconds. Activity: {activity}.')

# Persist both the per-file tuples and the labels (as a NumPy array) so later
# cells can reload them without repeating the preprocessing above.
os.makedirs('preprocessed_data', exist_ok=True)
with open(os.path.join('preprocessed_data', 'spectrograms.pkl'), 'wb') as f:
    pickle.dump((spectrograms_MTI, np.array(labels)), f)

1 December 2017 Dataset | [1/360]: 1P36A01R01.dat in 1.50 seconds. Activity: 1.
1 December 2017 Dataset | [2/360]: 1P36A01R02.dat in 1.51 seconds. Activity: 1.
1 December 2017 Dataset | [3/360]: 1P36A01R03.dat in 1.76 seconds. Activity: 1.
1 December 2017 Dataset | [4/360]: 1P37A01R01.dat in 1.78 seconds. Activity: 1.
1 December 2017 Dataset | [5/360]: 1P37A01R02.dat in 1.40 seconds. Activity: 1.
1 December 2017 Dataset | [6/360]: 1P37A01R03.dat in 1.49 seconds. Activity: 1.
1 December 2017 Dataset | [7/360]: 1P38A01R01.dat in 1.60 seconds. Activity: 1.
1 December 2017 Dataset | [8/360]: 1P38A01R02.dat in 1.60 seconds. Activity: 1.
1 December 2017 Dataset | [9/360]: 1P38A01R03.dat in 1.45 seconds. Activity: 1.
1 December 2017 Dataset | [10/360]: 1P39A01R01.dat in 1.36 seconds. Activity: 1.
1 December 2017 Dataset | [11/360]: 1P39A01R02.dat in 1.43 seconds. Activity: 1.
1 December 2017 Dataset | [12/360]: 1P39A01R03.dat in 1.79 seconds. Activity: 1.
1 December 2017 Dataset | [13/360]: 1

## Load pre-processed data
```
spectrograms_MTI:     list of (2D array, 1D array, 1D array)
labels:               1D array of integers
```

In [None]:
# Reload the cached preprocessing results and denoise every spectrogram.
# NOTE: pickle.load can execute arbitrary code while unpickling — only load a
# file that was produced by the preprocessing cell of this notebook.
with open(os.path.join('preprocessed_data', 'spectrograms.pkl'), 'rb') as f:
    # Each item in spectrograms_MTI is (spec, velocity_axis, time_axis)
    denoised_spectrograms_MTI, labels = pickle.load(f)

# Everything has been read from disk at this point, so the rest of the cell
# runs outside the `with` block and the file handle is not held open during
# the denoising loop.

# Option 1: Extract into separate lists
spectrograms = [i[0] for i in denoised_spectrograms_MTI]
velocity_axes = [i[1] for i in denoised_spectrograms_MTI]
time_axes = [i[2] for i in denoised_spectrograms_MTI]

# Option 2: Loop through each item
for idx, (spectrogram, velocity_axis, time_axis) in enumerate(denoised_spectrograms_MTI):
    # Apply thresholding algorithm to spectrograms and update them in place.
    # `spectrograms[idx]` is the same object as `spectrogram`, so the slice
    # assignment also updates the entry stored in `denoised_spectrograms_MTI`.
    # Assumes the denoised result has a shape compatible with the original
    # spectrogram — TODO confirm.
    spectrograms[idx][:] = denoise_spectrogram_per_timebin_basis(spectrogram)


## Feature extraction

In [None]:
# See options above for loading the data