# Purpose: To compare the accuracy levels of a model trained with and without the augmented data set
- Steps:
    0. Initialization
    1. Split testing/training data
    2. Build/reuse an LSTM-based classification model; output: 1 of 3 classes for each test sample.
    3. Use original dataset, train and test the accuracy level.
    4. Use augmented dataset (augmented rare classes + 80% of original data) for training, use 20% of original data for testing.
    5. Compare accuracy levels of 3 and 4. 

# 0. Initialization

In [1]:
########
# Imports
########
import tensorflow as tf
import keras
import os
import pandas as pd
import src.mutils as util

In [2]:
########
# List the GPU devices TensorFlow detects on this machine
########
print(
    tf.config.list_physical_devices(device_type="GPU")
)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# 1. Split testing/training data

In [2]:
########
# Get the number of data rows in the sample dataset
########
# Folder paths are built with os.path.join rather than written as ".\Name"
# literals: "\S" and "\A" are invalid escape sequences in a normal Python
# string, and a hard-coded backslash only works on Windows.
# NOTE(review): the 2nd/3rd arguments look like an output location/name for
# the label counts -- confirm against src.mutils.get_num_labels_in_folder.
util.get_num_labels_in_folder(os.path.join(".", "ScooterData"), "num_labels", "non_aug_labels_1")
util.get_num_labels_in_folder(os.path.join(".", "AugData"), "num_labels", "aug_labels_1")

In [2]:
########
# Split the data into train/test samples appropriately -----
########
def _load_folder_as_frame(file_names):
    """Read every CSV in ``file_names`` into one DataFrame.

    The frames are concatenated with a fresh 0..n-1 index and the first
    column (the extra index column stored in the CSV files) is removed.

    Raises:
        ValueError: if ``file_names`` yields no files.
    """
    frames = [pd.read_csv(name) for name in file_names]
    if not frames:
        # pd.concat would raise a less helpful "No objects to concatenate".
        raise ValueError("No CSV files found to load")
    combined = pd.concat(frames, ignore_index=True)
    # Return a new frame instead of mutating in place with pop().
    return combined.drop(columns=[combined.columns[0]])


# Folder paths are built with os.path.join: the previous ".\Name" literals
# contained invalid escape sequences ("\S", "\A") and were Windows-only.
path_norm = os.path.join(".", "ScooterData")    # Folder path for the non-augmented data
path_aug = os.path.join(".", "AugData")         # Folder path for the augmented data

file_names_norm = util.get_filenames(path_norm)
file_names_aug = util.get_filenames(path_aug)

# Non-augmented and augmented data, each as a single dataframe
df_normal = _load_folder_as_frame(file_names_norm)
df_aug = _load_folder_as_frame(file_names_aug)

# NOTE(review): 0.2 is presumably the held-out fraction (the plan at the top
# of the notebook uses a 20% test split) and 0 looks like a second split
# ratio or seed -- confirm against src.mutils.get_train_test_data.
X_train_norm, y_train_norm, X_valid_norm, y_valid_norm, X_test_norm, y_test_norm = util.get_train_test_data(df_normal, 0.2, 0)

# Same call with the two extra arguments (True, df_aug) used for the
# augmented run; the base frame is still the original data.
X_train_aug, y_train_aug, X_valid_aug, y_valid_aug, X_test_aug, y_test_aug = util.get_train_test_data(df_normal, 0.2, 0, True, df_aug)

# Sanity check: total train+test labels in the augmented run, then how many
# more that is than the train+test labels of the non-augmented run.
test_1 = len(y_train_aug) + len(y_test_aug)
print(test_1)
print(test_1 - len(y_train_norm) - len(y_test_norm))



--------------------------
             x0        y0        z0        x1        y1        z1        x2  \
0      0.276371 -0.544338 -0.215563  0.316652 -0.357965  0.075728  0.083266   
1      0.343582 -0.522834 -0.140957  0.316629 -0.346788  0.047645  0.040785   
2      0.298896 -0.466891 -0.164636  0.302799 -0.330828  0.030200  0.071863   
3      0.342617 -0.527306 -0.309691  0.277172 -0.385516  0.013740  0.016897   
4      0.342506 -0.590904 -0.199543  0.281872 -0.392835  0.062075  0.061822   
...         ...       ...       ...       ...       ...       ...       ...   
12027 -0.068764 -0.497492 -0.201677  0.164586 -0.533366 -0.079943 -0.110369   
12028 -0.066676 -0.497683 -0.202467  0.164614 -0.533343 -0.079573 -0.110369   
12029 -0.068761 -0.501456 -0.203680  0.160980 -0.534479 -0.081749 -0.112862   
12030 -0.067264 -0.506827 -0.204637  0.160022 -0.536045 -0.081494 -0.114240   
12031 -0.067264 -0.506827 -0.204637  0.160022 -0.536045 -0.081494 -0.114240   

             y2        z

AttributeError: module 'src.mutils' has no attribute 'getTrainTestData'

# 2. Train the LSTM classification model with the original dataset

(array([], dtype=float64),
 array([], dtype=float64),
 array([], dtype=float64),
 array([], dtype=float64),
 array([[ 0.00000000e+00,  9.08007920e-02, -6.12982869e-01, ...,
         -1.10081643e-01, -1.71017200e-02, -7.83752091e-03],
        [ 1.00000000e+00,  9.09924135e-02, -6.13051951e-01, ...,
         -1.10074483e-01, -1.73101872e-02, -7.88121857e-03],
        [ 2.00000000e+00,  9.09468457e-02, -6.13150060e-01, ...,
         -1.10040940e-01, -1.73895899e-02, -7.42972549e-03],
        ...,
        [ 4.21300000e+03,  1.68868661e-01, -5.80009401e-01, ...,
         -1.10891759e-01, -2.62079947e-02, -3.10749505e-02],
        [ 4.21400000e+03,  1.66271180e-01, -5.84010363e-01, ...,
         -1.11008853e-01, -2.50091739e-02, -3.10690124e-02],
        [ 4.21500000e+03,  1.64620250e-01, -5.84033012e-01, ...,
         -1.11168511e-01, -2.44276859e-02, -3.07505187e-02]]),
 array(['Stable', 'Stable', 'Stable', ..., 'Minimum Sway', 'Minimum Sway',
        'Minimum Sway'], dtype='<U12'))