# Data Augmentation

In [1]:
import sys, os, logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
def single_graph(df):
    """Plot every column of ``df`` as its own line in one shared figure.

    Args:
        df: DataFrame whose index supplies the x values; each column
            becomes one trace named after the column.

    Returns:
        A plotly ``Figure`` containing one ``Scatter`` trace per column,
        added in column order.
    """
    traces = [go.Scatter(x=df.index, y=df[name], name=name) for name in df.columns]
    figure = go.Figure()
    for trace in traces:
        figure.add_trace(trace)
    return figure

def triple_graph(df):
    """Plot the columns of ``df`` side by side, one subplot per column.

    The grid is fixed at one row and three columns; the n-th column of
    ``df`` is drawn in the n-th subplot.

    Args:
        df: DataFrame whose index supplies the x values.

    Returns:
        The plotly figure produced by ``make_subplots``, with the layout
        sized to half of 1920x1080 and titled "Test".
    """
    figure = make_subplots(rows=1, cols=3)
    # Subplot columns are 1-based, hence start=1.
    for position, name in enumerate(df.columns, start=1):
        trace = go.Scatter(x=df.index, y=df[name], name=name)
        figure.add_trace(trace, row=1, col=position)
    figure.update_layout(height=1080/2, width=1920/2, title_text="Test")
    return figure

In [3]:
# Recording session to process and the folder that holds its files.
DATE = "2023_04_14"
DATA_FOLDER = os.path.join("../data/android_app/", DATE)

# Original recordings: .txt files that carry none of the augmentation suffixes.
DATA_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if os.path.isfile(os.path.join(DATA_FOLDER, name))
    and name.endswith(".txt")
    and not name.endswith(("_r.txt", "_i.txt", "_ri.txt", "_n.txt"))
]
# Files produced by each augmentation step, selected by suffix.
REVERSED_DATA_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if os.path.isfile(os.path.join(DATA_FOLDER, name)) and name.endswith("_r.txt")
]
INVERSED_DATA_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if os.path.isfile(os.path.join(DATA_FOLDER, name)) and name.endswith("_i.txt")
]
REVERSED_INVERSED_DATA_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if os.path.isfile(os.path.join(DATA_FOLDER, name)) and name.endswith("_ri.txt")
]
# Every augmented file, whatever step produced it.
AUGMENTATION_FILES = [
    name
    for name in os.listdir(DATA_FOLDER)
    if os.path.isfile(os.path.join(DATA_FOLDER, name))
    and name.endswith(("_r.txt", "_i.txt", "_ri.txt", "_n.txt"))
]

# Field delimiter of the recording files.
SEPARATOR = ";"

In [5]:
# Delete every augmented file listed in AUGMENTATION_FILES.
# That list is a snapshot taken when the configuration cell ran, so re-run
# that cell first if augmented files were created or removed since.
for file in AUGMENTATION_FILES:
    os.remove(os.path.join(DATA_FOLDER, file))

In [None]:
# Acceleration data: load the first original recording for the demo cells below.
# With header=1 pandas treats the file's second line as the header row (its
# labels are replaced by `names`) and skips the line before it, so samples
# start on the third line.
# NOTE(review): the column order is ay, ax, az - confirm against the recorder's format.
data = pd.read_csv(os.path.join(DATA_FOLDER, DATA_FILES[0]), sep=SEPARATOR, header=1, names=["ay", "ax", "az"])

# acc_data = pd.read_csv(os.path.join(DATA_FOLDER, "DATA" + str(REC) + ".txt"), sep=";", header=1, names=["t", "ax", "ay", "az", "loc"])

## Revert Sequence

In [None]:
# Plot the three acceleration axes of `data` as a reference before reversing.
fig = single_graph(data[["ax", "ay", "az"]])
fig.show()

In [None]:
# Flip the row order, then renumber the index from 0 so it ascends again.
revert_data = data.iloc[::-1].reset_index(drop=True)

In [None]:
# Plot the reversed copy for visual comparison.
fig = single_graph(revert_data[["ax", "ay", "az"]])
fig.show()

In [None]:
def revertSequence(folder, file):
    """Write a row-reversed copy of one recording next to the original.

    The recording is read as ";"-separated text with ``header=1``, so its
    first two lines are not treated as samples. The output keeps pandas'
    ``to_csv`` defaults, i.e. it contains a header line and an index column.

    Args:
        folder: Directory containing ``file``; the output is written there too.
        file: Name of the source recording. Its last four characters are
            replaced by "_r.txt" to form the output name.
    """
    source_path = os.path.join(folder, file)
    samples = pd.read_csv(source_path, sep=";", header=1, names=["ay", "ax", "az"])
    # Reverse the row order and renumber the index from 0.
    flipped = samples.iloc[::-1].reset_index(drop=True)
    stem = file[:len(file) - 4]
    target_path = os.path.join(folder, stem + "_r.txt")
    flipped.to_csv(target_path, sep=";")

In [None]:
def revertSequences(folder):
    """Create a "_r.txt" reversed copy of every original recording in ``folder``.

    Original recordings are the regular ".txt" files that do not already
    carry an augmentation suffix ("_r", "_i", "_ri" or "_n").

    Args:
        folder: Directory to scan; the outputs are written into it as well.
    """
    augmented_suffixes = ("_r.txt", "_i.txt", "_ri.txt", "_n.txt")
    # Scan the folder that was passed in (not the notebook-level DATA_FOLDER),
    # so the files that are listed are the same ones that get read.
    originals = [
        name
        for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and not name.endswith(augmented_suffixes)
    ]
    for name in originals:
        revertSequence(folder, name)

In [None]:
# Create files: write the "_r.txt" reversed copies into DATA_FOLDER.
revertSequences(DATA_FOLDER)

## Invert X-axis

In [None]:
# Plot the three acceleration axes of `data` as a reference before inverting x.
fig = single_graph(data[["ax", "ay", "az"]])
fig.show()

In [None]:
# Work on a copy: a plain assignment would only alias `data`, so negating
# "ax" would also flip the loaded recording (and flip it back on each re-run).
inverted_data = data.copy()
inverted_data["ax"] = -inverted_data["ax"]

In [None]:
# Plot the x-inverted copy for visual comparison.
fig = single_graph(inverted_data[["ax", "ay", "az"]])
fig.show()

In [None]:
def invertSequenceX(folder, file):
    """Write a copy of one recording with the sign of its "ax" column flipped.

    The recording is read as ";"-separated text with ``header=1``, so its
    first two lines are not treated as samples. The output keeps pandas'
    ``to_csv`` defaults, i.e. it contains a header line and an index column.

    Args:
        folder: Directory containing ``file``; the output is written there too.
        file: Name of the source recording. Its last four characters are
            replaced by "_i.txt" to form the output name.
    """
    data = pd.read_csv(os.path.join(folder, file), sep=";", header=1, names=["ay", "ax", "az"])
    # Vectorised negation instead of a per-element Python lambda.
    data["ax"] = -data["ax"]
    data.to_csv(os.path.join(folder, file[:len(file) - 4] + "_i.txt"), sep=";")

In [None]:
def invertSequencesX(folder):
    """Create an "_i.txt" x-inverted copy of every original recording in ``folder``.

    Original recordings are the regular ".txt" files that do not already
    carry an augmentation suffix ("_r", "_i", "_ri" or "_n").

    Args:
        folder: Directory to scan; the outputs are written into it as well.
    """
    augmented_suffixes = ("_r.txt", "_i.txt", "_ri.txt", "_n.txt")
    # Scan the folder that was passed in (not the notebook-level DATA_FOLDER),
    # so the files that are listed are the same ones that get read.
    originals = [
        name
        for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and not name.endswith(augmented_suffixes)
    ]
    for name in originals:
        invertSequenceX(folder, name)

In [None]:
# Create files: write the "_i.txt" x-inverted copies into DATA_FOLDER.
invertSequencesX(DATA_FOLDER)

## Reverse Sequence and Invert X-Axis

In [None]:
def reverseInvertSequences(folder):
    """Create an "_ri.txt" copy (x inverted, rows reversed) of each original recording.

    Original recordings are the regular ".txt" files in ``folder`` that do
    not already carry an augmentation suffix ("_r", "_i", "_ri" or "_n").
    Each one is read as ";"-separated text with ``header=1``, so its first
    two lines are not treated as samples. The output keeps pandas'
    ``to_csv`` defaults, i.e. it contains a header line and an index column.

    Args:
        folder: Directory to scan; the outputs are written into it as well.
    """
    augmented_suffixes = ("_r.txt", "_i.txt", "_ri.txt", "_n.txt")
    # Scan the folder that was passed in (not the notebook-level DATA_FOLDER),
    # so the files that are listed are the same ones that get read.
    originals = [
        name
        for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and not name.endswith(augmented_suffixes)
    ]
    for name in originals:
        data = pd.read_csv(os.path.join(folder, name), sep=";", header=1, names=["ay", "ax", "az"])
        # Vectorised sign flip of the x axis.
        data["ax"] = -data["ax"]
        # Reverse the row order and renumber the index from 0.
        data = data[::-1].reset_index(drop=True)
        data.to_csv(os.path.join(folder, name[:len(name) - 4] + "_ri.txt"), sep=";")

In [None]:
# Create files: write the "_ri.txt" reversed-and-inverted copies into DATA_FOLDER.
reverseInvertSequences(DATA_FOLDER)

## Add Noise

### Hyperparameters: sigma = standard deviation (STD) of the noise

In [None]:
# Standard deviation of the Gaussian noise; read as a global by addNoise_1.
sigma = 0.1

In [None]:
def DA_Jitter(X, sigma):
    """Return ``X`` with zero-mean Gaussian noise added element-wise.

    Args:
        X: Array-like object exposing ``.shape``; it is not modified.
        sigma: Standard deviation of the noise.

    Returns:
        ``X`` plus a noise array of the same shape, drawn from NumPy's
        global random state.
    """
    jitter = np.random.normal(size=X.shape, scale=sigma, loc=0)
    return X + jitter

In [None]:
def addNoise_1(folder, file):
    """Write a noisy copy of one recording next to the original.

    The recording is read as ";"-separated text with ``header=1``, so its
    first two lines are not treated as samples. Noise is added to all three
    columns by ``DA_Jitter``, using the notebook-level ``sigma``.

    Args:
        folder: Directory containing ``file``; the output is written there too.
        file: Name of the source recording. Its last four characters are
            replaced by "_n.txt" to form the output name.
    """
    samples = pd.read_csv(os.path.join(folder, file), sep=";", header=1, names=["ay", "ax", "az"])
    target_name = file[:len(file) - 4] + "_n.txt"
    noisy = DA_Jitter(samples, sigma)
    noisy.to_csv(os.path.join(folder, target_name), sep=";")

In [None]:
def addNoise(folder):
    """Create a "_n.txt" noisy copy of every recording in ``folder``.

    All regular ".txt" files are processed, including the reversed and
    inverted variants. Files that are already noisy copies ("_n.txt") are
    skipped, so running this twice does not stack noise on noise and produce
    "_n_n.txt" files.

    Args:
        folder: Directory to scan; the outputs are written into it as well.
    """
    sources = [
        name
        for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and not name.endswith("_n.txt")
    ]
    for name in sources:
        addNoise_1(folder, name)

In [None]:
# Create files: write the "_n.txt" noisy copies into DATA_FOLDER.
addNoise(DATA_FOLDER)

In [None]:
# Remove files: delete every regular file in DATA_FOLDER whose name ends in
# "_n.txt". The folder is listed afresh here, so this reflects its current contents.
for file in [file for file in os.listdir(DATA_FOLDER) if os.path.isfile(os.path.join(DATA_FOLDER, file)) and file.endswith("_n.txt")]:
     os.remove(os.path.join(DATA_FOLDER, file))

In [None]:
# Plot the three acceleration axes of `data` once more.
fig = single_graph(data[["ax", "ay", "az"]])
fig.show()