# Data Augmentation

In [1]:
import sys, os, logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
def singleGraph(df):
    """Return a plotly figure with one line trace per column of ``df``.

    Every column is drawn against ``df.index`` on a single set of axes and
    the trace is named after the column.
    """
    fig = go.Figure()
    for column_name in df.columns:
        trace = go.Scatter(x=df.index, y=df[column_name], name=column_name)
        fig.add_trace(trace)
    return fig

def tripleGraph(df, fig_title="Test"):
    """Plot each column of ``df`` in its own subplot, laid out side by side.

    Args:
        df: DataFrame whose columns are plotted against ``df.index``.
        fig_title: Title displayed above the figure.

    Returns:
        The plotly figure, sized 960 x 540 px.
    """
    # Keep the historical three-slot grid, but widen it when ``df`` has more
    # columns so ``add_trace`` never targets a subplot that does not exist.
    fig = make_subplots(rows=1, cols=max(3, len(df.columns)))
    for col_num, col in enumerate(df.columns, start=1):
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col), row=1, col=col_num)
    fig.update_layout(height=1080 / 2, width=1920 / 2, title_text=fig_title)
    return fig

def tripleGraph_vertical(df, fig_title="Test"):
    """Plot each column of ``df`` in its own subplot, stacked vertically.

    Args:
        df: DataFrame whose columns are plotted against ``df.index``.
        fig_title: Title displayed above the figure.

    Returns:
        The plotly figure, sized 540 x 960 px.
    """
    # Keep the historical three-row grid, but add rows when ``df`` has more
    # columns so ``add_trace`` never targets a subplot that does not exist.
    fig = make_subplots(rows=max(3, len(df.columns)), cols=1)
    for row_num, col in enumerate(df.columns, start=1):
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col), row=row_num, col=1)
    fig.update_layout(height=1920 / 2, width=1080 / 2, title_text=fig_title)
    return fig

def comparaisonGraph(df1, df2, fig_title="Test"):
    """Compare two DataFrames column by column in a two-row subplot grid.

    For every column of ``df1`` the values of ``df1`` go on the top row and
    the same-named column of ``df2`` goes directly underneath.

    Args:
        df1: Reference DataFrame; its columns drive the layout.
        df2: DataFrame to compare; must contain every column of ``df1``.
        fig_title: Title displayed above the figure.

    Returns:
        The plotly figure, sized 960 x 540 px.
    """
    # Keep the historical three-column grid, but widen it when ``df1`` has
    # more columns so ``add_trace`` never targets a missing subplot.
    fig = make_subplots(rows=2, cols=max(3, len(df1.columns)))
    for col_num, col in enumerate(df1.columns, start=1):
        fig.add_trace(go.Scatter(x=df1.index, y=df1[col], name=col), row=1, col=col_num)
        fig.add_trace(go.Scatter(x=df2.index, y=df2[col], name=col), row=2, col=col_num)
    fig.update_layout(height=1080 / 2, width=1920 / 2, title_text=fig_title)
    return fig

def comparaisonGraph_vertical(df1, df2, fig_title="Test"):
    """Compare two DataFrames column by column in a two-column subplot grid.

    Each column of ``df1`` gets its own row: ``df1`` on the left and the
    same-named column of ``df2`` on the right.

    Args:
        df1: Reference DataFrame; its columns drive the layout.
        df2: DataFrame to compare; must contain every column of ``df1``.
        fig_title: Title displayed above the figure.

    Returns:
        The plotly figure, sized 540 x 960 px.
    """
    # Keep the historical three-row grid, but add rows when ``df1`` has more
    # columns so ``add_trace`` never targets a missing subplot.
    fig = make_subplots(rows=max(3, len(df1.columns)), cols=2)
    for row_num, col in enumerate(df1.columns, start=1):
        fig.add_trace(go.Scatter(x=df1.index, y=df1[col], name=col), row=row_num, col=1)
        fig.add_trace(go.Scatter(x=df2.index, y=df2[col], name=col), row=row_num, col=2)
    fig.update_layout(height=1920 / 2, width=1080 / 2, title_text=fig_title)
    return fig

In [3]:
# Field separator used when reading and writing the recording files.
SEPARATOR = ";"
# File-name suffixes written by the augmentation helpers in this notebook.
DATA_AUGMENTATION_MARKER = ("_r.txt", "_i.txt", "_ri.txt", "_n.txt", "_m.txt")

DATE = "2023_04_14"
DATA_FOLDER = os.path.join("../data/android_app/", DATE)

# Scan the folder once. os.listdir() returns entries in arbitrary order, so
# sort them to make DATA_FILES[0] (used for the previews) reproducible.
_TXT_FILES = sorted(
    file for file in os.listdir(DATA_FOLDER)
    if file.endswith(".txt") and os.path.isfile(os.path.join(DATA_FOLDER, file))
)
# Raw recordings have no underscore in their name; any ".txt" file with an
# underscore is treated as a previously generated augmentation.
DATA_FILES = [file for file in _TXT_FILES if "_" not in file]
AUGMENTATION_FILES = [file for file in _TXT_FILES if "_" in file]

In [4]:
# Delete every file listed in AUGMENTATION_FILES (the ".txt" files whose name
# contains an underscore). This is destructive and cannot be undone.
for stale_file in AUGMENTATION_FILES:
    stale_path = os.path.join(DATA_FOLDER, stale_file)
    os.remove(stale_path)

## Load Data

In [5]:
# Acceleration data: load the first entry of DATA_FILES for the interactive previews.
# NOTE(review): `header=1` makes pandas treat the file's second line as a header row, so the
# leading lines do not end up in `data`, and the columns are named "ay", "ax", "az" here, whereas
# the *Sequence helpers in this notebook read the same files with no header row and the names
# "ax", "ay", "az" — TODO confirm which layout matches the recordings.
data = pd.read_csv(os.path.join(DATA_FOLDER, DATA_FILES[0]), sep=SEPARATOR, header=1, names=["ay", "ax", "az"])

# Legacy loader kept for reference (five-column layout with a timestamp and a location field):
# acc_data = pd.read_csv(os.path.join(DATA_FOLDER, "DATA" + str(REC) + ".txt"), sep=";", header=1, names=["t", "ax", "ay", "az", "loc"])

## Revert Sequence

In [6]:
# Preview: flip the row order of the loaded recording and renumber the index from 0.
revert_data = data.iloc[::-1].reset_index(drop=True)

# Original on the top row, reversed copy on the bottom row.
preview_cols = ["ax", "ay", "az"]
fig = comparaisonGraph(data[preview_cols], revert_data[preview_cols])
fig.show()

In [7]:
def revertSequence(folder, file):
    """Write a time-reversed copy of one recording next to the original.

    The file is read as three header-less columns named "ax", "ay", "az",
    the row order is flipped, and the result is saved in the same folder
    under the original base name with the suffix "_r.txt".

    Args:
        folder: Directory that contains ``file`` and receives the output.
        file: Name of the recording inside ``folder``.
    """
    # Read with the same separator that is used for writing.
    data = pd.read_csv(os.path.join(folder, file), sep=SEPARATOR, names=["ax", "ay", "az"])
    revert_data = data.iloc[::-1].reset_index(drop=True)
    # splitext copes with any extension length, unlike slicing off 4 chars.
    out_name = os.path.splitext(file)[0] + "_r.txt"
    revert_data.to_csv(os.path.join(folder, out_name), sep=SEPARATOR, index=False, header=False)

In [8]:
def revertSequences(folder):
    """Create a time-reversed copy of every raw recording in ``folder``.

    A raw recording is a regular ".txt" file whose name contains no
    underscore; files that already carry an augmentation suffix are skipped.

    Args:
        folder: Directory to scan and to write the "_r.txt" files into.
    """
    # List ``folder`` itself (not the global DATA_FOLDER) and snapshot the
    # names before any output file is written.
    targets = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and "_" not in name
    ]
    for name in targets:
        revertSequence(folder, name)

In [9]:
# Create files: write the time-reversed "_r.txt" copies of the recordings via revertSequences.
revertSequences(DATA_FOLDER)

## Invert X-axis

In [10]:
# Preview: work on a copy so `data` stays untouched, then flip the sign of the "ax" column.
reversed_x_data = data.copy()
reversed_x_data["ax"] = -reversed_x_data["ax"]

# Original on the top row, sign-flipped copy on the bottom row.
preview_cols = ["ax", "ay", "az"]
fig = comparaisonGraph(data[preview_cols], reversed_x_data[preview_cols])
fig.show()

In [11]:
def invertSequenceX(folder, file):
    """Write a copy of one recording with the sign of its "ax" column flipped.

    The file is read as three header-less columns named "ax", "ay", "az";
    the output is saved in the same folder under the original base name
    with the suffix "_i.txt".

    Args:
        folder: Directory that contains ``file`` and receives the output.
        file: Name of the recording inside ``folder``.
    """
    # NOTE(review): "ax" is the FIRST file column here, while the preview
    # loader in this notebook names the first column "ay" — confirm which
    # file column really holds the X axis.
    data = pd.read_csv(os.path.join(folder, file), sep=SEPARATOR, names=["ax", "ay", "az"])
    # Vectorised negation instead of a per-element lambda.
    data["ax"] = -data["ax"]
    # splitext copes with any extension length, unlike slicing off 4 chars.
    out_name = os.path.splitext(file)[0] + "_i.txt"
    data.to_csv(os.path.join(folder, out_name), sep=SEPARATOR, index=False, header=False)

In [12]:
def invertSequencesX(folder):
    """Create an X-inverted copy of every raw recording in ``folder``.

    A raw recording is a regular ".txt" file whose name contains no
    underscore; files that already carry an augmentation suffix are skipped.

    Args:
        folder: Directory to scan and to write the "_i.txt" files into.
    """
    # List ``folder`` itself (not the global DATA_FOLDER) and snapshot the
    # names before any output file is written.
    targets = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and "_" not in name
    ]
    for name in targets:
        invertSequenceX(folder, name)

In [13]:
# Create files: write the "_i.txt" copies (sign of "ax" flipped) of the recordings via invertSequencesX.
invertSequencesX(DATA_FOLDER)

## Reverse Sequence and Invert X-Axis

In [14]:
def reverseInvertSequence(folder, file):
    """Write a copy of one recording that is both X-inverted and time-reversed.

    The file is read as three header-less columns named "ax", "ay", "az",
    the "ax" column is negated, the row order is flipped, and the result is
    saved in the same folder under the original base name with the suffix
    "_ri.txt".

    Args:
        folder: Directory that contains ``file`` and receives the output.
        file: Name of the recording inside ``folder``.
    """
    # NOTE(review): "ax" is the FIRST file column here, while the preview
    # loader in this notebook names the first column "ay" — confirm which
    # file column really holds the X axis.
    data = pd.read_csv(os.path.join(folder, file), sep=SEPARATOR, names=["ax", "ay", "az"])
    data["ax"] = -data["ax"]
    data = data.iloc[::-1].reset_index(drop=True)
    # splitext copes with any extension length, unlike slicing off 4 chars.
    out_name = os.path.splitext(file)[0] + "_ri.txt"
    data.to_csv(os.path.join(folder, out_name), sep=SEPARATOR, index=False, header=False)

In [15]:
def reverseInvertSequences(folder):
    """Create a reversed and X-inverted copy of every raw recording in ``folder``.

    A raw recording is a regular ".txt" file whose name contains no
    underscore; files that already carry an augmentation suffix are skipped.

    Args:
        folder: Directory to scan and to write the "_ri.txt" files into.
    """
    # List ``folder`` itself (not the global DATA_FOLDER) and snapshot the
    # names before any output file is written.
    targets = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
        and name.endswith(".txt")
        and "_" not in name
    ]
    for name in targets:
        reverseInvertSequence(folder, name)

In [16]:
# Create files: write the "_ri.txt" copies (reversed and "ax" negated) of the recordings via reverseInvertSequences.
reverseInvertSequences(DATA_FOLDER)

## Magnitude Warping

In [17]:
## This example, which uses a cubic spline, is not the best approach for generating random curves.
## You can use other approaches, e.g., Gaussian process regression, Bezier curves, etc.
from scipy.interpolate import CubicSpline
def GenerateRandomCurves(X, sigma=0.2, knot=4):
    """Generate one smooth random curve around 1.0 for every column of ``X``.

    ``knot + 2`` anchor points are spread evenly from the first to the last
    sample index, each anchor gets a value drawn from a normal distribution
    with mean 1.0 and standard deviation ``sigma``, and a cubic spline
    through the anchors is evaluated at every sample index.

    Args:
        X: Object with a 2-D ``shape`` (samples, channels); only the shape
            is used.
        sigma: Standard deviation of the anchor values.
        knot: Number of interior anchor points.

    Returns:
        ``numpy.ndarray`` with the same shape as ``X``.
    """
    n_samples, n_channels = X.shape[0], X.shape[1]
    yy = np.random.normal(loc=1.0, scale=sigma, size=(knot + 2, n_channels))
    if n_samples < 2:
        # A spline needs strictly increasing anchors; with zero or one sample
        # there is nothing to interpolate, so use the first anchor directly.
        return yy[:n_samples]
    # linspace always yields exactly knot + 2 anchors ending on the last
    # sample, whereas a float-step arange can produce extra points on short
    # inputs and then no longer match the number of rows in ``yy``.
    xx = np.linspace(0, n_samples - 1, knot + 2)
    x_range = np.arange(n_samples)
    # axis=0 fits an independent spline per column, for any channel count.
    return CubicSpline(xx, yy, axis=0)(x_range)

In [18]:
# Preview: scale every sample by a smooth random curve; the product is a new frame, so `data` is untouched.
magnitude_data = data * GenerateRandomCurves(data)

# Original on the top row, warped copy on the bottom row.
preview_cols = ["ax", "ay", "az"]
fig = comparaisonGraph(data[preview_cols], magnitude_data[preview_cols])
fig.show()

In [19]:
def addMagnitudeSequence(folder, file, sigma, knot):
    """Write a magnitude-warped copy of one recording next to the original.

    The file is read as three header-less columns named "ax", "ay", "az",
    multiplied element-wise by the curves from ``GenerateRandomCurves``, and
    saved in the same folder under the original base name with the suffix
    "_m.txt".

    Args:
        folder: Directory that contains ``file`` and receives the output.
        file: Name of the recording inside ``folder``.
        sigma: Passed through to ``GenerateRandomCurves``.
        knot: Passed through to ``GenerateRandomCurves``.
    """
    # Read with the same separator that is used for writing.
    data = pd.read_csv(os.path.join(folder, file), sep=SEPARATOR, names=["ax", "ay", "az"])
    magnitude_data = data * GenerateRandomCurves(data, sigma, knot)
    # splitext copes with any extension length, unlike slicing off 4 chars.
    out_name = os.path.splitext(file)[0] + "_m.txt"
    magnitude_data.to_csv(os.path.join(folder, out_name), sep=SEPARATOR, index=False, header=False)

In [20]:
def addMagnitudeSequences(folder, sigma=0.1, knot=2):
    """Create a magnitude-warped copy of every ".txt" file in ``folder``.

    Unlike the reverse/invert helpers, names containing an underscore are
    not filtered out, so files produced by earlier augmentation steps are
    warped as well.

    Args:
        folder: Directory to scan and to write the "_m.txt" files into.
        sigma: Passed through to ``addMagnitudeSequence``.
        knot: Passed through to ``addMagnitudeSequence``.
    """
    # List ``folder`` itself (not the global DATA_FOLDER) and snapshot the
    # names first so files written by this loop are not processed again.
    targets = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name)) and name.endswith(".txt")
    ]
    for name in targets:
        addMagnitudeSequence(folder, name, sigma, knot)

In [21]:
# Create files: write the magnitude-warped "_m.txt" copies via addMagnitudeSequences with its defaults (sigma=0.1, knot=2).
addMagnitudeSequences(DATA_FOLDER)

## Noise

In [22]:
# Preview: add zero-mean Gaussian noise (standard deviation 0.5) to every sample; `data` is untouched.
noise = np.random.normal(loc=0, scale=0.5, size=data.shape)
noisy_data = data + noise

# Original on the top row, noisy copy on the bottom row.
preview_cols = ["ax", "ay", "az"]
fig = comparaisonGraph(data[preview_cols], noisy_data[preview_cols])
fig.show()

In [23]:
def DA_Jitter(X, sigma):
    """Return ``X`` plus zero-mean Gaussian noise with standard deviation ``sigma``.

    One noise value is drawn per element of ``X`` (via ``X.shape``); ``X``
    itself is not modified.
    """
    return X + np.random.normal(0, sigma, X.shape)

In [24]:
def addNoiseSequence(folder, file, sigma):
    """Write a noisy copy of one recording next to the original.

    The file is read as three header-less columns named "ax", "ay", "az",
    passed through ``DA_Jitter``, and saved in the same folder under the
    original base name with the suffix "_n.txt".

    Args:
        folder: Directory that contains ``file`` and receives the output.
        file: Name of the recording inside ``folder``.
        sigma: Passed through to ``DA_Jitter``.
    """
    # Read with the same separator that is used for writing.
    data = pd.read_csv(os.path.join(folder, file), sep=SEPARATOR, names=["ax", "ay", "az"])
    noisy_data = DA_Jitter(data, sigma)
    # splitext copes with any extension length, unlike slicing off 4 chars.
    out_name = os.path.splitext(file)[0] + "_n.txt"
    noisy_data.to_csv(os.path.join(folder, out_name), sep=SEPARATOR, index=False, header=False)

In [25]:
def addNoiseSequences(folder, sigma=0.1):
    """Create a noisy copy of every ".txt" file in ``folder``.

    Unlike the reverse/invert helpers, names containing an underscore are
    not filtered out, so files produced by earlier augmentation steps get a
    noisy copy as well.

    Args:
        folder: Directory to scan and to write the "_n.txt" files into.
        sigma: Passed through to ``addNoiseSequence``.
    """
    # List ``folder`` itself (not the global DATA_FOLDER) and snapshot the
    # names first so files written by this loop are not processed again.
    targets = [
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name)) and name.endswith(".txt")
    ]
    for name in targets:
        addNoiseSequence(folder, name, sigma)

In [26]:
# Create files: write the noisy "_n.txt" copies via addNoiseSequences with its default sigma=0.1.
addNoiseSequences(DATA_FOLDER)