# 0. Randomly pick one data file for each label

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive. The cells
# that read from '/content/gdrive/MyDrive/...' depend on this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import random

# Labels of the vocal dataset; each one names a sub-folder of Vocal_Datasets.
f_list = ['A', 'B', 'C', 'HELLO', 'ILOVEU']

for label in f_list:
    path = '/content/gdrive/MyDrive/Vocal_Datasets/' + label + '/'
    # Only Excel workbooks are candidate recordings.
    workbooks = list(filter(lambda name: name.endswith('xlsx'), os.listdir(path)))

    # Draw one recording at random and keep at most its first 35000 rows.
    picked = random.choice(workbooks)
    d1 = pd.read_excel(path + picked, header=None, index_col=None).iloc[0:35000, :]

    # One figure per label, titled with the label, on fixed axes so the
    # figures are directly comparable.
    plt.figure()
    plt.plot(d1)
    plt.title("%s" % label)
    plt.axis([0, 35000, -10, 10])

# 0.1 Data comparison between original data and **JITTERED** data

In [None]:
####################### random data with augmented ############################
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np

sigma = 0.005


def DA_Jitter(X, sigma = 0.005):
    """Return a copy of X with random additive noise (jittering).

    One integer is drawn uniformly from [-100, 100) for every row of X and
    multiplied by ``sigma``; the resulting column vector is added to X, so
    every column of a given row receives the same offset.

    Args:
        X: 2-D array-like or DataFrame of samples (rows are time steps).
        sigma: Scale applied to the integer noise.

    Returns:
        ``X + noise`` with the same shape and type as ``X + ndarray``.
    """
    # Size the noise from the input instead of a fixed 35000 rows, so
    # recordings of any length can be jittered without a broadcast error.
    n_rows = np.shape(X)[0]
    myNoise = np.random.randint(-100, 100, size=(n_rows, 1)) * np.array(sigma)

    return X + myNoise

# Labels of the vocal dataset; each one names a sub-folder of Vocal_Datasets.
f_list = ['A', 'B', 'C', 'HELLO', 'ILOVEU']

for label in f_list:
    path = '/content/gdrive/MyDrive/Vocal_Datasets/' + label + '/'
    candidates = [name for name in os.listdir(path) if name.endswith('xlsx')]

    # Pick one workbook at random and keep at most its first 35000 rows.
    chosen = random.choice(candidates)
    d1 = pd.read_excel(path + chosen, header=None, index_col=None).iloc[0:35000, :]

    # First two cells of a 2x4 grid: the original signal, then a jittered copy.
    fig = plt.figure(figsize=(15, 4))
    ax = fig.add_subplot(2, 4, 1)
    ax.set_title(label)
    ax.plot(d1)

    ax = fig.add_subplot(2, 4, 2)
    ax.set_title(label + " with Jittering")
    ax.plot(DA_Jitter(d1, sigma))
    ax.set_xlim([0, 35000])

# 0.2 Data comparison between original data and **SCALED** data


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
sigma = 0.1


def DA_Scaling(X, sigma=0.1):
    """Scale every column of X by its own random factor.

    One factor per column is drawn from a normal distribution with mean 1.0
    and standard deviation ``sigma``; the same factor is applied to every row
    of that column.

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Standard deviation of the scaling factors.

    Returns:
        ``X`` multiplied element-wise by the row-repeated factors.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    per_column = np.random.normal(loc=1.0, scale=sigma, size=(1, n_cols))
    # Repeat the single row of factors once for every row of X.
    stretched = np.repeat(per_column, n_rows, axis=0)
    return X * stretched

# Emotion labels; each one names a sub-folder of sensorData.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for label in f_list:
    path = '/content/gdrive/MyDrive/sensorData/' + label + '/'
    candidates = [name for name in os.listdir(path) if name.endswith('xlsx')]

    # Pick one workbook at random and keep at most its first 64 rows.
    chosen = random.choice(candidates)
    d1 = pd.read_excel(path + chosen, header=None, index_col=None).iloc[0:64, :]

    # First two cells of a 2x4 grid: the original signal, then a scaled copy.
    fig = plt.figure(figsize=(15, 4))
    ax = fig.add_subplot(2, 4, 1)
    ax.set_title(label)
    ax.plot(d1)

    ax = fig.add_subplot(2, 4, 2)
    ax.set_title(label + " with Scaling")
    ax.plot(DA_Scaling(d1, sigma))
    ax.set_xlim([0, 64])

In [None]:
# Scratch cell: draw one row of per-column factors (mean 1.0, std `sigma`),
# using the same call DA_Scaling makes, to inspect typical values for d1.
np.random.normal(loc=1.0, scale=sigma, size=(1,d1.shape[1]))

In [None]:
# Scratch cell: keep at most the first 64 rows of the current d1 and print it.
d1 = d1.iloc[0:64,:]
print(d1)

# 0.3 Data comparison between original data and **TIME-WARPED** data

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
from scipy.interpolate import CubicSpline

def GenerateRandomCurves(X, sigma = 0.2, knot = 4):
    """Generate one smooth random curve per column of X.

    ``knot + 2`` knots are spread evenly over the row indices of X
    (first and last row included). Each knot value is drawn from a normal
    distribution with mean 1.0 and standard deviation ``sigma``, and a cubic
    spline through the knots is evaluated at every row index.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used. Needs at least
            two rows so the knot positions are strictly increasing.
        sigma: Standard deviation of the knot values.
        knot: Number of interior knots.

    Returns:
        ndarray of shape ``(X.shape[0], X.shape[1])``.
    """
    n_steps, n_series = X.shape[0], X.shape[1]
    # linspace always yields exactly knot + 2 positions; arange with a
    # fractional step can yield a different count and break the spline fit.
    knot_x = np.linspace(0, n_steps - 1, knot + 2)
    knot_y = np.random.normal(loc=1.0, scale=sigma, size=(knot + 2, n_series))
    # A single spline fitted along axis 0 handles any number of columns.
    return CubicSpline(knot_x, knot_y, axis=0)(np.arange(n_steps))

def DistortTimesteps(X, sigma=0.2):
    """Build a randomly distorted time axis for every column of X.

    The random curves from ``GenerateRandomCurves`` are treated as per-step
    time increments: they are accumulated along the rows and rescaled so the
    last value of every column equals ``X.shape[0] - 1``.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used here.
        sigma: Passed through to ``GenerateRandomCurves``.

    Returns:
        ndarray with one distorted time axis per generated curve column.
    """
    tt = GenerateRandomCurves(X, sigma)
    tt_cum = np.cumsum(tt, axis=0)
    # One scale factor per column, applied by broadcasting, so any number of
    # columns is handled rather than exactly five.
    t_scale = (X.shape[0] - 1) / tt_cum[-1, :]
    return tt_cum * t_scale

def DA_TimeWarp(X, sigma=0.2):
    """Return a time-warped copy of X.

    Each column is treated as sampled at the distorted time stamps from
    ``DistortTimesteps`` and linearly re-interpolated onto the regular grid
    ``0 .. X.shape[0] - 1``.

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Passed through to ``DistortTimesteps``.

    Returns:
        Float ndarray with the same shape as X.
    """
    tt_new = DistortTimesteps(X, sigma)
    # Plain ndarray view so both DataFrames and arrays can be indexed alike.
    values = np.asarray(X)
    X_new = np.zeros(X.shape)
    x_range = np.arange(X.shape[0])
    # NOTE(review): np.interp expects increasing sample points; this presumably
    # holds because the curves stay positive for small sigma — confirm.
    for col in range(tt_new.shape[1]):
        X_new[:, col] = np.interp(x_range, tt_new[:, col], values[:, col])
    return X_new

# Emotion labels; each one names a sub-folder of sensorData.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for label in f_list:
    path = '/content/gdrive/MyDrive/sensorData/' + label + '/'
    candidates = [name for name in os.listdir(path) if name.endswith('xlsx')]

    # Pick one workbook at random and keep at most its first 64 rows.
    chosen = random.choice(candidates)
    d1 = pd.read_excel(path + chosen, header=None, index_col=None).iloc[0:64, :]

    # First two cells of a 2x4 grid: the original signal, then a time-warped copy.
    fig = plt.figure(figsize=(15, 4))
    ax = fig.add_subplot(2, 4, 1)
    ax.set_title(label)
    ax.plot(d1)

    ax = fig.add_subplot(2, 4, 2)
    ax.set_title(label + " with Time Warping")
    ax.plot(DA_TimeWarp(d1))
    ax.set_xlim([0, 64])

# 0.4 Data comparison between original data and **MAGNITUDE-WARPED** data


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np

def GenerateRandomCurves(X, sigma = 0.2, knot = 4):
    """Generate one smooth random curve per column of X.

    ``knot + 2`` knots are spread evenly over the row indices of X
    (first and last row included). Each knot value is drawn from a normal
    distribution with mean 1.0 and standard deviation ``sigma``, and a cubic
    spline through the knots is evaluated at every row index.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used. Needs at least
            two rows so the knot positions are strictly increasing.
        sigma: Standard deviation of the knot values.
        knot: Number of interior knots.

    Returns:
        ndarray of shape ``(X.shape[0], X.shape[1])``.
    """
    # Imported here because this cell does not import CubicSpline itself.
    from scipy.interpolate import CubicSpline

    n_steps, n_series = X.shape[0], X.shape[1]
    # linspace always yields exactly knot + 2 positions; arange with a
    # fractional step can yield a different count and break the spline fit.
    knot_x = np.linspace(0, n_steps - 1, knot + 2)
    knot_y = np.random.normal(loc=1.0, scale=sigma, size=(knot + 2, n_series))
    # A single spline fitted along axis 0 handles any number of columns.
    return CubicSpline(knot_x, knot_y, axis=0)(np.arange(n_steps))

def DA_MagWarp(X, sigma = 0.2):
    """Return X multiplied element-wise by random curves (magnitude warping).

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Passed through to ``GenerateRandomCurves``.

    Returns:
        The product of X and the curves from ``GenerateRandomCurves``.
    """
    envelope = GenerateRandomCurves(X, sigma)
    return X * envelope

# Emotion labels; each one names a sub-folder of sensorData.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for i in f_list:
    # Drive is mounted at '/content/gdrive' in this notebook, and the other
    # preview cells read sensorData from there; '/content/drive' pointed at a
    # location that is never mounted.
    path = '/content/gdrive/MyDrive/sensorData/' + i + '/'
    file_list = os.listdir(path)
    file_list_py = [file for file in file_list if file.endswith('xlsx')]

    # Pick one workbook at random and keep at most its first 64 rows.
    rand_j = random.choice(file_list_py)
    d1 = pd.read_excel(path + rand_j, header=None, index_col=None)
    d1 = d1.iloc[0:64, :]

    # First two cells of a 2x4 grid: the original signal, then a
    # magnitude-warped copy.
    fig = plt.figure(figsize=(15, 4))
    ax = fig.add_subplot(2, 4, 1)
    ax.set_title(i)
    ax.plot(d1)
    ax = fig.add_subplot(2, 4, 2)
    ax.set_title(i + " with MagWarp")
    ax.plot(DA_MagWarp(d1))
    ax.set_xlim([0, 64])

# Method 1. Transferring augmented data to Excel

In [None]:
# Install XlsxWriter into the Colab runtime. Presumably this is the Excel
# engine behind the pd.ExcelWriter calls in the export cells below — confirm.
!pip install xlsxwriter

# Method 1-1. Transferring augmented data to Excel

In [None]:
###21~132: Jittering
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def DA_Jitter(X, sigma = 1E-4):
    """Return a copy of X with independent random noise added to every element.

    One integer is drawn uniformly from [-100, 100) for each element of X and
    multiplied by ``sigma`` before being added.

    Args:
        X: Array-like or DataFrame of samples.
        sigma: Scale applied to the integer noise.

    Returns:
        ``X + noise``, with the same shape as X.
    """
    # Size the noise from the input instead of a fixed (64, 5), so samples of
    # any shape can be jittered without a shape-mismatch error.
    myNoise = np.random.randint(-100, 100, size=np.shape(X)) * np.array(sigma)
    return X + myNoise

# Emotion labels; each one names a sub-folder of sensorData_a.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for i in f_list:
    # NOTE(review): the mount cell in this notebook uses '/content/gdrive';
    # confirm that '/content/drive' is mounted before running this cell.
    folder = '/content/drive/MyDrive/sensorData_a/' + i + '/'

    # Source recordings are the workbooks whose numeric name is <= 14.
    # Names that are not numbers (e.g. Excel lock files) are skipped instead
    # of crashing int(). Sorting makes the output numbering reproducible,
    # because os.listdir returns entries in arbitrary order.
    sources = []
    for name in os.listdir(folder):
        if not name.endswith('xlsx'):
            continue
        try:
            number = int(name[0:-5])
        except ValueError:
            continue
        if number <= 14:
            sources.append((number, name))
    sources.sort()

    for counter, (_, j) in enumerate(sources):
        d1 = pd.read_excel(folder + j, header=None, index_col=None)
        # Same 64-row window the other export cells use.
        d1 = d1.iloc[0:64, :]

        # Each source yields 8 augmented files; jitter outputs start at 21.
        alpha = counter * 8
        for ii in range(8):
            # Use DA_Jitter's own default sigma: this cell never defines
            # `sigma`, so passing it picked up a stale value from another
            # cell, or raised NameError on a fresh kernel.
            train_df = pd.DataFrame(DA_Jitter(d1))
            # The context manager saves and closes the workbook;
            # ExcelWriter.save() no longer exists in pandas 2.x.
            with pd.ExcelWriter(folder + '%s.xlsx' % str(ii + 21 + alpha)) as writer:
                train_df.to_excel(writer, index=False, header=False)

In [None]:
###133~244: Scaling
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

sigma = 0.1


def DA_Scaling(X, sigma=0.1):
    """Scale every column of X by its own random factor.

    One factor per column is drawn from a normal distribution with mean 1.0
    and standard deviation ``sigma``; the same factor is applied to every row
    of that column.

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Standard deviation of the scaling factors.

    Returns:
        ``X`` multiplied element-wise by the row-repeated factors.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    per_column = np.random.normal(loc=1.0, scale=sigma, size=(1, n_cols))
    # Repeat the single row of factors once for every row of X.
    stretched = np.repeat(per_column, n_rows, axis=0)
    return X * stretched

# Emotion labels; each one names a sub-folder of sensorData_a.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for i in f_list:
    # NOTE(review): the mount cell in this notebook uses '/content/gdrive';
    # confirm that '/content/drive' is mounted before running this cell.
    folder = '/content/drive/MyDrive/sensorData_a/' + i + '/'

    # Source recordings are the workbooks whose numeric name is <= 14.
    # Names that are not numbers (e.g. Excel lock files) are skipped instead
    # of crashing int(). Sorting makes the output numbering reproducible,
    # because os.listdir returns entries in arbitrary order.
    sources = []
    for name in os.listdir(folder):
        if not name.endswith('xlsx'):
            continue
        try:
            number = int(name[0:-5])
        except ValueError:
            continue
        if number <= 14:
            sources.append((number, name))
    sources.sort()

    for counter, (_, j) in enumerate(sources):
        d1 = pd.read_excel(folder + j, header=None, index_col=None)
        d1 = d1.iloc[0:64, :]

        # Each source yields 8 augmented files; scaling outputs start at 133.
        alpha = counter * 8
        for ii in range(8):
            train_df = pd.DataFrame(DA_Scaling(d1, sigma))
            # The context manager saves and closes the workbook;
            # ExcelWriter.save() no longer exists in pandas 2.x.
            with pd.ExcelWriter(folder + '%s.xlsx' % str(ii + 133 + alpha)) as writer:
                train_df.to_excel(writer, index=False, header=False)

In [None]:
###245~356: Time warping
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def GenerateRandomCurves(X, sigma = 0.2, knot = 4):
    """Generate one smooth random curve per column of X.

    ``knot + 2`` knots are spread evenly over the row indices of X
    (first and last row included). Each knot value is drawn from a normal
    distribution with mean 1.0 and standard deviation ``sigma``, and a cubic
    spline through the knots is evaluated at every row index.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used. Needs at least
            two rows so the knot positions are strictly increasing.
        sigma: Standard deviation of the knot values.
        knot: Number of interior knots.

    Returns:
        ndarray of shape ``(X.shape[0], X.shape[1])``.
    """
    # Imported here because this cell does not import CubicSpline itself.
    from scipy.interpolate import CubicSpline

    n_steps, n_series = X.shape[0], X.shape[1]
    # linspace always yields exactly knot + 2 positions; arange with a
    # fractional step can yield a different count and break the spline fit.
    knot_x = np.linspace(0, n_steps - 1, knot + 2)
    knot_y = np.random.normal(loc=1.0, scale=sigma, size=(knot + 2, n_series))
    # A single spline fitted along axis 0 handles any number of columns.
    return CubicSpline(knot_x, knot_y, axis=0)(np.arange(n_steps))

def DistortTimesteps(X, sigma=0.2):
    """Build a randomly distorted time axis for every column of X.

    The random curves from ``GenerateRandomCurves`` are treated as per-step
    time increments: they are accumulated along the rows and rescaled so the
    last value of every column equals ``X.shape[0] - 1``.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used here.
        sigma: Passed through to ``GenerateRandomCurves``.

    Returns:
        ndarray with one distorted time axis per generated curve column.
    """
    tt = GenerateRandomCurves(X, sigma)
    tt_cum = np.cumsum(tt, axis=0)
    # One scale factor per column, applied by broadcasting, so any number of
    # columns is handled rather than exactly five.
    t_scale = (X.shape[0] - 1) / tt_cum[-1, :]
    return tt_cum * t_scale

def DA_TimeWarp(X, sigma=0.2):
    """Return a time-warped copy of X.

    Each column is treated as sampled at the distorted time stamps from
    ``DistortTimesteps`` and linearly re-interpolated onto the regular grid
    ``0 .. X.shape[0] - 1``.

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Passed through to ``DistortTimesteps``.

    Returns:
        Float ndarray with the same shape as X.
    """
    tt_new = DistortTimesteps(X, sigma)
    # Plain ndarray view so both DataFrames and arrays can be indexed alike.
    values = np.asarray(X)
    X_new = np.zeros(X.shape)
    x_range = np.arange(X.shape[0])
    # NOTE(review): np.interp expects increasing sample points; this presumably
    # holds because the curves stay positive for small sigma — confirm.
    for col in range(tt_new.shape[1]):
        X_new[:, col] = np.interp(x_range, tt_new[:, col], values[:, col])
    return X_new

# Emotion labels; each one names a sub-folder of sensorData_a.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for i in f_list:
    # NOTE(review): the mount cell in this notebook uses '/content/gdrive';
    # confirm that '/content/drive' is mounted before running this cell.
    folder = '/content/drive/MyDrive/sensorData_a/' + i + '/'

    # Source recordings are the workbooks whose numeric name is <= 14.
    # Names that are not numbers (e.g. Excel lock files) are skipped instead
    # of crashing int(). Sorting makes the output numbering reproducible,
    # because os.listdir returns entries in arbitrary order.
    sources = []
    for name in os.listdir(folder):
        if not name.endswith('xlsx'):
            continue
        try:
            number = int(name[0:-5])
        except ValueError:
            continue
        if number <= 14:
            sources.append((number, name))
    sources.sort()

    for counter, (_, j) in enumerate(sources):
        d1 = pd.read_excel(folder + j, header=None, index_col=None)
        d1 = d1.iloc[0:64, :]

        # Each source yields 8 augmented files; time-warp outputs start at 245.
        alpha = counter * 8
        for ii in range(8):
            train_df = pd.DataFrame(DA_TimeWarp(d1))
            # The context manager saves and closes the workbook;
            # ExcelWriter.save() no longer exists in pandas 2.x.
            with pd.ExcelWriter(folder + '%s.xlsx' % str(ii + 245 + alpha)) as writer:
                train_df.to_excel(writer, index=False, header=False)

In [None]:
###357~468: Mag warping
import os
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np

def GenerateRandomCurves(X, sigma = 0.2, knot = 4):
    """Generate one smooth random curve per column of X.

    ``knot + 2`` knots are spread evenly over the row indices of X
    (first and last row included). Each knot value is drawn from a normal
    distribution with mean 1.0 and standard deviation ``sigma``, and a cubic
    spline through the knots is evaluated at every row index.

    Args:
        X: 2-D array or DataFrame; only ``X.shape`` is used. Needs at least
            two rows so the knot positions are strictly increasing.
        sigma: Standard deviation of the knot values.
        knot: Number of interior knots.

    Returns:
        ndarray of shape ``(X.shape[0], X.shape[1])``.
    """
    # Imported here because this cell does not import CubicSpline itself.
    from scipy.interpolate import CubicSpline

    n_steps, n_series = X.shape[0], X.shape[1]
    # linspace always yields exactly knot + 2 positions; arange with a
    # fractional step can yield a different count and break the spline fit.
    knot_x = np.linspace(0, n_steps - 1, knot + 2)
    knot_y = np.random.normal(loc=1.0, scale=sigma, size=(knot + 2, n_series))
    # A single spline fitted along axis 0 handles any number of columns.
    return CubicSpline(knot_x, knot_y, axis=0)(np.arange(n_steps))

def DA_MagWarp(X, sigma = 0.2):
    """Return X multiplied element-wise by random curves (magnitude warping).

    Args:
        X: 2-D array or DataFrame (rows are time steps, columns are channels).
        sigma: Passed through to ``GenerateRandomCurves``.

    Returns:
        The product of X and the curves from ``GenerateRandomCurves``.
    """
    envelope = GenerateRandomCurves(X, sigma)
    return X * envelope

# Emotion labels; each one names a sub-folder of sensorData_a.
f_list = ['Happiness', 'Surprise', 'Disgust', 'Anger', 'Sadness']

for i in f_list:
    # NOTE(review): the mount cell in this notebook uses '/content/gdrive';
    # confirm that '/content/drive' is mounted before running this cell.
    folder = '/content/drive/MyDrive/sensorData_a/' + i + '/'

    # Source recordings are the workbooks whose numeric name is <= 14.
    # Names that are not numbers (e.g. Excel lock files) are skipped instead
    # of crashing int(). Sorting makes the output numbering reproducible,
    # because os.listdir returns entries in arbitrary order.
    sources = []
    for name in os.listdir(folder):
        if not name.endswith('xlsx'):
            continue
        try:
            number = int(name[0:-5])
        except ValueError:
            continue
        if number <= 14:
            sources.append((number, name))
    sources.sort()

    for counter, (_, j) in enumerate(sources):
        d1 = pd.read_excel(folder + j, header=None, index_col=None)
        d1 = d1.iloc[0:64, :]

        # Each source yields 8 augmented files; mag-warp outputs start at 357.
        alpha = counter * 8
        for ii in range(8):
            train_df = pd.DataFrame(DA_MagWarp(d1))
            # The context manager saves and closes the workbook;
            # ExcelWriter.save() no longer exists in pandas 2.x.
            with pd.ExcelWriter(folder + '%s.xlsx' % str(ii + 357 + alpha)) as writer:
                train_df.to_excel(writer, index=False, header=False)