In [1]:
cd ../..

/Users/hao/workspace/hpi_de/4th_Semester/Applied Machine Learning/sensor-data-gans


In [2]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from numpy.random import randn
from sklearn.utils import shuffle
from tensorflow.keras.layers import Input, Dense, Flatten, LSTM, LeakyReLU, Reshape, BatchNormalization, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from gans.utils import create_gan, train_gan
from utils.plotting import plot_n_heatmaps, plot_n_lineplots
from utils.preprocess import filter_by_activity_index, calc_consultant
from utils.windowing import windowing_dataframe, transform_windows_df

In [3]:
def create_discriminator(input_shape, optimizer=None):
    """Build and compile the LSTM discriminator.

    Architecture: LSTM(5, return_sequences=True) -> Flatten -> Dense(1, sigmoid),
    compiled with binary cross-entropy and an accuracy metric.

    Args:
        input_shape: Shape of a single input sample, forwarded to the first
            LSTM layer as ``input_shape``.
        optimizer: Optimizer used to compile the model. When ``None`` (default)
            a new ``Adam(learning_rate=0.0001, clipvalue=1.0, decay=1e-8)`` is
            created for this call.

    Returns:
        The compiled ``Sequential`` discriminator.
    """
    if optimizer is None:
        # A default written in the signature is evaluated once at definition
        # time, so every discriminator would share one stateful optimizer
        # instance. Build a fresh one per call instead.
        optimizer = Adam(learning_rate=0.0001, clipvalue=1.0, decay=1e-8)

    discriminator = Sequential()
    discriminator.add(LSTM(5, return_sequences=True, input_shape=input_shape))
    discriminator.add(Flatten())
    discriminator.add(Dense(1, activation='sigmoid'))

    discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return discriminator

In [4]:
def create_generator(latent_dim, small=True):
    """Build the (uncompiled) LSTM generator.

    A dense layer expands the latent vector to ``window_size`` values, which are
    reshaped to ``(window_size, 1)`` and passed through one or more bidirectional
    LSTM stages (each followed by LeakyReLU and BatchNormalization). A final
    linear LSTM with 3 units and ``return_sequences=True`` produces the output.

    Note: reads the notebook-level ``window_size``, which must be defined before
    this function is called.

    Args:
        latent_dim: Length of the latent input vector.
        small: If truthy, use a single bidirectional stage with 6 units;
            otherwise stack three stages with 12, 9 and 6 units.

    Returns:
        The ``Sequential`` generator model.
    """
    # The two variants differ only in the sizes of the bidirectional stages.
    stage_units = (6,) if small else (12, 9, 6)

    layer_stack = [
        Dense(window_size, kernel_initializer='he_uniform', input_shape=(latent_dim, )),
        LeakyReLU(),
        BatchNormalization(),
        Reshape((window_size, 1)),
    ]
    for units in stage_units:
        layer_stack.append(Bidirectional(LSTM(units, return_sequences=True, kernel_initializer='he_uniform')))
        layer_stack.append(LeakyReLU())
        layer_stack.append(BatchNormalization())
    layer_stack.append(LSTM(3, return_sequences=True, kernel_initializer='he_uniform', activation='linear'))

    generator = Sequential()
    for layer in layer_stack:
        generator.add(layer)

    return generator

In [5]:
# Activity label written to the 'act' column of the generated data frame.
act_id = 0

# Length of the latent vector fed to the generator.
latent_dim = 64
# Passed to train_gan as its last positional argument (presumably the number of training steps — confirm).
steps = 1000
# NOTE(review): not referenced by any cell visible in this notebook — confirm whether train_gan should receive it.
batch_size = 32

# Number of latent vectors sampled from the trained generator.
num_gen = 1000
# Upper bound on the generated samples appended to the SVM training set.
add_sample_num = 500

In [6]:
# Load the pre-split train / validation / test frames from HDF5.
train_df, val_df, test_df = (
    pd.read_hdf(h5_path)
    for h5_path in (
        './datasets/mydata/train_df.h5',
        './datasets/mydata/val_df.h5',
        './datasets/mydata/test_df.h5',
    )
)

In [7]:
# Number of samples per window. NOTE(review): 5*50 looks like 5 s at 50 Hz — confirm the sampling rate.
window_size = 5*50
# Half a window; passed to windowing_dataframe as step_or_sample_size.
step_size = int(window_size/2)
# Columns handed to windowing_dataframe.
col_names = ['userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z', 'userAcceleration.c']
# Windowing strategy name understood by windowing_dataframe.
method ='sliding'

In [8]:
# Window each split with identical settings.
train_windowed_df, val_windowed_df, test_windowed_df = (
    windowing_dataframe(
        split_df,
        window_size=window_size,
        step_or_sample_size=step_size,
        col_names=col_names,
        method=method,
    )
    for split_df in (train_df, val_df, test_df)
)

In [9]:
# Three-axis acceleration arrays (plus labels) for every split; these feed the GAN.
input_cols = ['userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z']
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    transform_windows_df(windowed_df, input_cols=input_cols, one_hot_encode=False, as_channel=False)
    for windowed_df in (train_windowed_df, val_windowed_df, test_windowed_df)
)

In [10]:
# Keep only the training windows of the activity the GAN should learn.
# Uses act_id from the config cell instead of a hard-coded 0, so this filter and
# the label later written to the generated data cannot drift apart.
x_train_activity, _ = filter_by_activity_index(x=x_train, y=y_train, activity_idx=act_id)

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Display names used for classification reports and confusion-matrix axes
# (fixed typo: "waldking" -> "walking").
labels = ["standing", "walking", "jogging"]

In [12]:
# Single-column ('userAcceleration.c') arrays and labels for the SVM classifier.
input_cols = ['userAcceleration.c']
(x_train_clf, y_train_clf), (x_val_clf, y_val_clf), (x_test_clf, y_test_clf) = (
    transform_windows_df(windowed_df, input_cols=input_cols, one_hot_encode=False, as_channel=False)
    for windowed_df in (train_windowed_df, val_windowed_df, test_windowed_df)
)

In [13]:
# Flatten every sample to a 1-D vector of window_size values for the SVM.
x_train_clf, x_val_clf, x_test_clf = (
    windows.reshape((len(windows), window_size))
    for windows in (x_train_clf, x_val_clf, x_test_clf)
)

In [14]:
# Baseline classifier: an SVC with default hyper-parameters, fitted on the original training data only.
svm_clf = SVC()
svm_clf.fit(x_train_clf, y_train_clf)

SVC()

#### train performance

In [15]:
# Predict on the training split and print per-class metrics for the baseline SVM.
y_train_head = svm_clf.predict(x_train_clf)
print(classification_report(y_train_clf, y_train_head, target_names=labels))

KeyboardInterrupt: 

In [None]:
# Training-split confusion matrix, drawn as an annotated heatmap with class names on both axes.
cm = confusion_matrix(y_train_clf, y_train_head)
cm_df = pd.DataFrame(data=cm, index=labels, columns=labels)
sns.heatmap(data=cm_df, annot=True, fmt='g', cmap='YlGnBu')

In [None]:
# Baseline training accuracy; compared against the GAN-augmented classifier further down.
orig_train_acc = accuracy_score(y_train_clf, y_train_head)
orig_train_acc

#### test performance

In [None]:
# Baseline SVM predictions on the test split.
y_test_head = svm_clf.predict(x_test_clf)

In [None]:
# Per-class precision / recall / F1 of the baseline SVM on the test split.
print(classification_report(y_test_clf, y_test_head, target_names=labels))

In [None]:
# Test-split confusion matrix for the baseline SVM.
# Uses y_test_clf (the labels returned alongside x_test_clf) so this cell agrees
# with the classification report and accuracy computed for the same predictions;
# the original mixed in y_test from the 3-axis transform.
cm = confusion_matrix(y_test_clf, y_test_head)
cm_df = pd.DataFrame(cm, index = labels,
                  columns = labels)
sns.heatmap(cm_df, annot=True, cmap='YlGnBu', fmt='g')

In [None]:
# Baseline test accuracy; compared against the GAN-augmented classifier further down.
orig_test_acc = accuracy_score(y_test_clf, y_test_head)
orig_test_acc

In [None]:
### GAN

In [None]:
# Assemble the GAN. The discriminator's input shape is the shape of one training sample.
input_shape = x_train.shape[1:]
discriminator = create_discriminator(input_shape)
generator = create_generator(latent_dim)
gan = create_gan(discriminator_model=discriminator, generator_model=generator)

In [None]:
# Train the GAN on the windows of the selected activity only.
# NOTE(review): batch_size from the config cell is not passed here — confirm train_gan's default is intended.
train_gan(generator, discriminator, gan, x_train_activity, steps)

In [None]:
## eval

In [None]:
# Sample num_gen latent vectors from a standard normal and run them through the generator.
# NOTE(review): no random seed is set anywhere visible, so results differ between runs.
random_latent_vectors = np.random.normal(size=(num_gen, latent_dim))
generated_sensor_data = generator.predict(random_latent_vectors)

In [None]:
# Swap the last two axes of every generated sample so each DataFrame row holds
# one list per acceleration axis, then derive the '.c' column and attach the activity label.
per_axis_series = np.transpose(generated_sensor_data, (0, 2, 1)).tolist()
gen_df = pd.DataFrame(
    per_axis_series,
    columns=['userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z'],
)
gen_df['userAcceleration.c'] = calc_consultant(gen_df)
gen_df['act'] = act_id

In [None]:
# Window the generated data with the same settings as the real data.
gen_windowed_df = windowing_dataframe(gen_df, window_size=window_size, step_or_sample_size=step_size, col_names=col_names, method=method)

input_cols = ['userAcceleration.c']
# Fix: transform the *generated* windows. The original passed val_windowed_df here,
# so gen_windowed_df was never used and validation windows were appended to the
# training set under the wrong label.
x_gen, y_gen = transform_windows_df(gen_windowed_df, input_cols=input_cols, one_hot_encode=False, as_channel=False)
x_gen = x_gen.reshape((len(x_gen), window_size))

# Append at most add_sample_num generated samples. The label vector is sized from
# what was actually sliced (fewer samples may be available) and carries act_id
# rather than a hard-coded 0.
x_gen_added = x_gen[:add_sample_num]
x_train_gen = np.concatenate([x_train_clf, x_gen_added])
y_train_gen = np.concatenate([y_train_clf, np.full(len(x_gen_added), act_id)])

In [None]:
# Refit a default SVC on the augmented training set, then score it on the
# original (non-augmented) train and test splits.
svm_clf = SVC()
svm_clf.fit(X=x_train_gen, y=y_train_gen)

y_train_head, y_test_head = svm_clf.predict(x_train_clf), svm_clf.predict(x_test_clf)

train_acc = accuracy_score(y_true=y_train_clf, y_pred=y_train_head)
test_acc = accuracy_score(y_true=y_test_clf, y_pred=y_test_head)

In [None]:
# Side-by-side accuracies: GAN-augmented SVM vs. baseline SVM.
# Both lines now use the same ': Orig ... Acc' suffix (the test line lacked the colon).
print('Gen Train acc:', train_acc, 'vs.', orig_train_acc, ': Orig Train Acc')
print('Gen Test acc:', test_acc, 'vs.', orig_test_acc, ': Orig Test Acc')

In [None]:
# Visual comparison of real windows of the selected activity against generated ones (heatmaps, n=10).
plot_n_heatmaps(x_train_activity, generated_sensor_data, n=10)

In [None]:
# Same comparison as line plots (n=10).
plot_n_lineplots(x_train_activity, generated_sensor_data, n=10)

In [None]:
# End-to-end run in one cell: sample from the trained generator, augment the SVM
# training set with the generated samples, re-evaluate, and persist plots and accuracies.

# --- generate ---------------------------------------------------------------
random_latent_vectors = np.random.normal(size=(num_gen, latent_dim))
generated_sensor_data = generator.predict(random_latent_vectors)

gen_df = pd.DataFrame(np.array([ts.transpose() for ts in generated_sensor_data]).tolist(), columns= ['userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z'])
gen_df['userAcceleration.c'] = calc_consultant(gen_df)
gen_df['act'] = act_id

gen_windowed_df = windowing_dataframe(gen_df, window_size=window_size, step_or_sample_size=step_size, col_names=col_names, method=method)

# --- augment ----------------------------------------------------------------
input_cols = ['userAcceleration.c']
# Fix: use the generated windows. The original passed val_windowed_df, which left
# gen_windowed_df unused and added validation windows under the wrong label.
x_gen, y_gen = transform_windows_df(gen_windowed_df, input_cols=input_cols, one_hot_encode=False, as_channel=False)
x_gen = x_gen.reshape((len(x_gen), window_size))

# Label vector follows the number of samples actually appended and carries act_id
# instead of a hard-coded 0 of fixed length add_sample_num.
x_gen_added = x_gen[:add_sample_num]
x_train_gen = np.concatenate([x_train_clf, x_gen_added])
y_train_gen = np.concatenate([y_train_clf, np.full(len(x_gen_added), act_id)])

# --- retrain and evaluate on the original splits ------------------------------
svm_clf = SVC()
svm_clf.fit(x_train_gen, y_train_gen)

y_train_head = svm_clf.predict(x_train_clf)
train_acc = accuracy_score(y_train_clf, y_train_head)

y_test_head = svm_clf.predict(x_test_clf)
test_acc = accuracy_score(y_test_clf, y_test_head)

# --- persist ----------------------------------------------------------------
# Fix: the original directory name referenced kernel_num, kernel_size and small,
# none of which are defined in this notebook (NameError). Only the hyper-parameters
# that exist here are encoded in the path.
out_dir = 'out/steps-{}_latentdim-{}'.format(steps, latent_dim)
# Make sure the target directory exists before anything is written into it.
os.makedirs(out_dir, exist_ok=True)

plot_n_heatmaps(x_train_activity, generated_sensor_data, n=10, save_dir=out_dir)
plot_n_lineplots(x_train_activity, generated_sensor_data, n=10, save_dir=out_dir)

# Fix: file.write() accepts exactly one string; the original passed several
# positional arguments (TypeError). The context manager also closes the file on error.
with open(os.path.join(out_dir, 'acc.txt'), 'w') as acc_file:
    acc_file.write(f'Gen Train acc: {train_acc} vs. {orig_train_acc} :Orig Train Acc\n')
    acc_file.write(f'Gen Test acc: {test_acc} vs. {orig_test_acc} :Orig Test Acc\n')