In [None]:
# This module is used for generating an image we will use to check accuracy.
!pip install kaleido==0.2.1



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# List the GPUs TensorFlow can see (an empty list means none were found).
# A GPU runtime is recommended for best results.
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# Mount Google Drive at /content/drive. The dataset read by importData and the
# CSV written at the end of this notebook both live under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# We import the right wrist accelerometer data.
def importData(filename,
               data_dir='/content/drive/MyDrive/Sensor Data/PAMAP2/Protocol',
               activities=(1, 2, 3, 4, 12, 13, 16, 17),
               step=3):
  """Load the three accelerometer columns of one PAMAP2 protocol file.

  Args:
    filename: File stem of the recording, without the '.dat' extension
      (for example 'subject101').
    data_dir: Directory that contains the '.dat' files. Defaults to the
      Google Drive location used by this notebook.
    activities: Activity ids (values of file column 1) to keep. The default
      is the set of activities which are common to all subjects.
    step: Keep every `step`-th remaining row. The default of 3 resamples
      the 100Hz recording to roughly 33Hz for performance.

  Returns:
    A DataFrame holding file columns 4, 5 and 6 (labelled 4, 5, 6) for the
    selected activities, without missing values, down-sampled by `step`.
    The original row labels are preserved.

  Raises:
    ValueError: If `step` is smaller than 1.
  """
  if step < 1:
    # A zero step is invalid and a negative one would reverse the series
    # instead of resampling it.
    raise ValueError(f'step must be a positive integer, got {step}')

  df = pd.read_csv(f'{data_dir}/{filename}.dat',
                   sep=' ',
                   header=None,
                   usecols=[1, 4, 5, 6])
  # Use only the requested activities (by default those common to all subjects).
  df = df[df[1].isin(list(activities))].copy()
  df = df.drop(columns=[1]) # We no longer need the activity column
  df = df.dropna() # Ensure the data has no missing values
  # Rows are dropped before striding, so the stride counts surviving rows only.
  df = df.iloc[::step, :]
  return df

We keep only the rows belonging to activities that every subject performed. The final dataset is a time series of accelerometer readings along three axes.

In [None]:
# Subjects 101-107 are concatenated into the training set; subject 108 is held out as the test set.
subject_files = [f'subject{subject_id}' for subject_id in range(101, 108)]

x_train = pd.concat(map(importData, subject_files), ignore_index=True).to_numpy()
x_test = importData('subject108').to_numpy()

We've created a modified version of a TensorFlow implementation of TimeGAN; the next cell clones it from the repository below.

In [None]:
!git clone https://github.com/PierceRoberts34/time-gan-tensorflow.git
%cd time-gan-tensorflow

fatal: destination path 'time-gan-tensorflow' already exists and is not an empty directory.
/content/time-gan-tensorflow


In [None]:
# These imports resolve only after the %cd into the cloned repository above.
from time_gan_tensorflow.model import TimeGAN
from time_gan_tensorflow.plots import plot

# Hyper-parameters gathered in one place so they are easy to tune.
# NOTE(review): timesteps=33 is presumably about one second of data at the
# ~33Hz resampled rate -- confirm against the TimeGAN implementation.
timegan_params = dict(
    timesteps=33,
    hidden_dim=64,
    num_layers=3,
    lambda_param=0.1,
    eta_param=10,
    learning_rate=0.001,
    batch_size=128,
)

# Build the model on the training data and fit it for a single epoch.
model = TimeGAN(x=x_train, **timegan_params)
model.fit(epochs=1, verbose=True)

# Reconstruct the held-out test data with the fitted model.
x_hat = model.reconstruct(x=x_test)

# Generate as many synthetic samples as there are test rows.
x_sim = model.simulate(samples=len(x_test))

# Plot the actual, reconstructed and synthetic data, then save the figure as a PNG.
fig = plot(actual=x_test, reconstructed=x_hat, synthetic=x_sim)
fig.write_image('results.png', scale=4, height=900, width=700)

epoch: 1 autoencoder_loss: 0.000723 generator_loss: 0.022532 discriminator_loss: 31.884769


In [None]:
# Show the real training array, then the synthetic array and its number of rows.
print("Real Data", x_train, "Synthetic Data", x_sim, x_sim.shape[0], sep="\n")

Real Data
[[ 2.2153    8.27915   5.58753 ]
 [ 2.218     7.14365   5.8993  ]
 [ 2.41148   7.5978    5.93915 ]
 ...
 [-6.28978   7.45238  -0.964473]
 [-6.37124   7.37574  -1.11938 ]
 [-6.48648   7.3004   -1.15934 ]]
Synthetic Data
[[-3.73133965  4.3984757   3.93543995]
 [-1.93705991  5.12304404  3.50122751]
 [-0.45274337  6.18154857  3.1091209 ]
 ...
 [-5.49979169  9.25468904  1.35935439]
 [-5.52432358  9.26274894  1.32834986]
 [-5.54226999  9.26742486  1.29372431]]
60588


In [None]:
# Wrap the synthetic samples in a DataFrame so they can be sliced and written to CSV.
synthetic_df = pd.DataFrame(data=x_sim)
# Position of the next unwritten row; the following cell advances it on every run.
index = 0

In [None]:
# Generate new data: take the next five synthetic rows and append them to the
# CSV on Drive. Each run of this cell advances `index`, so running it again
# emits the following five rows.
append = synthetic_df.iloc[index:index + 5]
index = index + 5
print(append)
append.to_csv(
    '/content/drive/MyDrive/Sensor Data/Test Data/sensor_data.csv',
    mode='a',
    header=None,
    index=False,
)

           0          1         2
10 -3.372252  10.560915  0.939347
11 -3.835561  10.731901  0.860057
12 -4.248534  10.872684  0.792274
13 -4.516910  11.003277  0.715012
14 -4.646976  11.129845  0.625978
