In [8]:
# Import the necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [20]:
# Load the processed data file
processed_data = pd.read_csv("Processed Data/Aruba_17/processed_data.csv")

In [21]:
# Create mapping for Status column
def create_status_map(value):
    if value[0] == 'M':
        status_map = {'ON': 0, 'OFF': 1}
        return status_map[value[1:]]
    elif value[0] == 'D':
        status_map = {'OPEN': 0, 'CLOSE': 1}
        return status_map[value[1:]]
    else:
        return float(value)

processed_data['Status'] = processed_data['Status'].apply(create_status_map)

# Create mapping for Device ID column
def device_id_to_numeric(device_id):
    if device_id.startswith("M"):
        return int("0" + device_id[1:])
    elif device_id.startswith("T"):
        return int("1" + device_id[1:])
    elif device_id.startswith("D"):
        return int("2" + device_id[1:])
    else:
        return None

processed_data['Device ID'] = processed_data['Device ID'].apply(device_id_to_numeric)


# Create mapping for Activity column
activity_map = {'Meal_Preparation': 0, 'Relax': 1, 'Eating': 2, 'Work': 3, 'Sleeping': 4, 'Wash_Dishes': 5, 'Bed_to_Toilet': 6, 'Enter_Home': 7, 'Leave_Home': 8, 'Housekeeping': 9, 'Respirate': 10}
processed_data['Activity'] = processed_data['Activity'].map(activity_map)

# Create mapping for Activity Status column
activity_status_map = {'begin': 0, 'end': 1}
processed_data['Activity Status'] = processed_data['Activity Status'].map(activity_status_map)

# Convert the columns to float
processed_data['Timestamp'] = processed_data['Timestamp'].astype(float)
processed_data['Device ID'] = processed_data['Device ID'].astype(float)
processed_data['Status'] = processed_data['Status'].astype(float)
processed_data['Activity'] = processed_data['Activity'].astype(float)
processed_data['Activity Status'] = processed_data['Activity Status'].astype(float)

processed_data.to_csv('processed_data_converted.csv', index=False)

ValueError: could not convert string to float: 'ON'

In [10]:
# Split the data into training and validation sets
train_data = processed_data.iloc[:int(len(processed_data)*0.8), :].values.astype(np.float32)
val_data = processed_data.iloc[int(len(processed_data)*0.8):, :].values.astype(np.float32)

# Define the inputs for the VAE model
inputs = keras.Input(shape=(train_data.shape[1],))

# Define the encoder layer
encoder = keras.layers.Dense(64, activation="relu")(inputs)
encoder = keras.layers.Dense(32, activation="relu")(encoder)
latent = keras.layers.Dense(16, activation="relu")(encoder)

# Define the decoder layer
decoder = keras.layers.Dense(32, activation="relu")(latent)
decoder = keras.layers.Dense(64, activation="relu")(decoder)
outputs = keras.layers.Dense(train_data.shape[1], activation="sigmoid")(decoder)

# Create the VAE model
vae = keras.Model(inputs, outputs)

# Compile the VAE model
vae.compile(loss="mean_squared_error", optimizer="adam")

# Fit the VAE model on the training data
history = vae.fit(train_data, train_data, epochs=100, batch_size=32, validation_data=(val_data, val_data))

# Generate fake datasets
fake_datasets = vae.predict(processed_data.values.astype(np.float32))

# Write the generated fake datasets to files
for i in range(len(fake_datasets)):
    fake_dataset = pd.DataFrame(fake_datasets[i], columns=processed_data.columns)
    fake_dataset.to_csv(f"Fake Data/fake_dataset_{i}.csv", index=False)

ValueError: could not convert string to float: 'M003'