In [8]:
# Import the necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.losses import binary_crossentropy
from keras import backend as K
# from tensorflow.python.framework.ops import disable_eager_execution
# disable_eager_execution()


In [11]:
# Load the processed data file.
# Only the first MAX_ROWS rows are used by this notebook, so let read_csv stop
# after that many rows instead of parsing the whole file and discarding the
# rest with head(). Set MAX_ROWS to None to load every row.
MAX_ROWS = 1000
processed_data = pd.read_csv("Processed Data/Aruba_17/processed_data.csv", nrows=MAX_ROWS)

# Create mapping for Status column
def map_status(row):
    """Encode one row's 'Status' value according to its device-ID prefix.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a string
            'Device ID' entry and a 'Status' entry.

    Returns:
        * prefix 'T': the 'Status' value, returned unchanged.
        * prefix 'M': 1 for 'ON', 0 for 'OFF', -1 for anything else.
        * any other prefix: 1 for 'OPEN', 0 for 'CLOSE', -1 for anything else.
    """
    prefix = row['Device ID'][0]

    # 'T' readings are passed through untouched.
    # NOTE(review): presumably these are numeric temperature readings - confirm.
    if prefix == 'T':
        return row['Status']

    # Every remaining device is binary; only the vocabulary differs.
    binary_codes = {'ON': 1, 'OFF': 0} if prefix == 'M' else {'OPEN': 1, 'CLOSE': 0}
    return binary_codes.get(row['Status'], -1)

# Row-wise apply (axis=1) because map_status needs both the 'Device ID' and the
# 'Status' value of each row to pick the right encoding.
processed_data['Status'] = processed_data.apply(map_status, axis=1)

# Create mapping for Device ID column
def device_id_to_numeric(device_id):
    """Turn a textual device ID into an integer code.

    The leading letter is swapped for a digit ('M' -> 1, 'T' -> 2, 'D' -> 3)
    and the remainder of the ID is kept, e.g. 'M017' -> 1017.

    Args:
        device_id: Device identifier string.

    Returns:
        The integer code, or None when the ID starts with none of the
        recognised letters.

    Raises:
        ValueError: If the prefix is recognised but the rest of the ID does
            not form a valid integer together with the leading digit.
    """
    prefix_digits = (("M", "1"), ("T", "2"), ("D", "3"))
    for letter, digit in prefix_digits:
        if device_id.startswith(letter):
            return int(digit + device_id[1:])
    return None

# Replace every textual device ID with its numeric code (None for unknown
# prefixes). This has to run after the 'Status' encoding above, because
# map_status inspects the first character of the textual 'Device ID'.
processed_data['Device ID'] = processed_data['Device ID'].apply(device_id_to_numeric)


# Encode the 'Activity' labels as integers, numbered in the order listed here.
# NOTE(review): labels missing from this list become NaN in .map() and are
# turned into -1 by the fillna below - confirm the spelling of 'Respirate'
# matches the label used in the CSV.
activity_names = ['Meal_Preparation', 'Relax', 'Eating', 'Work', 'Sleeping', 'Wash_Dishes',
                  'Bed_to_Toilet', 'Enter_Home', 'Leave_Home', 'Housekeeping', 'Respirate']
activity_map = {name: code for code, name in enumerate(activity_names)}
processed_data['Activity'] = processed_data['Activity'].map(activity_map)

# Encode the 'Activity Status' markers: 'begin' -> 1, 'end' -> 0.
activity_status_map = {'end': 0, 'begin': 1}
processed_data['Activity Status'] = processed_data['Activity Status'].map(activity_status_map)

# Cast each feature column to float individually ...
for column in ('Timestamp', 'Device ID', 'Status', 'Activity', 'Activity Status'):
    processed_data[column] = processed_data[column].astype(float)

# ... then use -1 as the marker for missing values and cast the whole frame.
processed_data = processed_data.fillna(-1).astype(float)

# Persist the fully numeric table for the modelling cell.
processed_data.to_csv('processed_data_converted.csv', index=False)

In [12]:
# Show frames in full when they are displayed (no row/column truncation).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Reload the numeric table written by the preprocessing cell.
processed_data = pd.read_csv('processed_data_converted.csv')

# Pull each feature out as its own 1-D array.
timestamp, device_id, status, activity, activity_status = (
    processed_data[column].to_numpy()
    for column in ('Timestamp', 'Device ID', 'Status', 'Activity', 'Activity Status')
)

# Assemble the model input: one row per event, one column per feature.
X = np.column_stack((timestamp, device_id, status, activity, activity_status))

# Rescale every feature with MinMaxScaler; `scaler` keeps the fitted ranges.
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)

# Seed NumPy and TensorFlow before any layer is created so that weight
# initialisation, batch shuffling and the latent sampling noise are repeatable
# when the notebook is restarted and run from the top.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Split the data into training and validation sets.
batch_size = 32
validation_split = 0.2

# The VAE reconstructs its own input, so X is passed as both features and
# targets. shuffle=False keeps the original row order: the last 20% of the
# rows become the validation set.
X_train, X_val, y_train, y_val = train_test_split(X, X, test_size=validation_split, shuffle=False)
X_train = tf.constant(X_train, dtype=tf.float32)
X_val = tf.constant(X_val, dtype=tf.float32)

# Model hyper-parameters.
latent_dim = 2      # size of the latent vector z
encoding_dim = 32   # width of the hidden Dense layers
input_shape = (X_train.shape[1],)  # number of feature columns in X

# Encoder head: input -> hidden layer -> mean and log-variance of z.
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(encoding_dim, activation='relu')(inputs)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Args:
        args: Pair ``(z_mean, z_log_var)`` of tensors; the first axis is the
            batch axis and the second the latent axis.

    Returns:
        Tensor ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
        is noise from ``K.random_normal`` with the same (batch, dim) shape.
    """
    mean, log_var = args
    # Batch size is only known at run time; the latent width is static.
    noise_shape = (K.shape(mean)[0], K.int_shape(mean)[1])
    epsilon = K.random_normal(shape=noise_shape)
    std_dev = K.exp(0.5 * log_var)
    return mean + std_dev * epsilon

# Encoder: maps an input row to [z_mean, z_log_var, sampled z].
z = Lambda(function=sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs=inputs, outputs=[z_mean, z_log_var, z], name='encoder')
encoder.summary()

# Decoder: maps a latent vector back to the feature space. The sigmoid keeps
# every reconstructed feature inside (0, 1).
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
decoder_hidden = Dense(units=encoding_dim, activation='relu')
decoder_output = Dense(units=input_shape[0], activation='sigmoid')
x = decoder_hidden(latent_inputs)
outputs = decoder_output(x)
decoder = Model(inputs=latent_inputs, outputs=outputs, name='decoder')
decoder.summary()

# Full VAE: feed the encoder's sampled z (its last output) into the decoder.
sampled_z = encoder(inputs)[-1]
outputs = decoder(sampled_z)
vae = Model(inputs=inputs, outputs=outputs, name='vae')

# Reconstruction term: binary cross-entropy between the input and its
# reconstruction, scaled by the number of input features.
reconstruction_loss = binary_crossentropy(inputs, outputs) * input_shape[0]

# KL term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var)) over the latent axis.
kl_loss = K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) * -0.5

# Total VAE loss: mean of (reconstruction + KL) over the batch.
vae_loss = K.mean(reconstruction_loss + kl_loss)

# The loss is attached to the model itself, so compile() only gets an optimizer.
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()

# Train the VAE.
# `batch_size` is the value defined earlier in this cell. No steps_per_epoch is
# passed: Keras derives the number of steps from the data and batch size, so
# the trailing partial batch is no longer dropped (floor division) and a
# training set smaller than one batch no longer yields zero steps.
history = vae.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
)

# Generate a synthetic dataset by passing every row through the trained VAE.
# The decoder ends in a sigmoid and the model was trained on MinMax-scaled
# data, so the raw predictions live in the scaled [0, 1] space.
reconstructed_scaled = vae.predict(X)

# Map the predictions back to the original units of each column with the
# fitted scaler; otherwise the CSV would hold scaled values under the original
# column names.
predicted_values = scaler.inverse_transform(reconstructed_scaled)

# Save the prediction data to a new file 'predicted_Data.csv'
predicted_data = pd.DataFrame(
    predicted_values,
    columns=['Timestamp', 'Device ID', 'Status', 'Activity', 'Activity Status'],
)
predicted_data.to_csv('predicted_Data.csv', index=False)


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 5)]          0           []                               
                                                                                                  
 dense_17 (Dense)               (None, 32)           192         ['encoder_input[0][0]']          
                                                                                                  
 z_mean (Dense)                 (None, 2)            66          ['dense_17[0][0]']               
                                                                                                  
 z_log_var (Dense)              (None, 2)            66          ['dense_17[0][0]']               
                                                                                            