# VAE training and processing

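Sample code to train a new VAE (or load a pre-trained one) and use its encoder to process CSI data, optionally after PCA compression and Lloyd-Max quantization.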

In [25]:
import os
import math
import string
import pickle
import tensorflow as tf
import tf_keras as keras
import numpy as np
import scipy.io as sio

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

In [26]:
# NOTE(review): `tensorflow` was already imported by the previous cell, so this
# variable is presumably set too late to influence that import — confirm. The
# code in this file takes Keras from `tf_keras` directly.
os.environ["TF_USE_LEGACY_KERAS"]="1"
# Number of antennas; used as the last dimension of the VAE encoder input shape.
ANTENNAS = 1
# Number of windows per batch handed to CsiDataGenerator.
BATCH_SIZE = 25

antenna = 0  # if ANTENNAS==1, this value selects the antenna ID (from 0 to 3)
# NOTE(review): the visible cells never pass `latent_dim` to VAE(), which has
# its own default of 2 — confirm before changing this value.
latent_dim = 2
# Number of dataset files loaded (one per leading uppercase letter).
num_activities = 5
# Folder holding the checkpoints, the CSV training log and the final VAE weights.
folder_name = f'models/single_antenna_{antenna}'

# Default output directory of the PCA helper functions.
base_directory = './models'
# NOTE(review): not read by the visible cells; the PCA helpers take their own
# `saveGraph` argument — confirm whether this global is still needed.
saveGraph = False

## CSI data generator

In [27]:
class CsiDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving sliding windows of CSI amplitudes with labels.

    Every file is loaded with ``scipy.io.loadmat``; its ``'csi'`` entry is
    turned into rounded magnitudes and appended along the first axis. The
    concatenated tensor is finally divided by its global maximum. The label of
    a file is its position in ``files``.

    For each file, ``num_samples - window_size`` window start rows are
    registered in ``self.indices``; they are offsets into ``self.csi`` chosen
    so that a window never runs past the file it starts in.

    Args:
        files: Paths of the MATLAB files to load, one per activity.
        num_samples: Rows assumed per file. With ``antennas == 1`` exactly the
            first ``num_samples`` rows are selected.
        window_size: Number of consecutive rows in one window.
        antennas: ``1`` keeps only the antenna ``antenna_select``; any other
            value keeps the array as loaded (last axis must equal ``antennas``).
        batch_size: Number of windows per batch.
        antenna_select: Index on the last axis of ``'csi'`` used when
            ``antennas == 1``.
    """

    def __init__(self, files, num_samples=12000, window_size=450, antennas=1, batch_size=25, antenna_select=0):
        super().__init__()

        # Zero-length seeds fix the dtype and the trailing shape (2048 columns).
        if antennas == 1:
            self.csi = tf.zeros([0, 2048], dtype=tf.float32)
        else:
            self.csi = tf.zeros([0, 2048, antennas], dtype=tf.float32)

        self.labels = tf.zeros([0], dtype=tf.int32)
        self.indices = tf.zeros([0], dtype=tf.int32)
        self.window_size = window_size
        self.batch_size = batch_size
        self.antennas = antennas

        windows_per_file = num_samples - window_size

        # enumerate() gives the label directly; files.index(file) would return
        # the first match and mislabel a path that appears more than once.
        for activity_label, file in enumerate(files):
            # Load CSI data from MATLAB file
            mat = sio.loadmat(file)      # WARNING This code does not handle exceptions for simplicity...
            data = np.array(mat['csi'])  # ...exceptions would require keeping track of indices
            if self.antennas == 1:
                data = data[range(num_samples), ..., int(antenna_select)]
            data = np.round(np.abs(data))
            index_offset = self.csi.shape[0]  # first row of this file in self.csi

            # Cast CSI data into temporary TF tensors for building the dataset
            csi = tf.convert_to_tensor(data, dtype=tf.float32)
            labels = tf.convert_to_tensor(np.full(windows_per_file, activity_label, dtype=np.int32))
            indices = tf.range(index_offset, index_offset + windows_per_file, dtype=tf.int32)

            # Concatenate to the previous tensors
            self.csi = tf.concat([self.csi, csi], axis=0)
            self.labels = tf.concat([self.labels, labels], axis=0)
            self.indices = tf.concat([self.indices, indices], axis=0)

        # Normalize the CSI dataset by its global maximum (all axes reduced).
        self.csi = tf.math.divide(self.csi, tf.math.reduce_max(self.csi))

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return math.ceil(int(self.indices.shape[0]) / self.batch_size)

    def __getitem__(self, batch_idx):
        """Return ``(data_batch, labels_batch)`` for batch number ``batch_idx``.

        With ``antennas == 1`` the shapes are ``(B, window_size, 2048, 1)`` and
        ``(B, 1, 1)``; otherwise ``(B, window_size, 2048, antennas)`` and
        ``(B, 1)``. ``B`` is ``batch_size`` except for a final partial batch.

        Raises:
            IndexError: If ``batch_idx`` does not address an existing batch.
        """
        num_windows = int(self.indices.shape[0])
        first_idx = batch_idx * self.batch_size
        if not 0 <= first_idx < num_windows:
            raise IndexError(f'batch index {batch_idx} is out of range')
        # Clamp so that data and labels always have the same number of entries.
        last_idx = min(first_idx + self.batch_size, num_windows)

        # Window starts come from self.indices: using the flat sample index as
        # a row offset would drift across file boundaries and decouple the
        # windows from self.labels.
        starts = self.indices[first_idx:last_idx].numpy()
        data_batch = tf.stack(
            [self.csi[int(start):int(start) + self.window_size, ...] for start in starts]
        )
        labels_batch = tf.expand_dims(self.labels[first_idx:last_idx], 1)

        if self.antennas == 1:
            data_batch = tf.expand_dims(data_batch, 3)
            labels_batch = tf.expand_dims(labels_batch, 2)

        return data_batch, labels_batch

## Variational Auto-Encoder

In [28]:
class Sampling(keras.layers.Layer):
    """Draw a latent sample ``z`` from the pair ``(z_mean, z_log_var)``.

    The layer returns ``z_mean + exp(0.5 * z_log_var) * eps`` where ``eps`` comes
    from ``keras.backend.random_normal`` and has the (batch, dim) shape of
    ``z_mean``.
    """

    def call(self, inputs):
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        noise = keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std_dev = tf.exp(0.5 * log_var)
        return mean + std_dev * noise
    
def create_csi_encoder(input_shape, latent_dim):
    """Build the convolutional encoder model named ``'encoder'``.

    Three non-overlapping ``Conv2D`` layers (stride equal to kernel size,
    32 filters, ReLU, ``'valid'`` padding) are followed by ``Flatten`` and a
    16-unit ReLU ``Dense`` layer. Two linear ``Dense`` heads produce ``z_mean``
    and ``z_log_var``; a ``Sampling`` layer draws ``z`` from them.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        latent_dim: Size of the latent vectors.

    Returns:
        A ``keras.Model`` mapping the input to ``[z_mean, z_log_var, z]``.
    """
    inputs = keras.Input(shape=input_shape)

    features = inputs
    # Layers are created in the same order as before so that their
    # auto-generated names and the checkpoint layout stay unchanged.
    for kernel in ((5, 8), (5, 8), (2, 4)):
        conv = keras.layers.Conv2D(32, kernel, activation='relu', strides=kernel, padding='valid')
        features = conv(features)
    features = keras.layers.Flatten()(features)
    features = keras.layers.Dense(16, activation='relu')(features)

    mean_head = keras.layers.Dense(latent_dim, name='z_mean')(features)
    log_var_head = keras.layers.Dense(latent_dim, name='z_log_var')(features)
    sample = Sampling()([mean_head, log_var_head])

    return keras.Model(inputs, [mean_head, log_var_head, sample], name='encoder')


def create_csi_decoder(input_shape, latent_dim, out_filter):
    """Build the transposed-convolution decoder model named ``'decoder'``.

    A ReLU ``Dense`` layer expands the latent vector to ``prod(input_shape)``
    values, which are reshaped to ``input_shape`` and upsampled by three
    ``Conv2DTranspose`` layers (stride equal to kernel size, 32 filters, ReLU).
    A final sigmoid ``Conv2DTranspose`` produces the output.

    Args:
        input_shape: Shape of the feature map fed to the first transposed
            convolution (without the batch axis).
        latent_dim: Size of the latent input vector.
        out_filter: Passed as both the filter count and the kernel size of the
            output layer.

    Returns:
        A ``keras.Model`` mapping a latent vector to the decoder output.
    """
    latent = keras.Input(shape=(latent_dim,))

    feature_map = keras.layers.Dense(math.prod(input_shape), activation='relu')(latent)
    feature_map = keras.layers.Reshape(input_shape)(feature_map)
    for kernel in ((2, 4), (5, 8), (5, 8)):
        upsample = keras.layers.Conv2DTranspose(32, kernel, activation='relu', strides=kernel, padding='same')
        feature_map = upsample(feature_map)

    # NOTE(review): `out_filter` doubles as the kernel size here; presumably
    # saved weights depend on it, so it is kept as is — confirm the intent.
    output_layer = keras.layers.Conv2DTranspose(out_filter, out_filter, activation='sigmoid', padding='same')
    reconstruction = output_layer(feature_map)

    return keras.Model(latent, reconstruction, name='decoder')

In [29]:
class VAE(keras.Model):
    """Variational auto-encoder built from the CSI encoder and decoder.

    Training uses a custom ``train_step`` whose loss is the summed binary
    cross-entropy reconstruction error plus the KL divergence term. Three
    ``Mean`` metrics track the total, reconstruction and KL losses.

    Args:
        enc_input_shape: Shape of one encoder input sample; its last entry is
            also passed to the decoder as ``out_filter``.
        dec_input_shape: Shape of the feature map the decoder starts from.
        latent_dim: Size of the latent space.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, enc_input_shape=(450, 2048, 1), dec_input_shape=(9, 8, 32), latent_dim=2, **kwargs):
        super().__init__(**kwargs)
        self.encoder = create_csi_encoder(enc_input_shape, latent_dim)
        self.decoder = create_csi_decoder(dec_input_shape, latent_dim, enc_input_shape[-1])
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(name='reconstruction_loss')
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

        # Print both architectures when the model is created.
        self.encoder.summary()
        self.decoder.summary()

    @property
    def metrics(self):
        """Return the loss trackers in the order total, reconstruction, KL."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimization step on ``data`` and return the running losses.

        Only ``data[0]`` (the inputs) is used; any labels in ``data[1]`` are
        ignored.
        """
        inputs = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(inputs)
            reconstruction = self.decoder(z)

            # binary_crossentropy averages over the last axis; the remaining
            # two non-batch axes are summed, then the batch is averaged.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(inputs, reconstruction), axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            'loss': self.total_loss_tracker.result(),
            'reconstruction_loss': self.reconstruction_loss_tracker.result(),
            'kl_loss': self.kl_loss_tracker.result(),
        }

    def call(self, inputs, training=None, mask=None):
        """Encode ``inputs``, sample ``z`` and return the decoded reconstruction.

        Previously this method returned ``None``, so calling the model directly
        silently produced no output. ``train_step`` does not use this method.
        ``mask`` is accepted for signature compatibility and is not used.
        """
        _, _, z = self.encoder(inputs, training=training)
        return self.decoder(z, training=training)


In [30]:
# Checkpoint file pattern; the literal `{epoch:04d}` placeholder is left in the
# string so that it can be filled in later (e.g. `checkpoint_path.format(epoch=0)`).
checkpoint_path = f'./{folder_name}/' + 'cp-{epoch:04d}.ckpt'
# NOTE(review): `checkpoint_dir` is not read by the visible cells — confirm
# whether it is still needed.
checkpoint_dir = os.path.dirname(checkpoint_path)
# Saves the weights only (not the full model).
checkpoint_cb = keras.callbacks.ModelCheckpoint(checkpoint_path, verbose=1, save_weights_only=True)
# Monitors the 'loss' entry returned by VAE.train_step, with a patience of 3.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='loss', patience=3)
# append=True: the CSV log is opened in append mode.
csv_logger_cb = keras.callbacks.CSVLogger(f'./{folder_name}/model_history_log.csv', append=True)

Training the VAEs from scratch can take a very long time, so it is advised to load the pre-trained models.

In [31]:
# Set to False to train the VAE from scratch instead of using downloaded weights.
load_pretrained_models = True

if load_pretrained_models:
    print('Loading pretrained models...')
    # The shell commands below are kept inside a string literal, i.e. they are
    # disabled; run them manually in a notebook cell to download the weights.
    """
    !wget https://zenodo.org/record/7983057/files/VAE_models.zip
    !unzip -o VAE_models.zip
    !rm VAE_models.zip
    """
else:
    # Train from scratch
    # The checkpoints, the CSV log and the final weights all go to this folder.
    os.makedirs(f'./{folder_name}', exist_ok=True)

    # The generator is created here because no earlier cell defines
    # `csi_generator`; without this the fit() call below raises a NameError.
    file_list = [f'./dataset/S1a_{x}.mat' for x in string.ascii_uppercase[:num_activities]]
    csi_generator = CsiDataGenerator(file_list, batch_size=BATCH_SIZE, antenna_select=antenna)

    # Same input shape as the cells that later reload these weights.
    vae = VAE(enc_input_shape=(450, 2048, ANTENNAS))
    vae.compile(optimizer=keras.optimizers.Adam())
    vae.save_weights(checkpoint_path.format(epoch=0))
    vae.fit(csi_generator, epochs=20, shuffle=True,
            callbacks=[checkpoint_cb, early_stopping_cb, csv_logger_cb])
    vae.save_weights(f'./{folder_name}/weights_vae')

Loading pretrained models...


## Data Pre-Processing

### PCA Compression

In [32]:
def find_n_components(data, target, directory=base_directory, saveGraph=False, plotGraph=True):
    """Return how many principal components are needed to explain ``target`` % of the variance.

    A full PCA is fitted on ``data`` and the cumulative explained variance (in
    percent) is scanned for the first component count that reaches ``target``.
    If ``target`` is never reached (e.g. ``target`` above the total because of
    rounding), all components are returned instead of silently reporting 1.

    Args:
        data: Samples-by-features data accepted by ``PCA.fit``.
        target: Required cumulative explained variance, in percent.
        directory: Output folder; it is created even when nothing is saved.
        saveGraph: Save the plot as ``var_cumulative_x_component.png`` in
            ``directory`` (only evaluated when ``plotGraph`` is true).
        plotGraph: Draw and show the cumulative variance plot.

    Returns:
        The number of components as a Python ``int``.
    """
    os.makedirs(directory, exist_ok=True)
    # Apply PCA with all components kept
    pca = PCA()
    pca.fit(data)

    var_cumulative = np.cumsum(pca.explained_variance_ratio_) * 100

    # First component count whose cumulative variance reaches `target` percent.
    reached = var_cumulative >= target
    if reached.any():
        num_components = int(np.argmax(reached)) + 1
    else:
        # argmax of an all-False mask is 0, which would wrongly report a
        # single component; fall back to every available component.
        num_components = len(var_cumulative)
    print(f"Number of components explaining {target}% variance: "+ str(num_components))

    if plotGraph:
        plt.figure(figsize=(10, 5))
        plt.title('Cumulative Explained Variance explained by the components')
        plt.ylabel('Cumulative Explained variance')
        plt.xlabel('Principal components')
        plt.axvline(x=num_components, color="r", linestyle="--")
        plt.axhline(y=target, color="r", linestyle="--")
        plt.plot(range(1, pca.n_components_ + 1), var_cumulative, marker='o', linestyle='--')
        plt.grid()
        if saveGraph:
            graph_path = os.path.join(directory, 'var_cumulative_x_component.png')
            plt.savefig(graph_path)
            print("Graph saved in: ", graph_path)
        plt.show()

    return num_components

def analyze_PCA(data, n_components, directory=base_directory, saveGraph=False, plotGraph=True):
    """Project ``data`` onto ``n_components`` principal components.

    Prints the per-component explained variance ratio and the final cumulative
    value, and optionally plots (and saves) the cumulative curve.

    Args:
        data: Samples-by-features data accepted by ``PCA.fit_transform``.
        n_components: Number of principal components to keep.
        directory: Output folder; it is created even when nothing is saved.
        saveGraph: Save the plot as ``cumulative_explained_variance.png`` in
            ``directory`` (only evaluated when ``plotGraph`` is true).
        plotGraph: Draw and show the cumulative variance plot.

    Returns:
        ``(reduced_df, pca)``: a DataFrame with columns ``PC0 .. PC{n-1}``
        holding the projected data, and the fitted ``PCA`` object.
    """
    os.makedirs(directory, exist_ok=True)

    model = PCA(n_components=n_components)
    scores = model.fit_transform(data)
    column_names = [f'PC{idx}' for idx in range(n_components)]
    scores_df = pd.DataFrame(data=scores, columns=column_names)

    # Per-component and accumulated share of the variance
    ratios = model.explained_variance_ratio_
    print("Explained variance ratio:", ratios)
    running_total = np.cumsum(ratios)
    print("Final Cumulative Explained Variance:", running_total[-1])

    if not plotGraph:
        return scores_df, model

    plt.figure(figsize=(10, 5))
    plt.plot(range(1, n_components + 1), running_total, marker='o', linestyle='--')
    plt.title('Cumulative Explained Variance by PCA Components')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.grid()
    if saveGraph:
        target_file = os.path.join(directory, 'cumulative_explained_variance.png')
        plt.savefig(target_file)
        print("Graph saved in: ", target_file)
    plt.show()

    return scores_df, model

def reconstruct_data(df, pca, columns):
    """Map PCA-space rows back to the original feature space.

    Args:
        df: DataFrame whose values are passed to ``pca.inverse_transform``.
        pca: Object providing ``inverse_transform``.
        columns: Column labels of the returned DataFrame.

    Returns:
        A DataFrame holding the inverse-transformed values.
    """
    restored_values = pca.inverse_transform(df.values)
    return pd.DataFrame(restored_values, columns=columns)

### Quantization

In [33]:
def lloyd_max_quantization(data, num_levels=16, max_iter=100, delta=1e-6):
    """Quantize ``data`` to ``num_levels`` values with the Lloyd-Max iteration.

    The codebook starts uniformly spaced between the minimum and maximum of
    ``data``. Each iteration places the thresholds halfway between neighbouring
    levels, assigns every value to its interval and moves each level to the
    mean of its values; levels without values keep their previous position.
    Iteration stops when no level moves by ``delta`` or more, or after
    ``max_iter`` iterations.

    When the loop ends because ``max_iter`` is reached, the returned thresholds
    and assignments are those computed before the last level update.

    Args:
        data: Array-like of numeric values (lists are accepted).
        num_levels: Number of quantization levels, at least 1.
        max_iter: Maximum number of iterations; ``0`` returns the uniform
            initial codebook instead of failing.
        delta: Convergence tolerance on the largest level movement.

    Returns:
        ``(quantized_data, clusters, thresholds)``.

    Raises:
        ValueError: If ``data`` is empty or ``num_levels`` is below 1.
    """
    data = np.asarray(data)
    if data.size == 0:
        raise ValueError('lloyd_max_quantization needs at least one data point')
    if num_levels < 1:
        raise ValueError('num_levels must be at least 1')

    # Dtype that `ndarray.mean` yields for this input, so that the updated
    # codebook has the same dtype as a codebook built from the means directly.
    mean_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64

    clusters = np.linspace(np.min(data), np.max(data), num_levels)  # Uniformly spaced
    thresholds = None
    indices = None

    for _ in range(max_iter):
        thresholds = (clusters[:-1] + clusters[1:]) / 2  # Interval borders between levels
        indices = np.digitize(data, thresholds)  # Assign each data point to a level

        # Start from the previous levels so that empty levels stay where they
        # are, then update only the levels that received data (no mean over an
        # empty slice, hence no NaN and no RuntimeWarning).
        new_clusters = clusters.astype(mean_dtype)
        occupied = np.flatnonzero(np.bincount(indices.ravel(), minlength=num_levels))
        for level in occupied:
            new_clusters[level] = data[indices == level].mean()

        # Non-finite data can still produce NaN means; keep the old level then.
        undefined = np.isnan(new_clusters)
        new_clusters[undefined] = clusters[undefined]

        # Stop if changes between iterations are small
        if np.max(np.abs(new_clusters - clusters)) < delta:
            break

        clusters = new_clusters

    if indices is None:
        # max_iter <= 0: quantize against the uniform initial codebook.
        thresholds = (clusters[:-1] + clusters[1:]) / 2
        indices = np.digitize(data, thresholds)

    # Quantize the data based on the final clusters
    quantized_data = clusters[indices]

    return quantized_data, clusters, thresholds

def dequantize_lloyd_max(quantized_data, clusters, thresholds):
    """Map values back to the codebook level of the interval they fall in.

    The intervals use the same convention as ``np.digitize`` with its default
    ``right=False`` (lower border included), which is the convention the
    quantizer in this file uses to assign values. With ``right=True`` a value
    lying exactly on a threshold was decoded to the neighbouring lower level.

    Args:
        quantized_data: Array-like of values to decode.
        clusters: Codebook levels, one more than ``thresholds``.
        thresholds: Increasing interval borders between the levels.

    Returns:
        Array of codebook levels with the shape of ``quantized_data``.
    """
    indices = np.digitize(quantized_data, thresholds)
    return np.asarray(clusters)[indices]

def apply_quantization(reduced_df, lvls):
    """Quantize every column of ``reduced_df`` independently.

    Each column is passed to ``lloyd_max_quantization`` with ``lvls`` levels and
    replaced by the quantized values; the codebook and thresholds are dropped.

    Args:
        reduced_df: DataFrame whose columns are quantized one by one.
        lvls: Number of quantization levels per column.

    Returns:
        The result of ``DataFrame.apply`` over the quantized columns.
    """
    def _quantize_column(col):
        quantized, _clusters, _thresholds = lloyd_max_quantization(col.values, num_levels=lvls)
        return quantized

    return reduced_df.apply(_quantize_column)

## Iterative Runs

In [10]:
# Explained-variance targets (percent) and quantization level counts that the
# iterative run loops over.
targets = [80]
levels = [16, 32, 64, 128] 

# One dataset file per activity letter (A, B, ...), limited to `num_activities`.
file_list = [f'./dataset/S1a_{x}.mat' for x in string.ascii_uppercase[:num_activities]]
csi_generator = CsiDataGenerator(file_list, batch_size=BATCH_SIZE, antenna_select=antenna)

# Normalized CSI tensor of the generator as a NumPy array.
csi_data = csi_generator.csi.numpy()
# 2048 column labels: Ampl_0 .. Ampl_1023 followed by Ampl_-1 .. Ampl_-1024.
csi_subcarriers = [f"Ampl_{i}" for i in range(1024)] + [f"Ampl_{-i}" for i in range(1, 1025)]

df_csi_data_original = pd.DataFrame(csi_data, columns=csi_subcarriers)

In [None]:
# The encoder depends neither on `target` nor on `num_levels`, so the model is
# built and its weights are restored once instead of on every inner iteration.
vae = VAE(enc_input_shape=(450, 2048, ANTENNAS))
vae.compile(optimizer=keras.optimizers.Adam())
vae.load_weights(f'./{folder_name}/weights_vae').expect_partial()

for target in targets:
    print(f"-------------- {target}% ----------------------")
    directory = f'./results/{target}%/dumps'
    os.makedirs(directory, exist_ok=True)

    # Disabled PCA stage (kept for reference). The DataFrame copy is part of
    # it, so it is disabled too rather than copying the dataset for nothing.
    # df_csi_data = df_csi_data_original.copy()
    # num_components = find_n_components(df_csi_data, target, directory=directory, saveGraph=True, plotGraph=True)
    # df_reduced, pca = analyze_PCA(df_csi_data, num_components, directory=directory, saveGraph=True, plotGraph=True)

    for num_levels in levels:
        print(f"-------------- {target}% {num_levels} lvls --------------")

        # Disabled quantization/reconstruction stage (kept for reference).
        # NOTE(review): while these stages are disabled, every (target,
        # num_levels) pair encodes the same generator data — confirm this is
        # intended.
        # df_quantized = apply_quantization(df_reduced, num_levels)
        # df_reconstructed = reconstruct_data(df_quantized, pca, csi_subcarriers)
        # df_reconstructed = df_reconstructed.to_numpy()
        # reconstructed_csi_data = tf.convert_to_tensor(df_reconstructed)
        # #csi_generator.csi = reconstructed_csi_data

        # Use the VAE to process CSI data. Batches are collected in lists and
        # concatenated once; growing the arrays inside the loop is quadratic.
        z_chunks = [np.zeros([0, 4])]   # float64 seed: keeps dtype and the 4 columns
        label_chunks = [np.zeros([0])]  # float64 seed, as in the stored format

        for (data, labels) in csi_generator:
            z_mean, z_log_var, _ = vae.encoder.predict(data, verbose=0)
            z_chunks.append(np.concatenate([z_mean, z_log_var], axis=1))
            # reshape keeps a 1-D vector even for a one-sample batch, where
            # squeeze would collapse the labels to a scalar.
            label_chunks.append(tf.reshape(labels, [-1]).numpy())

        z_data = np.concatenate(z_chunks, axis=0)
        z_labels = np.concatenate(label_chunks, axis=0)

        # Store the latent space representation of CSI data to file.
        dump_path = os.path.join(directory, f'{target}%_{num_levels}lvls_single_antenna_{antenna}.pkl')
        with open(dump_path, 'wb') as f:
            pickle.dump([z_data, z_labels], f)

## Single Run

In [None]:
# Reload the dataset: one file per activity letter, limited to `num_activities`.
file_list = [f'./dataset/S1a_{x}.mat' for x in string.ascii_uppercase[:num_activities]]
csi_generator = CsiDataGenerator(file_list, batch_size=BATCH_SIZE, antenna_select=antenna)

# Explained-variance target (percent) and number of quantization levels of
# the single run.
target = 90
num_levels = 8

In [None]:
# Normalized CSI tensor of the generator as a NumPy array.
csi_data = csi_generator.csi.numpy()
# 2048 column labels: Ampl_0 .. Ampl_1023 followed by Ampl_-1 .. Ampl_-1024.
csi_subcarriers = [f"Ampl_{i}" for i in range(1024)] + [f"Ampl_{-i}" for i in range(1, 1025)]

df_csi_data = pd.DataFrame(csi_data, columns=csi_subcarriers)

In [None]:
# Determine the number of components needed for `target` percent of explained
# variance, then project the CSI data onto them (plots disabled).
num_components = find_n_components(df_csi_data, target, plotGraph=False)
df_reduced, pca = analyze_PCA(df_csi_data, num_components, plotGraph=False)

In [None]:
# Quantize every principal-component column to `num_levels` levels.
df_quantized = apply_quantization(df_reduced, num_levels)

In [None]:
# Map the quantized principal components back to the 2048 CSI columns.
df_reconstructed = reconstruct_data(df_quantized, pca, csi_subcarriers)

df_reconstructed = df_reconstructed.to_numpy()
# Cast explicitly to float32, the dtype the generator uses for its own CSI
# tensor; otherwise the tensor inherits whatever dtype pandas produced.
reconstructed_csi_data = tf.convert_to_tensor(df_reconstructed, dtype=tf.float32)

print('Original csi data shape:', df_csi_data.shape)
print('PCA df shape:', df_reduced.shape)
print('Reconstructed csi data shape:', reconstructed_csi_data.shape)

# The generator's window indices and labels refer to rows of the tensor it
# built itself, so the replacement must have exactly the same layout.
if tuple(reconstructed_csi_data.shape) != tuple(csi_generator.csi.shape):
    raise ValueError(
        f'Reconstructed CSI shape {tuple(reconstructed_csi_data.shape)} does not match '
        f'the generator CSI shape {tuple(csi_generator.csi.shape)}'
    )

# From here on the generator serves windows of the reconstructed data.
csi_generator.csi = reconstructed_csi_data

In [None]:
# Use the VAE to process CSI data
vae = VAE(enc_input_shape=(450, 2048, ANTENNAS))
vae.compile(optimizer=keras.optimizers.Adam())
vae.load_weights(f'./{folder_name}/weights_vae').expect_partial()

# Batches are collected in lists and concatenated once; growing the arrays
# inside the loop copies all previous results on every batch.
z_chunks = [np.zeros([0, 4])]   # float64 seed: keeps dtype and the 4 columns
label_chunks = [np.zeros([0])]  # float64 seed, as in the stored format

for (data, labels) in csi_generator:
    z_mean, z_log_var, _ = vae.encoder.predict(data, verbose=0)
    z_chunks.append(np.concatenate([z_mean, z_log_var], axis=1))
    # reshape keeps a 1-D vector even for a one-sample batch, where squeeze
    # would collapse the labels to a scalar.
    label_chunks.append(tf.reshape(labels, [-1]).numpy())

# Columns of z_data: z_mean followed by z_log_var.
z_data = np.concatenate(z_chunks, axis=0)
z_labels = np.concatenate(label_chunks, axis=0)

In [None]:
# Store the latent space representation of CSI data to file.
# The pickle holds the two-element list [z_data, z_labels].
with open(f'./{folder_name}_reconstructed.pkl', 'wb') as f:
    pickle.dump([z_data, z_labels], f)

aaaaaaaaaaaaaaaaaaaa