In [1]:
from genetic_utils import *

# mnist-06

In [None]:
data, lables = csv_data_loader("mnist-06")

In [None]:
data.shape

In [None]:
lables

## VAE

In [4]:

def train_VAE(data,
              latent_dim = 2,
              hidden_layer_n = [20,18,16],
              num_dims = 10,
              kl_loss_factor = 0.01,
              epochs = 100,
              batch_size = 128
              ):
    """
    Build a dense variational autoencoder (VAE) and fit it to `data`.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
        Training rows; converted to float32 before fitting.
    latent_dim : int
        Width of the latent code.
    hidden_layer_n : sequence of three ints
        Hidden-layer widths, used in order by the encoder and in reverse by
        the decoder. The default list is only read, never mutated.
    num_dims : int
        Number of input features (also the width of the first encoder layer
        and of the decoder output).
    kl_loss_factor : float
        Weight of the KL term in the total loss.
    epochs, batch_size : int
        Forwarded to `fit`.

    Returns
    -------
    (vae, history) : the fitted model (exposing `.encoder` and `.decoder`) and
        the object returned by `fit`.
    """

    class Sampling(layers.Layer):
        """Draws z = z_mean + exp(0.5 * z_log_var) * eps with eps ~ N(0, I)."""

        def call(self, inputs):
            z_mean, z_log_var = inputs
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    # Encoder: num_dims -> hidden widths -> (z_mean, z_log_var, sampled z).
    encoder_inputs = keras.Input(shape=(num_dims,))
    x = layers.Dense(num_dims, activation="tanh")(encoder_inputs)
    x = layers.Dense(hidden_layer_n[0], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[1], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[2], activation="tanh")(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

    # Decoder: mirror of the encoder's hidden stack, linear output.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(hidden_layer_n[2], activation="tanh")(latent_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[0], activation="tanh")(x)
    decoder_outputs = layers.Dense(num_dims, activation="linear")(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    class VAE(keras.Model):
        """Encoder/decoder pair trained with MSE reconstruction + weighted KL."""

        def __init__(self, encoder, decoder, **kwargs):
            super().__init__(**kwargs)
            self.encoder = encoder
            self.decoder = decoder
            self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
            self.reconstruction_loss_tracker = keras.metrics.Mean(
                name="reconstruction_loss"
            )
            self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

        @property
        def metrics(self):
            return [
                self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker,
            ]

        def train_step(self, data):
            with tf.GradientTape() as tape:
                z_mean, z_log_var, z = self.encoder(data)
                reconstruction = self.decoder(z)
                reconstruction_loss = tf.keras.losses.MeanSquaredError()(data, reconstruction)
                kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
                # Reduce to a scalar. Previously the (batch, latent) tensor was
                # handed to tape.gradient, which sums non-scalar targets and so
                # scaled the gradient with batch_size * latent_dim. Averaging
                # over all elements keeps the logged kl_loss values and the
                # meaning of kl_loss_factor as they were.
                kl_loss = tf.reduce_mean(kl_terms)
                total_loss = reconstruction_loss + kl_loss_factor * kl_loss

            grads = tape.gradient(total_loss, self.trainable_weights)
            self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
            self.total_loss_tracker.update_state(total_loss)
            self.reconstruction_loss_tracker.update_state(reconstruction_loss)
            self.kl_loss_tracker.update_state(kl_loss)
            return {
                "loss": self.total_loss_tracker.result(),
                "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                "kl_loss": self.kl_loss_tracker.result(),
            }

    # Keep the data 2-D, (n_samples, num_dims), so it matches the encoder's
    # Input(shape=(num_dims,)) directly instead of relying on Keras silently
    # squeezing a trailing singleton axis.
    train_data = np.asarray(data, dtype="float32")

    vae = VAE(encoder, decoder)
    vae.compile(optimizer=tf.keras.optimizers.Adam())
    history = vae.fit(train_data, epochs=epochs, batch_size=batch_size, verbose=1)

    return vae, history

In [None]:
vae, history = train_VAE(data, num_dims=784, hidden_layer_n=[512, 256, 128] )

In [None]:
sample = data[3, :].reshape([1, 784])

z_mean, z_log_var, z = vae.encoder(sample.astype('float32'))
reconstruction = vae.decoder(z)

reconstruction_loss = tf.keras.losses.MeanSquaredError()(sample.astype('float32'),reconstruction)
        

In [None]:
reconstruction_loss.numpy()

In [16]:
def vae_detect_outliers(data,
                        vae_model,
                        num_dims,
                        n_std = 2.0,
                        batch_size = 1024
                        ):
    """
    Flag rows whose VAE reconstruction error is unusually high.

    Each row is encoded, decoded from the sampled latent `z`, and scored by
    its mean squared reconstruction error. A row is flagged when its error
    exceeds mean(errors) + n_std * std(errors).

    The errors are computed once and reused for both the threshold and the
    decision. `z` is sampled, so a second forward pass would compare each row
    against a threshold derived from different random draws.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
    vae_model : object with `.encoder(x) -> (z_mean, z_log_var, z)` and
        `.decoder(z) -> reconstruction`.
    num_dims : int
        Number of features per row.
    n_std : float
        Number of standard deviations above the mean used as the threshold.
    batch_size : int
        Rows per forward pass.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,): 1 for flagged rows, else 0.
    """
    samples = np.asarray(data)
    n_samples = samples.shape[0]
    if n_samples == 0:
        # Nothing to score; mean/std of an empty array would be NaN.
        return np.array([], dtype=int)
    samples = samples.reshape([n_samples, num_dims]).astype('float32')

    error_chunks = []
    for start in range(0, n_samples, batch_size):
        batch = samples[start:start + batch_size]
        _, _, z = vae_model.encoder(batch)
        reconstruction = np.asarray(vae_model.decoder(z))
        # Per-row MSE, the same quantity the per-sample Keras loss produced.
        error_chunks.append(np.mean(np.square(batch - reconstruction), axis=-1))
    errors = np.concatenate(error_chunks)

    threshold = np.mean(errors) + n_std * np.std(errors)

    return (errors > threshold).astype(int)

In [None]:
classes = vae_detect_outliers(data, vae, 784)

In [None]:
np.sum(classes)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Score the detector's 0/1 output against the inverted loaded labels (1 - lables).
true_labels = 1 - lables
predicted_labels = classes

cm = confusion_matrix(true_labels, predicted_labels)

# Divide each row by its total so every row holds per-true-class fractions.
cm_percentage = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

In [None]:
cm

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

true_labels = 1 - lables
predicted_labels = classes

# Precision, recall and F1 of the predicted flags against the inverted labels.
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

In [None]:
# Column-normalise the confusion matrix so each predicted-class column sums to 1.
# NOTE(review): `mat` was never assigned anywhere in the notebook, so this cell
# raised NameError on a fresh kernel; it is assumed to be the confusion matrix
# `cm` computed above. Confirm.
# A float copy is used because dividing an integer matrix in place would truncate.
mat = cm.astype('float')
mat = mat / mat.sum(axis=0, keepdims=True)

In [None]:
mat

## RAE

In [None]:
def train_RAE(data,
              latent_dim = 2,
              hidden_layer_n = [20,18,16],
              num_dims = 10,
              z_loss_w = 0.01,
              REG_loss_w = 0.01,
              epochs = 100,
              batch_size = 128
              ):
    """
    Build a dense regularised autoencoder (RAE) and fit it to `data`.

    The loss is the per-sample MSE reconstruction error, plus `z_loss_w` times
    the mean squared latent code, plus `REG_loss_w` times a gradient penalty
    (mean squared gradient of the squared decoder output w.r.t. the latent).

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
        Training rows; converted to float32 before fitting.
    latent_dim : int
        Width of the latent code.
    hidden_layer_n : sequence of three ints
        Hidden-layer widths, used in order by the encoder and in reverse by
        the decoder. The default list is only read, never mutated.
    num_dims : int
        Number of input features.
    z_loss_w, REG_loss_w : float
        Weights of the latent-norm and gradient-penalty terms.
    epochs, batch_size : int
        Forwarded to `fit`.

    Returns
    -------
    (rae, history) : the fitted model (exposing `.encoder` and `.decoder`) and
        the object returned by `fit`.
    """

    # Encoder: num_dims -> hidden widths -> latent code, sigmoid throughout.
    encoder_inputs = keras.Input(shape=(num_dims,))
    x = layers.Dense(hidden_layer_n[0], activation="sigmoid")(encoder_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="sigmoid")(x)
    x = layers.Dense(hidden_layer_n[2], activation="sigmoid")(x)
    encoder_output = layers.Dense(latent_dim, activation="sigmoid")(x)
    encoder = keras.Model(encoder_inputs, encoder_output, name="encoder")

    # Decoder: mirror of the encoder's hidden stack, linear output.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(hidden_layer_n[2], activation="sigmoid")(latent_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="sigmoid")(x)
    x = layers.Dense(hidden_layer_n[0], activation="sigmoid")(x)
    decoder_outputs = layers.Dense(num_dims, activation="linear")(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    class RAE(keras.Model):
        """Deterministic autoencoder with latent-norm and gradient penalties."""

        def __init__(self, encoder, decoder, **kwargs):
            super().__init__(**kwargs)
            self.encoder = encoder
            self.decoder = decoder
            self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
            self.reconstruction_loss_tracker = keras.metrics.Mean(
                name="reconstruction_loss"
            )
            self.z_tracker = keras.metrics.Mean(name="z_loss")
            self.REG_tracker = keras.metrics.Mean(name="REG_loss")

        @property
        def metrics(self):
            return [
                self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.z_tracker,
                self.REG_tracker,
            ]

        def train_step(self, data):
            # Only the forward pass and loss live inside the tape. The tape is
            # used for a single gradient call, so it need not be persistent.
            with tf.GradientTape() as tape:
                z = self.encoder(data)
                reconstruction = self.decoder(z)

                # Per-sample reconstruction error, shape (batch,).
                reconstruction_loss = tf.keras.losses.MeanSquaredError(
                    reduction=tf.keras.losses.Reduction.NONE
                )(data, reconstruction)

                z_loss = K.mean(K.square(z), axis=[1])

                # NOTE(review): `K` is presumably the Keras backend exported by
                # genetic_utils; its `gradients` appears to rely on graph mode
                # (as inside `fit`). Confirm before compiling with
                # run_eagerly=True.
                REG_loss = K.mean(K.square(K.gradients(K.square(reconstruction), z)))

                per_sample_loss = reconstruction_loss + z_loss_w * z_loss + REG_loss_w * REG_loss
                # Reduce to a scalar so the gradient is a batch average rather
                # than the implicit sum tape.gradient takes over a tensor target.
                total_loss = tf.reduce_mean(per_sample_loss)

            grads = tape.gradient(total_loss, self.trainable_weights)
            self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
            # Trackers still receive the per-sample tensors, so logged values
            # are weighted exactly as before.
            self.total_loss_tracker.update_state(per_sample_loss)
            self.reconstruction_loss_tracker.update_state(reconstruction_loss)
            self.z_tracker.update_state(z_loss)
            self.REG_tracker.update_state(REG_loss)
            return {
                "loss": self.total_loss_tracker.result(),
                "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                "z_loss": self.z_tracker.result(),
                "REG_loss": self.REG_tracker.result(),
            }

    # Keep the data 2-D, (n_samples, num_dims), so it matches the encoder's
    # Input(shape=(num_dims,)) directly instead of relying on Keras silently
    # squeezing a trailing singleton axis.
    train_data = np.asarray(data, dtype="float32")

    rae = RAE(encoder, decoder)
    rae.compile(optimizer=tf.keras.optimizers.Adam())
    history = rae.fit(train_data, epochs=epochs, batch_size=batch_size, verbose=1)

    return rae, history

In [None]:
rae, history = train_RAE(data, num_dims=784, hidden_layer_n=[512, 256, 128], z_loss_w = 0.01)

In [None]:
sample = data[1, :].reshape([1, 784])

z = rae.encoder(sample.astype('float32'))
reconstruction = rae.decoder(z)

reconstruction_loss = tf.keras.losses.MeanSquaredError()(sample.astype('float32'), reconstruction) 

In [None]:
reconstruction_loss.numpy()

In [None]:
def rae_detect_outliers(data,
                        rae_model,
                        num_dims,
                        n_std = 3.0,
                        batch_size = 1024
                        ):
    """
    Flag rows whose RAE reconstruction error is unusually high.

    Each row is encoded, decoded, and scored by its mean squared
    reconstruction error. A row is flagged when its error exceeds
    mean(errors) + n_std * std(errors).

    The previous version built the threshold from the whole array of errors
    instead of their mean. That effectively flagged every row above
    min(errors) + 3 * std and cost O(n^2) comparisons.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
    rae_model : object with `.encoder(x) -> z` and `.decoder(z) -> reconstruction`.
    num_dims : int
        Number of features per row.
    n_std : float
        Number of standard deviations above the mean used as the threshold.
    batch_size : int
        Rows per forward pass.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,): 1 for flagged rows, else 0.
    """
    samples = np.asarray(data)
    n_samples = samples.shape[0]
    if n_samples == 0:
        # Nothing to score; mean/std of an empty array would be NaN.
        return np.array([], dtype=int)
    samples = samples.reshape([n_samples, num_dims]).astype('float32')

    error_chunks = []
    for start in range(0, n_samples, batch_size):
        batch = samples[start:start + batch_size]
        z = rae_model.encoder(batch)
        reconstruction = np.asarray(rae_model.decoder(z))
        # Per-row MSE, the same quantity the per-sample Keras loss produced.
        error_chunks.append(np.mean(np.square(batch - reconstruction), axis=-1))
    errors = np.concatenate(error_chunks)

    # Scalar threshold: mean of the errors, not the error array itself.
    threshold = np.mean(errors) + n_std * np.std(errors)

    return (errors > threshold).astype(int)

In [None]:
classes = rae_detect_outliers(data, rae, 784)

In [None]:
np.sum(classes)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Score the detector's 0/1 output against the inverted loaded labels (1 - lables).
true_labels = 1 - lables
predicted_labels = classes

cm = confusion_matrix(true_labels, predicted_labels)

# Divide each row by its total so every row holds per-true-class fractions.
cm_percentage = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix:")
print(cm)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

true_labels = 1 - lables
predicted_labels = classes

# Precision, recall and F1 of the predicted flags against the inverted labels.
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# mnist-25

In [None]:
data, labels = csv_data_loader("mnist-25")

In [None]:
data.shape

In [None]:
labels.shape

## VAE

In [None]:

def train_VAE(data,
              latent_dim = 2,
              hidden_layer_n = [20,18,16],
              num_dims = 10,
              kl_loss_factor = 0.1,
              epochs = 200,
              batch_size = 64
              ):
    """
    Build a dense variational autoencoder (VAE) and fit it to `data`.

    NOTE: this redefines the `train_VAE` from earlier in the notebook with
    different defaults (kl_loss_factor=0.1, epochs=200, batch_size=64).
    Whichever definition cell ran last is the one in effect.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
        Training rows; converted to float32 before fitting.
    latent_dim : int
        Width of the latent code.
    hidden_layer_n : sequence of three ints
        Hidden-layer widths, used in order by the encoder and in reverse by
        the decoder. The default list is only read, never mutated.
    num_dims : int
        Number of input features (also the width of the first encoder layer
        and of the decoder output).
    kl_loss_factor : float
        Weight of the KL term in the total loss.
    epochs, batch_size : int
        Forwarded to `fit`.

    Returns
    -------
    (vae, history) : the fitted model (exposing `.encoder` and `.decoder`) and
        the object returned by `fit`.
    """

    class Sampling(layers.Layer):
        """Draws z = z_mean + exp(0.5 * z_log_var) * eps with eps ~ N(0, I)."""

        def call(self, inputs):
            z_mean, z_log_var = inputs
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    # Encoder: num_dims -> hidden widths -> (z_mean, z_log_var, sampled z).
    encoder_inputs = keras.Input(shape=(num_dims,))
    x = layers.Dense(num_dims, activation="tanh")(encoder_inputs)
    x = layers.Dense(hidden_layer_n[0], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[1], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[2], activation="tanh")(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

    # Decoder: mirror of the encoder's hidden stack, linear output.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(hidden_layer_n[2], activation="tanh")(latent_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="tanh")(x)
    x = layers.Dense(hidden_layer_n[0], activation="tanh")(x)
    decoder_outputs = layers.Dense(num_dims, activation="linear")(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    class VAE(keras.Model):
        """Encoder/decoder pair trained with MSE reconstruction + weighted KL."""

        def __init__(self, encoder, decoder, **kwargs):
            super().__init__(**kwargs)
            self.encoder = encoder
            self.decoder = decoder
            self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
            self.reconstruction_loss_tracker = keras.metrics.Mean(
                name="reconstruction_loss"
            )
            self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

        @property
        def metrics(self):
            return [
                self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker,
            ]

        def train_step(self, data):
            with tf.GradientTape() as tape:
                z_mean, z_log_var, z = self.encoder(data)
                reconstruction = self.decoder(z)
                reconstruction_loss = tf.keras.losses.MeanSquaredError()(data, reconstruction)
                kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
                # Reduce to a scalar. Previously the (batch, latent) tensor was
                # handed to tape.gradient, which sums non-scalar targets and so
                # scaled the gradient with batch_size * latent_dim. Averaging
                # over all elements keeps the logged kl_loss values and the
                # meaning of kl_loss_factor as they were.
                kl_loss = tf.reduce_mean(kl_terms)
                total_loss = reconstruction_loss + kl_loss_factor * kl_loss

            grads = tape.gradient(total_loss, self.trainable_weights)
            self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
            self.total_loss_tracker.update_state(total_loss)
            self.reconstruction_loss_tracker.update_state(reconstruction_loss)
            self.kl_loss_tracker.update_state(kl_loss)
            return {
                "loss": self.total_loss_tracker.result(),
                "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                "kl_loss": self.kl_loss_tracker.result(),
            }

    # Keep the data 2-D, (n_samples, num_dims), so it matches the encoder's
    # Input(shape=(num_dims,)) directly instead of relying on Keras silently
    # squeezing a trailing singleton axis.
    train_data = np.asarray(data, dtype="float32")

    vae = VAE(encoder, decoder)
    vae.compile(optimizer=tf.keras.optimizers.Adam())
    history = vae.fit(train_data, epochs=epochs, batch_size=batch_size, verbose=1)

    return vae, history

In [None]:
vae, history = train_VAE(data, num_dims=784, hidden_layer_n=[512, 256, 128] )

In [None]:
sample = data[1, :].reshape([1, 784])

z_mean, z_log_var, z = vae.encoder(sample.astype('float32'))
reconstruction = vae.decoder(z)

reconstruction_loss = tf.keras.losses.MeanSquaredError()(sample.astype('float32'),reconstruction)
        

In [None]:
reconstruction_loss.numpy()

In [None]:
labels

In [None]:
def vae_detect_outliers(data,
                        vae_model,
                        num_dims,
                        n_std = 2.0,
                        batch_size = 1024
                        ):
    """
    Flag rows whose VAE reconstruction error is unusually high.

    Each row is encoded, decoded from the sampled latent `z`, and scored by
    its mean squared reconstruction error. A row is flagged when its error
    exceeds mean(errors) + n_std * std(errors).

    The errors are computed once and reused for both the threshold and the
    decision. `z` is sampled, so a second forward pass would compare each row
    against a threshold derived from different random draws.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
    vae_model : object with `.encoder(x) -> (z_mean, z_log_var, z)` and
        `.decoder(z) -> reconstruction`.
    num_dims : int
        Number of features per row.
    n_std : float
        Number of standard deviations above the mean used as the threshold.
    batch_size : int
        Rows per forward pass.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,): 1 for flagged rows, else 0.
    """
    samples = np.asarray(data)
    n_samples = samples.shape[0]
    if n_samples == 0:
        # Nothing to score; mean/std of an empty array would be NaN.
        return np.array([], dtype=int)
    samples = samples.reshape([n_samples, num_dims]).astype('float32')

    error_chunks = []
    for start in range(0, n_samples, batch_size):
        batch = samples[start:start + batch_size]
        _, _, z = vae_model.encoder(batch)
        reconstruction = np.asarray(vae_model.decoder(z))
        # Per-row MSE, the same quantity the per-sample Keras loss produced.
        error_chunks.append(np.mean(np.square(batch - reconstruction), axis=-1))
    errors = np.concatenate(error_chunks)

    threshold = np.mean(errors) + n_std * np.std(errors)

    return (errors > threshold).astype(int)

In [None]:
classes = vae_detect_outliers(data, vae, 784)

In [None]:
np.sum(classes)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Score the detector's 0/1 output against the inverted loaded labels (1 - labels).
true_labels = 1 - labels
predicted_labels = classes

cm = confusion_matrix(true_labels, predicted_labels)

# Divide each row by its total so every row holds per-true-class fractions.
cm_percentage = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix:")
print(cm)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

## RAE

In [20]:
def train_RAE(data,
              latent_dim = 2,
              hidden_layer_n = [20,18,16],
              num_dims = 10,
              z_loss_w = 0.01,
              REG_loss_w = 0.01,
              epochs = 100,
              batch_size = 128
              ):
    """
    Build a dense regularised autoencoder (RAE) and fit it to `data`.

    The loss is the per-sample MSE reconstruction error, plus `z_loss_w` times
    the mean squared latent code, plus `REG_loss_w` times a gradient penalty
    (mean squared gradient of the squared decoder output w.r.t. the latent).

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
        Training rows; converted to float32 before fitting.
    latent_dim : int
        Width of the latent code.
    hidden_layer_n : sequence of three ints
        Hidden-layer widths, used in order by the encoder and in reverse by
        the decoder. The default list is only read, never mutated.
    num_dims : int
        Number of input features.
    z_loss_w, REG_loss_w : float
        Weights of the latent-norm and gradient-penalty terms.
    epochs, batch_size : int
        Forwarded to `fit`.

    Returns
    -------
    (rae, history) : the fitted model (exposing `.encoder` and `.decoder`) and
        the object returned by `fit`.
    """

    # Encoder: num_dims -> hidden widths -> latent code, sigmoid throughout.
    encoder_inputs = keras.Input(shape=(num_dims,))
    x = layers.Dense(hidden_layer_n[0], activation="sigmoid")(encoder_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="sigmoid")(x)
    x = layers.Dense(hidden_layer_n[2], activation="sigmoid")(x)
    encoder_output = layers.Dense(latent_dim, activation="sigmoid")(x)
    encoder = keras.Model(encoder_inputs, encoder_output, name="encoder")

    # Decoder: mirror of the encoder's hidden stack, linear output.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(hidden_layer_n[2], activation="sigmoid")(latent_inputs)
    x = layers.Dense(hidden_layer_n[1], activation="sigmoid")(x)
    x = layers.Dense(hidden_layer_n[0], activation="sigmoid")(x)
    decoder_outputs = layers.Dense(num_dims, activation="linear")(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

    class RAE(keras.Model):
        """Deterministic autoencoder with latent-norm and gradient penalties."""

        def __init__(self, encoder, decoder, **kwargs):
            super().__init__(**kwargs)
            self.encoder = encoder
            self.decoder = decoder
            self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
            self.reconstruction_loss_tracker = keras.metrics.Mean(
                name="reconstruction_loss"
            )
            self.z_tracker = keras.metrics.Mean(name="z_loss")
            self.REG_tracker = keras.metrics.Mean(name="REG_loss")

        @property
        def metrics(self):
            return [
                self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.z_tracker,
                self.REG_tracker,
            ]

        def train_step(self, data):
            # Only the forward pass and loss live inside the tape. The tape is
            # used for a single gradient call, so it need not be persistent.
            with tf.GradientTape() as tape:
                z = self.encoder(data)
                reconstruction = self.decoder(z)

                # Per-sample reconstruction error, shape (batch,).
                reconstruction_loss = tf.keras.losses.MeanSquaredError(
                    reduction=tf.keras.losses.Reduction.NONE
                )(data, reconstruction)

                z_loss = K.mean(K.square(z), axis=[1])

                # NOTE(review): `K` is presumably the Keras backend exported by
                # genetic_utils; its `gradients` appears to rely on graph mode
                # (as inside `fit`). Confirm before compiling with
                # run_eagerly=True.
                REG_loss = K.mean(K.square(K.gradients(K.square(reconstruction), z)))

                per_sample_loss = reconstruction_loss + z_loss_w * z_loss + REG_loss_w * REG_loss
                # Reduce to a scalar so the gradient is a batch average rather
                # than the implicit sum tape.gradient takes over a tensor target.
                total_loss = tf.reduce_mean(per_sample_loss)

            grads = tape.gradient(total_loss, self.trainable_weights)
            self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
            # Trackers still receive the per-sample tensors, so logged values
            # are weighted exactly as before.
            self.total_loss_tracker.update_state(per_sample_loss)
            self.reconstruction_loss_tracker.update_state(reconstruction_loss)
            self.z_tracker.update_state(z_loss)
            self.REG_tracker.update_state(REG_loss)
            return {
                "loss": self.total_loss_tracker.result(),
                "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                "z_loss": self.z_tracker.result(),
                "REG_loss": self.REG_tracker.result(),
            }

    # Keep the data 2-D, (n_samples, num_dims), so it matches the encoder's
    # Input(shape=(num_dims,)) directly instead of relying on Keras silently
    # squeezing a trailing singleton axis.
    train_data = np.asarray(data, dtype="float32")

    rae = RAE(encoder, decoder)
    rae.compile(optimizer=tf.keras.optimizers.Adam())
    history = rae.fit(train_data, epochs=epochs, batch_size=batch_size, verbose=1)

    return rae, history

In [None]:
rae, history = train_RAE(data, num_dims=784, hidden_layer_n=[512, 256, 128], z_loss_w = 0.01)

In [None]:
sample = data[-1, :].reshape([1, 784])

z = rae.encoder(sample.astype('float32'))
reconstruction = rae.decoder(z)

reconstruction_loss = tf.keras.losses.MeanSquaredError()(sample.astype('float32'), reconstruction) 

In [None]:
reconstruction_loss.numpy()

In [22]:
def rae_detect_outliers(data,
                        rae_model,
                        num_dims,
                        n_std = 3.0,
                        batch_size = 1024
                        ):
    """
    Flag rows whose RAE reconstruction error is unusually high.

    Each row is encoded, decoded, and scored by its mean squared
    reconstruction error. A row is flagged when its error exceeds
    mean(errors) + n_std * std(errors).

    The previous version built the threshold from the whole array of errors
    instead of their mean. That effectively flagged every row above
    min(errors) + 3 * std and cost O(n^2) comparisons.

    Parameters
    ----------
    data : array-like, shape (n_samples, num_dims)
    rae_model : object with `.encoder(x) -> z` and `.decoder(z) -> reconstruction`.
    num_dims : int
        Number of features per row.
    n_std : float
        Number of standard deviations above the mean used as the threshold.
    batch_size : int
        Rows per forward pass.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,): 1 for flagged rows, else 0.
    """
    samples = np.asarray(data)
    n_samples = samples.shape[0]
    if n_samples == 0:
        # Nothing to score; mean/std of an empty array would be NaN.
        return np.array([], dtype=int)
    samples = samples.reshape([n_samples, num_dims]).astype('float32')

    error_chunks = []
    for start in range(0, n_samples, batch_size):
        batch = samples[start:start + batch_size]
        z = rae_model.encoder(batch)
        reconstruction = np.asarray(rae_model.decoder(z))
        # Per-row MSE, the same quantity the per-sample Keras loss produced.
        error_chunks.append(np.mean(np.square(batch - reconstruction), axis=-1))
    errors = np.concatenate(error_chunks)

    # Scalar threshold: mean of the errors, not the error array itself.
    threshold = np.mean(errors) + n_std * np.std(errors)

    return (errors > threshold).astype(int)

In [None]:
classes = rae_detect_outliers(data, rae, 784)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Score the detector's 0/1 output against the inverted loaded labels (1 - labels).
true_labels = 1 - labels
predicted_labels = classes

cm = confusion_matrix(true_labels, predicted_labels)

# Divide each row by its total so every row holds per-true-class fractions.
cm_percentage = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix:")
print(cm)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

# Fashion TB

In [11]:
data, lables = csv_data_loader("fashion-TB")

In [12]:
data.shape

(7100, 784)

In [5]:
vae, history = train_VAE(data, num_dims=784, hidden_layer_n=[512, 256, 128] )



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [7]:
sample = data[-1, :].reshape([1, 784])

z_mean, z_log_var, z = vae.encoder(sample.astype('float32'))
reconstruction = vae.decoder(z)

reconstruction_loss = tf.keras.losses.MeanSquaredError()(sample.astype('float32'),reconstruction)
reconstruction_loss.numpy()  

6847.4556

In [17]:
classes = vae_detect_outliers(data, vae, 784)

In [18]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Score the detector's 0/1 output against the inverted loaded labels (1 - lables).
true_labels = 1 - lables
predicted_labels = classes

cm = confusion_matrix(true_labels, predicted_labels)

# Divide each row by its total so every row holds per-true-class fractions.
cm_percentage = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix:")
print(cm)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

Confusion Matrix:
[[6725  275]
 [  69   31]]
Confusion Matrix (Percentage):
[[0.96071429 0.03928571]
 [0.69       0.31      ]]


In [19]:
from sklearn.metrics import f1_score, precision_score, recall_score

true_labels = 1 - lables
predicted_labels = classes

# Precision, recall and F1 of the predicted flags against the inverted labels.
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Precision: 0.10130718954248366
Recall: 0.31
F1 Score: 0.15270935960591134


In [21]:
rae, history = train_RAE(data, num_dims=784, hidden_layer_n=[512, 256, 128], z_loss_w = 0.01)



Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [23]:
classes = rae_detect_outliers(data, rae, 784)

KeyboardInterrupt: 