In [1]:
# https://www.tensorflow.org/tutorials/generative/autoencoder
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

import glob
import os
from PIL import Image
import io
from contextlib import redirect_stdout
from log_training import TrainingLogger

In [5]:
# Prepare image data 
def img_to_np(path, resize = True, extract_labels=False):  
    img_array = []
    labels = []
    fpaths = glob.glob(path, recursive=True)
    for fname in fpaths:
        if(extract_labels): 
            if '_bad' in os.path.basename(fname):
                labels.append(1)  # 1 for outlier
            else:
                labels.append(0)  # 0 for non-outlier
        img = Image.open(fname).convert("L") # Grayscale when using "RGB" you have to change the encoder and decoder 
        if(resize): img = img.resize((64,64))
        img_array.append(np.asarray(img))
    images = np.array(img_array)
    if(extract_labels): return images, np.array(labels)
    return images

path_train = r'C:\Users\oswal\Desktop\ImageClassification\elpv-dataset\train_without_bad_images\**\*'
path_test = r'C:\Users\oswal\Desktop\ImageClassification\elpv-dataset\test_images\**\*'
path_treshold = r'C:\Users\oswal\Desktop\ImageClassification\elpv-dataset\train_without_good_images\**\*'

train = img_to_np(path_train)
test, test_labels = img_to_np(path_test, extract_labels=True)
set_bad_threshold = img_to_np(path_treshold)
train = train.astype('float32') / 255.0
test = test.astype('float32') / 255.0
set_bad_threshold  = set_bad_threshold.astype('float32') / 255.0
# Reshape to include the channel dimension -> needed with grayscale conversion
train = np.expand_dims(train, axis=-1)
test = np.expand_dims(test, axis=-1)
set_bad_threshold = np.expand_dims(set_bad_threshold, axis=-1)

In [3]:
EPOCHS=10
LEARNING_RATE=1e-4
BATCH_SIZE=32
OPTIMIZER='adam'

In [6]:
# Custom Autoencoder model
class Autoencoder(tf.keras.Model):
    def __init__(self, encoder, decoder):
        super(Autoencoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    # For logging
    def summary(self):
        return self.encoder.summary(), self.decoder.summary()

# Model parameters
encoding_dim = 1024
dense_dim = [8, 8, 128]

# Define the encoder
encoder = tf.keras.Sequential([
    layers.Input(shape=(64, 64, 1)),  # 64x64 pixels with one color channel (gray)
     # extracts 64 feature maps using 4x4 filters | output dimensions are reduced by half due to the stride of 2.
    layers.Conv2D(64, 4, strides=2, padding='same', activation='relu'),# (64, 64, 1) -> (32, 32, 64)
    layers.Conv2D(128, 4, strides=2, padding='same', activation='relu'), # extracts 64 feature maps using 4x4 filters (32, 32, 64) -> (16, 16, 128)
    layers.Conv2D(512, 4, strides=2, padding='same', activation='relu'), # (16, 16, 128) -> (8, 8, 512)
    layers.Flatten(), # flattens the 3D tensor (8, 8, 512) into a 1D vector of size 32768
    layers.Dense(encoding_dim) # fully connected layer reduces the flattened vector to a 1D vector of size 1024
])

# Define the decoder
decoder = tf.keras.Sequential([
    layers.Input(shape=(encoding_dim,)),
    layers.Dense(np.prod(dense_dim)), # takes 1024-dimensional vector and maps it to size 8192, which corresponds to the flattened form of the next target shape (8, 8, 128)
    layers.Reshape(target_shape=dense_dim), # reshapes 8192 into (8, 8, 128)
    layers.Conv2DTranspose(256, 4, strides=2, padding='same', activation='relu'), # deconvolution -> increases the depth to 256 feature maps (8, 8, 128) -> (16, 16, 256)
    layers.Conv2DTranspose(64, 4, strides=2, padding='same', activation='relu'), # (16, 16, 256) -> (32, 32, 64)
    layers.Conv2DTranspose(1, 4, strides=2, padding='same', activation='sigmoid')  # (32, 32, 64) -> (64, 64, 1)
])

# Create the Autoencoder model
autoencoder = Autoencoder(encoder, decoder)
# loss function is Mean Squared Error(MSE)
autoencoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE), loss='mse')

# Train the model
# not shure if validation_split is best but validation_data I dont understand completely 
history = autoencoder.fit(train, train, epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=True, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Compute the reconstruction error for the test set
reconstructions = autoencoder.predict(test)
reconstruction_errors = np.mean(np.square(test - reconstructions), axis=(1, 2, 3))

# Set a threshold for classifying outliers 
# 90 means 90% are classified as inliners | because test set is 50% outliers 50 makes sense I think
# maybe make testset with only outliers and set a threshold of 95 for example 
threshold = np.percentile(reconstruction_errors, 50)  

# Predict whether each test instance is an outlier
predicted_labels = (reconstruction_errors > threshold).astype(int)

# Calculate accuracy
accuracy = accuracy_score(test_labels, predicted_labels)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 60.71%


In [10]:
# Log all the data and store it
logger = TrainingLogger(epochs=EPOCHS, learning_rate=LEARNING_RATE, batch_size=BATCH_SIZE, optimizer=OPTIMIZER)
logger.capture_model_summary(autoencoder)
logger.update_train_metrics(val_accuracy=None, val_loss=history.history['val_loss'], accuracy=None, loss=history.history['loss'])
logger.update_test_metrics(accuracy=accuracy, loss=None)
logger.print_and_save_log()


"epochs": 10,
"learning_rate": 0.0001,
"batch_size": 32,
"optimizer": adam,
"train_accuracy": None,
"train_val_accuracy": None,
"train_loss": [0.03285584971308708, 0.014688660390675068, 0.01194201409816742, 0.011253530159592628, 0.010828845202922821, 0.010693410411477089, 0.010759016498923302, 0.010642633773386478, 0.010185929015278816, 0.009922865778207779],
"train_val_loss": [0.010713508352637291, 0.006333703175187111, 0.005967853125184774, 0.006221625488251448, 0.006382073741406202, 0.00719117047265172, 0.006908770184963942, 0.006842107977718115, 0.007095559500157833, 0.0061037871055305],
"test_accuracy": 0.6071428571428571,
"test_loss": None,
"timestamp": 2024-08-28 17:03:37, 
"model_summary": Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 32, 32, 64)        1088      
                                                                 
 conv2