# DVA263 project

# Libraries

Import every library and project module used in this notebook.

In [34]:
import autoencoder as ae
import cnn as cnn
import preprocessing as pre
import visualization as viz
import metrics
import numpy as np

from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import load_model

# Load Dataset

In [35]:
def load(image_size, train_path="../dataset/Project/train/", test_path="../dataset/Project/test/"):
    """Load the training and test splits from disk.

    Args:
        image_size: Target image size, forwarded unchanged to
            ``pre.get_dataset`` (the notebook passes ``(152, 152)``).
        train_path: Directory handed to ``pre.get_dataset`` for the training
            split. Defaults to the project's original location.
        test_path: Directory handed to ``pre.get_dataset`` for the test
            split. Defaults to the project's original location.

    Returns:
        The 4-tuple ``(X_train, y_train, X_test, y_test)``.
    """
    X_train, y_train = pre.get_dataset(train_path, image_size)
    X_test, y_test = pre.get_dataset(test_path, image_size)

    return (X_train, y_train, X_test, y_test)

# Prepare Data

In [36]:
def prep_ae(X_train, y_train):
    """Run the autoencoder-specific preprocessing on the training split.

    Thin wrapper: both arguments are forwarded to ``ae.preprocess_images``
    and its result is returned unchanged.
    """
    prepared = ae.preprocess_images(X_train, y_train)
    return prepared

def prep_cnn(cnn_train, cnn_test):
    """Add noise images to train and test set and normalize.

    Training set: the clean data, followed by a Gaussian-noise copy and a
    salt-and-pepper copy of all of it.
    Test set: a Gaussian-noise copy of the first ``num_noisy_images``
    elements, then the elements after those, then a salt-and-pepper copy of
    the same first elements.

    Args:
        cnn_train: Training dataset; must support ``map`` and ``concatenate``.
        cnn_test: Test dataset; must support ``len``, ``map``, ``take``,
            ``skip`` and ``concatenate``.

    Returns:
        ``(train_data_norm, test_data_norm)`` as unpacked from the result of
        ``pre.normalize_dataset``.
    """
    g_noisy_train_data = cnn_train.map(pre.add_gaussian_noise)
    snp_noisy_train_data = cnn_train.map(pre.add_salt_and_pepper_noise)
    combined_train_data = cnn_train.concatenate(g_noisy_train_data).concatenate(snp_noisy_train_data)

    percentage_noisy_images = 0.2
    # Size the noisy subset from the *argument*. The previous code read the
    # notebook-level global ``CNN_test`` here, which fails with NameError (or
    # silently uses stale state) when the function is called outside the exact
    # cell order of the notebook.
    # The factor 16 is presumably the number of images per dataset element
    # (batch size) -- TODO confirm against the loader.
    total_test_images = len(cnn_test) * 16  # Total images in test data
    num_noisy_images = int(total_test_images * percentage_noisy_images)

    # NOTE(review): ``take``/``skip`` are given a count derived from
    # ``len(cnn_test) * 16 * 0.2``, which is never smaller than
    # ``len(cnn_test)`` for a non-empty dataset. If ``take``/``skip`` count the
    # same units as ``len`` (as tf.data datasets do), the whole test set is
    # noised and ``remaining_test_data`` is empty -- confirm this is intended.
    g_noisy_test_data = cnn_test.take(num_noisy_images).map(pre.add_gaussian_noise)
    snp_noisy_test_data = cnn_test.take(num_noisy_images).map(pre.add_salt_and_pepper_noise)
    remaining_test_data = cnn_test.skip(num_noisy_images)

    combined_test_data = g_noisy_test_data.concatenate(remaining_test_data)
    combined_test_data = combined_test_data.concatenate(snp_noisy_test_data)

    # NOTE(review): arguments are passed as (train, test) but the result is
    # unpacked as (test, train). Kept as in the original; verify against the
    # return order of ``pre.normalize_dataset``.
    test_data_norm, train_data_norm = pre.normalize_dataset(combined_train_data, combined_test_data)
    return train_data_norm, test_data_norm

# Build and Compile Models

In [37]:
def build_ae(image_size):
    """Create the autoencoder for the given image size.

    Delegates to ``ae.build_and_compile(image_size)`` and returns the model
    it produces.
    """
    return ae.build_and_compile(image_size)
def build_cnn():
    """Create the CNN classifier.

    Delegates to ``cnn.build_cnn_model()`` and returns the model it produces.
    """
    return cnn.build_cnn_model()

# Train Models

In [38]:
def train_ae(model, ae_train):
    """Fit the autoencoder with two ``val_loss``-driven callbacks.

    Callbacks, in order:
      1. ``ReduceLROnPlateau`` (factor 0.2, patience 3, min_delta 0.0005).
      2. ``EarlyStopping`` (patience 5, min_delta 0.0002, best weights restored).

    Args:
        model: Autoencoder passed through to ``ae.fit_model``.
        ae_train: Training data passed through to ``ae.fit_model``.

    Returns:
        Whatever ``ae.fit_model`` returns (stored as the training history by
        the notebook).
    """
    training_callbacks = [
        ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=3, min_delta=0.0005),
        EarlyStopping(monitor="val_loss", patience=5, min_delta=0.0002, restore_best_weights=True),
    ]
    return ae.fit_model(model, ae_train, epochs=30, batch_size=32, callbacks=training_callbacks)

def train_cnn(model, cnn_train, cnn_test):
    """Train the CNN classifier.

    Forwards the model and both datasets, in this order, to
    ``cnn.train_model`` and returns its result.
    """
    return cnn.train_model(model, cnn_train, cnn_test)

# Anomaly detection procedure

In [39]:
def calculate_mse_list(input_images, predicted_images):
    """Return the per-image error between inputs and their predictions.

    Entry ``i`` of the result is
    ``metrics.mse(input_images[i], predicted_images[i])``; the result has
    ``len(input_images)`` entries.
    """
    return [
        metrics.mse(input_images[idx], predicted_images[idx])
        for idx in range(len(input_images))
    ]
        
def get_true_anomalies(labels, normal_class=3):
    """Convert per-class label rows into binary ground-truth flags.

    Each row of ``labels`` is reduced to the index of its largest entry
    (``np.argmax`` along axis 1). A row whose index equals ``normal_class``
    becomes ``0``; every other row becomes ``1``.

    Args:
        labels: 2-D array-like with one row per sample (axis 1 is reduced).
        normal_class: Class index mapped to ``0``. Defaults to ``3``, the
            value previously hard-coded here.

    Returns:
        A list of ``0``/``1`` ints, one per row of ``labels``.
    """
    class_ids = np.argmax(labels, axis=1)
    return [0 if class_id == normal_class else 1 for class_id in class_ids]

def detect_anomalies(mse_list, threshold):
    """Threshold reconstruction errors into binary predictions.

    An error strictly below ``threshold`` yields ``0``; an error equal to or
    above it (or one for which the comparison is false) yields ``1``.

    Returns:
        A list of ``0``/``1`` ints with one entry per element of ``mse_list``.
    """
    return [0 if error < threshold else 1 for error in mse_list]

# Plotting Training and Validation Figures

In [40]:
def visualize_training_history(ae_history, cnn_history):
    """Plot the training curves of both models via ``viz.plot_metrics``.

    The autoencoder history is plotted first with ``accuracy=False``, then
    the CNN history with ``accuracy=True``.
    """
    plot_specs = (
        (ae_history, "Autoencoder", False),
        (cnn_history, "CNN", True),
    )
    for history, title, with_accuracy in plot_specs:
        viz.plot_metrics(history, title, accuracy=with_accuracy)

# Putting everything together

This main section ties the parts together: it runs the anomaly-detection procedure and then the classification task.

## Import and preprocess data

### Load images

In [41]:
# Image size handed to the dataset loader (and to build_ae when it is used).
image_size = (152, 152)

# Read the train and test splits from disk.
X_train, y_train, X_test, y_test = load(image_size)

# Training data goes through the autoencoder-specific preprocessing; the test
# images are only normalized. Note that X_test is rebound to its normalized
# version here, so later cells see the normalized images.
ae_X_train, ae_y_train = prep_ae(X_train, y_train)
X_test = pre.normalize_images(X_test)

Found 259 files belonging to 4 classes.
Found 33 files belonging to 4 classes.


### Prepare autoencoder training set and threshold set

In [42]:
# Split the preprocessed training data into three sets: normal_train (passed
# to train_ae when training is enabled) plus normal_test and anomaly_test,
# which the threshold calculation further down predicts on.
normal_train, normal_test, anomaly_test = ae.get_training_and_threshold_set(ae_X_train, ae_y_train)

[3 2 3 ... 3 2 3]


## Build and Train autoencoder

In [43]:
#ae_model = build_ae(image_size)
#ae_model.summary()

In [44]:
# Training is skipped (the call is left commented out), so `ae_history` is not
# defined by this cell. A previously saved autoencoder is loaded from
# ./model.keras instead.
#ae_history = train_ae(ae_model, normal_train)
ae_model = load_model("./model.keras")

## Predict on normal and anomalous data to calculate threshold for anomaly detection procedure

In [47]:
# Reconstruct both threshold sets with the autoencoder.
normal_predictions = ae_model.predict(normal_test)
anomaly_predictions = ae_model.predict(anomaly_test)

# Per-image reconstruction error for each set.
print(normal_test.shape)
normal_mse = calculate_mse_list(normal_test, normal_predictions)
anomaly_mse = calculate_mse_list(anomaly_test, anomaly_predictions)

# Threshold = midpoint between the largest error on normal images and the
# smallest error on anomalous images. If the two error ranges overlap
# (max(normal_mse) > min(anomaly_mse)), this midpoint cannot separate them.
anomaly_threshold = (max(normal_mse) + min(anomaly_mse)) / 2

print(anomaly_threshold)

16/16 ━━━━━━━━━━━━━━━━━━━━ 3s 207ms/step
19/19 ━━━━━━━━━━━━━━━━━━━━ 4s 207ms/step
(502, 152, 152, 3)
0.0014573273


## Train CNN model

In [46]:
# NOTE(review): `load_cnn` is not defined in any visible cell of this notebook,
# and the saved output of this cell is "NameError: name 'load_cnn' is not
# defined". A loader for the CNN datasets must be defined or imported before
# this cell can run.
CNN_train, CNN_test = load_cnn()
# Augment both sets with noisy copies and normalize them.
CNN_train, CNN_test = prep_cnn(CNN_train, CNN_test)

# Build, train, and finally run the CNN on the prepared test set.
CNN_model = build_cnn()
CNN_history = train_cnn(CNN_model, CNN_train, CNN_test)
CNN_model.predict(CNN_test)


NameError: name 'load_cnn' is not defined

# Run the machine learning pipeline

In [48]:
# Ground-truth 0/1 flags derived from the test labels.
y_anomaly_true = get_true_anomalies(y_test)
# Reconstruct the test images with the autoencoder.
image_predictions = ae_model.predict(X_test)

# Per-image reconstruction error, thresholded into 0/1 predictions: errors
# below anomaly_threshold map to 0, all others to 1.
# NOTE(review): the saved output of this cell ends in
# "IndexError: list index out of range"; the traceback is not included, so
# the failing statement cannot be identified from the saved output.
mse_list = calculate_mse_list(X_test, image_predictions)
y_anomaly_pred = detect_anomalies(mse_list, anomaly_threshold)

2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 156ms/step


IndexError: list index out of range