# Defect Binary Classification
![System Test Engineering Logo](../figures/logo_fhj_stm.jpg)<br>
---

**A Component of my System Test Engineering Master's Thesis**  
**FH JOANNEUM - University of Applied Sciences**

---

**Author:** Luis Kraker  
**Supervisor:** DDr. Gudrun Schappacher-Tilp  
**Date:** 28<sup>th</sup> March, 2024    

---

## Setup

#### Imports

In [None]:
import importlib
import os

import tensorflow as tf

import source.load_raw_data.kaggle_dataset as kaggle_dataset
from source.model.helpers.image_classifier_visualizer import ImageClassifierVisualizer
from source.model.helpers.image_classifiers_trainer import ImageClassifiersTrainer
from source.image_preprocessing.image_preprocessor import ImagePreprocessor
import source.image_preprocessing.preprocessing_steps as steps
from source.load_raw_data.save_images_from_tf_dataset import save_images_from_tf_dataset

In [2]:
# Root output folder: four directory levels above the current directory,
# then 'outputs/defect_binary_classification'.
_PARENT_HOPS = ['..'] * 4
OUTPUT_DIR = os.path.join(os.path.curdir, *_PARENT_HOPS, 'outputs', 'defect_binary_classification')

# Sub-folders of the output root for exported images and for results.
IMAGES_DIR, RESULTS_DIR = (os.path.join(OUTPUT_DIR, leaf) for leaf in ('images', 'results'))

#### Load Dataset 

In [None]:
# Dataset of category zero; it is reported as "NO DEFECT" when counted below.
no_defect_dataset = kaggle_dataset.get_tf_dataset_with_category_zero()
# Mapping of defect category name -> dataset (iterated via .items() in later cells).
defect_datasets = kaggle_dataset.get_tf_datasets_for_each_category()


#### Count Elements

In [None]:
def count_elements_in_datasets(datasets):
    """Print the number of elements of every dataset in a mapping.

    Args:
        datasets: Mapping from a category name to an iterable dataset.
            Each dataset is iterated completely in order to count it.
    """
    for name, elements in datasets.items():
        n_images = sum(1 for _ in elements)
        print(f"Category: {name}, Number of Images: {n_images}")

In [12]:
# Report the size of every defect category first ...
count_elements_in_datasets(defect_datasets)
# ... then the size of the category-zero dataset, wrapped in a one-entry
# mapping because the helper expects {name: dataset}.
count_elements_in_datasets({"NO DEFECT": no_defect_dataset})

Category: MISSING_HOLE, Number of Images: 115
Category: MOUSE_BITE, Number of Images: 115
Category: OPEN_CIRCUIT, Number of Images: 116
Category: SHORT, Number of Images: 116
Category: SPUR, Number of Images: 115
Category: SPURIOUS_COPPER, Number of Images: 116
Category: NO DEFECT, Number of Images: 10


#### Upsample the 'NO_DEFECT' Dataset

In [None]:
# The 10 'NO DEFECT' images are repeated 11 times and topped up with 5 more
# images, giving the 115 elements reported by the count in the next cell.
N_FULL_PASSES = 11
N_EXTRA_IMAGES = 5

no_defect_dataset_partial = no_defect_dataset.take(N_EXTRA_IMAGES)
no_defect_dataset_repeated = no_defect_dataset.repeat(N_FULL_PASSES)
no_defect_dataset_upsampled = no_defect_dataset_repeated.concatenate(
    no_defect_dataset_partial
)

In [14]:
# Sanity check: the upsampled dataset should now match the size of a defect category.
count_elements_in_datasets({"NO DEFECT": no_defect_dataset_upsampled})  

Category: NO DEFECT, Number of Images: 115


#### Concatenate `no_defect_dataset_upsampled` with Each Defect Dataset

In [None]:
# One combined dataset per defect category: the category's own images
# followed by the upsampled no-defect images.
concatenated_datasets = {
    category: defect_dataset.concatenate(no_defect_dataset_upsampled)
    for category, defect_dataset in defect_datasets.items()
}

#### Shuffle the Datasets

In [None]:
# Seed shared by the global TensorFlow seed and the per-dataset shuffle.
SHUFFLE_SEED = 42
tf.random.set_seed(SHUFFLE_SEED)

# `reshuffle_each_iteration=False` is essential here: by default a shuffled
# dataset yields a new permutation every time it is iterated, so the later
# take()/skip() split would draw training and validation elements from
# different permutations and the two sets would overlap (data leakage).
# With a fixed order the split is disjoint and reproducible.
# The buffer of 1000 exceeds the ~230 elements per dataset reported in the
# next cell, so the shuffle covers the whole dataset.
concatenated_datasets = {
    category: dataset.shuffle(
        1000,
        seed=SHUFFLE_SEED,
        reshuffle_each_iteration=False,
    )
    for category, dataset in concatenated_datasets.items()
}

#### Binary Labeling

In [None]:
def map_label_to_binary(image, label):
    """Collapse the label to two classes, leaving the image untouched.

    Args:
        image: Image element of the dataset; passed through unchanged.
        label: Original category label.

    Returns:
        Tuple of the unchanged image and a label that is 0 where the input
        label equals 0 and 1 everywhere else.
    """
    binary_label = tf.where(label == 0, 0, 1)
    return image, binary_label


# Apply the binary relabelling to every per-category dataset.
binary_labeled_datasets = {
    category: dataset.map(map_label_to_binary)
    for category, dataset in concatenated_datasets.items()
}

In [19]:
count_elements_in_datasets(binary_labeled_datasets)

Category: MISSING_HOLE, Number of Images: 230
Category: MOUSE_BITE, Number of Images: 230
Category: OPEN_CIRCUIT, Number of Images: 231
Category: SHORT, Number of Images: 231
Category: SPUR, Number of Images: 230
Category: SPURIOUS_COPPER, Number of Images: 231


## Preprocessing

#### Print Dimensions Statistics

In [None]:
def print_dimensions_statistics(datasets):
    """Print mean, standard deviation and aspect ratio of the image sizes.

    For every category the first two entries of each image's shape are
    collected in a single pass over the dataset. The population standard
    deviation (divisor N) is reported, and the aspect ratio is the mean of
    the first dimension divided by the mean of the second one.

    Args:
        datasets: Mapping from category name to a dataset yielding
            (image, label) pairs.
    """
    for category, dataset in datasets.items():
        # Single pass: iterating the dataset is the expensive part, so the
        # shapes are gathered once instead of walking the dataset twice.
        dims = [tf.shape(image)[:2] for image, _ in dataset]

        # An empty dataset would otherwise produce NaN statistics.
        if not dims:
            print(f"Category: {category}, no images found")
            continue

        dims = tf.cast(tf.stack(dims), tf.float32)
        mean_dims = tf.reduce_mean(dims, axis=0)
        stddev_dims = tf.math.reduce_std(dims, axis=0)
        aspect_ratio = mean_dims[0] / mean_dims[1]

        # `.numpy()` on the aspect ratio prints a plain number instead of the
        # full `tf.Tensor(...)` representation.
        print(
            f"Category: {category}, Mean: {mean_dims.numpy()}, "
            f"STDDev: {stddev_dims.numpy()}, Aspect Ratio: {aspect_ratio.numpy():.4f}"
        )


print_dimensions_statistics(binary_labeled_datasets)

#### Preprocess Images

In [9]:
tf.random.set_seed(42)

preprocessor = ImagePreprocessor()

# Target (first, second) image dimensions; the model defined further below
# declares a matching input shape of (20, 26, 3).
TARGET_SHAPE = (20, 26)

# Steps are listed in the order they are handed to the preprocessor:
# resize first, then the random rotation and the two random flips.
pipeline = [
    steps.ShapeResizer(desired_shape=TARGET_SHAPE),
    steps.RandomRotator(angle_range=(-3, 3)),
    steps.RandomFlipper(flip_direction='horizontal'),
    steps.RandomFlipper(flip_direction='vertical'),
]
preprocessor.set_pipe(pipeline)

# Run the same pipeline over every per-category dataset.
processed_datasets = {
    category: preprocessor.process(dataset)
    for category, dataset in binary_labeled_datasets.items()
}


In [None]:
# Export sample images of every processed dataset into a sub-folder of
# IMAGES_DIR named after the category (the helper is called with max_images=5).
for category in processed_datasets:
    category_dir = os.path.join(IMAGES_DIR, category)
    save_images_from_tf_dataset(processed_datasets[category], category_dir, max_images=5)

#### Visualization

In [None]:
# `processed_datasets` is keyed by defect category only (MISSING_HOLE, ...),
# so indexing it with 'NO_DEFECT' raises a KeyError. Every processed dataset
# also contains the upsampled no-defect images, labelled 0 by the binary
# relabelling, so they are recovered by filtering one dataset on label == 0.
# `save_images_from_tf_dataset` is already imported in the imports cell.
first_processed_dataset = next(iter(processed_datasets.values()))
no_defect_processed = first_processed_dataset.filter(
    # reduce_all keeps the predicate a scalar bool whatever the label's shape.
    lambda image, label: tf.reduce_all(tf.equal(label, 0))
)

save_images_from_tf_dataset(no_defect_processed.take(15), os.path.join(IMAGES_DIR, 'no_defect'))

In [None]:
from source.utils.pcb_visualization import PCBVisualizerforTF as PCBVisualizer
from source.load_raw_data.unpack_tf_dataset import unpack_tf_dataset

# `processed_datasets` has no 'NO_DEFECT' key (its keys are the defect
# categories), so the no-defect images are taken from one processed dataset
# by keeping only the elements whose binary label is 0.
first_processed_dataset = next(iter(processed_datasets.values()))
no_defect_processed = first_processed_dataset.filter(
    # reduce_all keeps the predicate a scalar bool whatever the label's shape.
    lambda image, label: tf.reduce_all(tf.equal(label, 0))
)

# A separate name is used so the raw `no_defect_dataset` loaded at the top of
# the notebook is not overwritten; re-running the upsampling cell depends on it.
# NOTE(review): index 0 of the unpacked result is presumably the images — confirm.
no_defect_images = unpack_tf_dataset(no_defect_processed)[0]
visualizer = PCBVisualizer()
visualizer.plot_images(no_defect_images)

In [None]:
# NOTE(review): `categories` is not defined in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel — define it (or
# pass the intended list of class names) before running this cell.
visualizer = ImageClassifierVisualizer(categories)
# NOTE(review): `processed_datasets` is built from the defect categories
# (MISSING_HOLE, MOUSE_BITE, ...); 'NO_DEFECT' is not among the keys printed
# earlier, so this lookup is expected to raise KeyError — confirm intended key.
visualizer.plot_images(processed_datasets['NO_DEFECT'], n_cols=3, n_rows=3)

#### Split Datasets

In [10]:
split_factor = 0.7
train_datasets, validation_datasets = {}, {}


def _count_elements(dataset):
    """Count the elements of a dataset by reducing over it once."""
    return dataset.reduce(0, lambda running_total, _: running_total + 1).numpy()


# NOTE: take()/skip() only give disjoint splits if the dataset yields its
# elements in the same order on every iteration (for a shuffled dataset this
# requires reshuffle_each_iteration=False).
for category, dataset in processed_datasets.items():
    dataset_length = _count_elements(dataset)
    # The leading `split_factor` share is used for training, the rest for validation.
    train_length = int(dataset_length * split_factor)
    train_datasets[category] = dataset.take(train_length)
    validation_datasets[category] = dataset.skip(train_length)

print("Successfully split the datasets")

# Only the first category's training size is reported as a spot check.
first_entry = next(iter(train_datasets.items()), None)
if first_entry is not None:
    category, dataset = first_entry
    print(f"Category: {category}, Train Size: {_count_elements(dataset)}")

Successfully split the datasets
Category: MISSING_HOLE, Train Size: 161


#### Batch Datasets

In [None]:
batch_size = 32


def _batch_all(datasets_by_category):
    """Return a new mapping in which every dataset is batched by `batch_size`."""
    return {
        name: unbatched.batch(batch_size)
        for name, unbatched in datasets_by_category.items()
    }


# The names are rebound to the batched versions; running this cell twice
# without re-running the split would batch the datasets a second time.
train_datasets = _batch_all(train_datasets)
validation_datasets = _batch_all(validation_datasets)

## Test Model


In [12]:
# Minimal baseline: flatten the 20x26x3 image and feed it to a single
# sigmoid unit.
INPUT_SHAPE = (20, 26, 3)

flatten_layer = tf.keras.layers.Flatten(input_shape=INPUT_SHAPE)
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')
model = tf.keras.Sequential([flatten_layer, output_layer])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Trial run of the model on a single category (MISSING_HOLE).
missing_hole_train = train_datasets['MISSING_HOLE']
missing_hole_validation = validation_datasets['MISSING_HOLE']

fit_options = {
    'validation_data': missing_hole_validation,
    'epochs': 5,
    'verbose': 2,
}
history = model.fit(missing_hole_train, **fit_options)

In [None]:
# Re-import and reload the trainer module so that edits made to its source
# during the session are picked up without restarting the kernel.
# NOTE(review): the alias `icv` refers to the *trainer* module here, and
# ImageClassifiersTrainer is also imported directly in the imports cell.
import source.model.helpers.image_classifiers_trainer as icv
importlib.reload(icv)

# NOTE(review): map_label_to_binary assigns 0 where the original label is 0
# and 1 otherwise. If the trainer indexes `category_names` by label value,
# this order maps label 0 to 'DEFECT' — confirm the expected order.
category_names = ['DEFECT', 'NO_DEFECT']
# One classifier group per defect category key.
group_names = list(binary_labeled_datasets.keys())

trainer = icv.ImageClassifiersTrainer(category_names=category_names, group_names=group_names)
# NOTE(review): `model` has already been fitted on MISSING_HOLE in the cell
# above — confirm whether the trainer is meant to start from that state.
trainer.load_model(model)
# NOTE(review): this passes `binary_labeled_datasets`, i.e. the datasets before
# the preprocessing, split and batching steps, while the model declares
# input_shape=(20, 26, 3) — confirm that fit_all handles this internally.
trainer.fit_all(binary_labeled_datasets, epochs=5, verbose=2)


In [None]:
# Plot the training histories collected by the trainer and keep the returned object.
figure = trainer.plot_histories()