# This is the notebook for optimizing the model for categorizing our Lego bricks. Categories will be defined by Tom Alphin's Lego Brick Labels (v39).

In [9]:
#Import packages
import os  
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import math
import random
import shutil
import importlib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import inspect
from tqdm import tqdm
import pandas as pd
from tabulate import tabulate

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.utils.class_weight import compute_class_weight
import mlHelper as mlHelper

importlib.reload(mlHelper)

<module 'mlHelper' from '/home/billiam/Documents/Repos/Lego-Brick-Sorter/mlHelper.py'>

In [2]:
print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print(tf.config.list_physical_devices('GPU'))
HOME_DIR = "/home/billiam/Documents/Repos/Lego-Brick-Sorter/"
os.chdir(HOME_DIR)
print(os.getcwd())

tf.Tensor(-2201.3362, shape=(), dtype=float32)
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
/home/billiam/Documents/Repos/Lego-Brick-Sorter


# Comparing Transfer Learning Models

We will be using transfer learning techniques on Keras pre-trained models to create our categorization model. We will first need to test out all of the different Keras Applications and discover which one will best work for our needs.



**Training Dataset:**

~~We will be using the 447 class training data set referenced in this paper:https://www.iccs-meeting.org/archive/iccs2022/papers/133520608.pdf~~

We will be using our synthetic data set with 4 currently generated classes (1000 imgs each)


**Testing:**

~~We will be selecting 20 random classes of Lego brick to train our models on. Models will then be compared with each other to determine what will best work for our purposes~~

These 4 classes have matching real world data that can be used as a true concrete test of accuracy

**Data Augmentation:**

Data will be augmented in order to create a more robust model:

* Horizontal flip
* Vertical flip
* 360 range of rotation
* 50.0 range of random channel shift

In [3]:
# Create a dictionary of models we will test
model_dict = {}
model_dict["VGG16"] = tf.keras.applications.VGG16
model_dict["VGG19"] = tf.keras.applications.VGG19
model_dict["ResNet101V2"] = tf.keras.applications.ResNet101V2
# model_dict["ResNet152"] = tf.keras.applications.ResNet152
model_dict["ResNet152V2"] = tf.keras.applications.ResNet152V2
model_dict["ResNet50"] = tf.keras.applications.ResNet50
model_dict["ResNet50V2"] = tf.keras.applications.ResNet50V2
model_dict["InceptionResNetV2"] = tf.keras.applications.InceptionResNetV2
model_dict["InceptionV3"] = tf.keras.applications.InceptionV3
model_dict["MobileNet"] = tf.keras.applications.MobileNet
model_dict["MobileNetV2"] = tf.keras.applications.MobileNetV2
# model_dict["MobileNetV3"] = tf.keras.applications.MobileNetV3 <- weird exceptions
model_dict["Xception"] = tf.keras.applications.Xception
model_dict["DenseNet121"] = tf.keras.applications.DenseNet121
model_dict["DenseNet169"] = tf.keras.applications.DenseNet169
model_dict["DenseNet201"] = tf.keras.applications.DenseNet201
model_dict["EfficientNetV2S"] = tf.keras.applications.EfficientNetV2S
model_dict["EfficientNetV2M"] = tf.keras.applications.EfficientNetV2M
model_dict["EfficientNetV2L"] = tf.keras.applications.EfficientNetV2L




In [4]:
# Load in our preprocessed models into a dictionary
preprocessed_models = {}
for model_name, model in tqdm(model_dict.items()):
    print("Processing: " + model_name)
    preprocessed_models[model_name] = model(include_top=False)

  0%|                                                                                       | 0/17 [00:00<?, ?it/s]

Processing: VGG16


  6%|████▋                                                                          | 1/17 [00:00<00:03,  4.56it/s]

Processing: VGG19


 12%|█████████▎                                                                     | 2/17 [00:00<00:03,  4.33it/s]

Processing: ResNet101V2


 18%|█████████████▉                                                                 | 3/17 [00:02<00:12,  1.12it/s]

Processing: ResNet152V2


 24%|██████████████████▌                                                            | 4/17 [00:04<00:19,  1.47s/it]

Processing: ResNet50


 29%|███████████████████████▏                                                       | 5/17 [00:05<00:15,  1.26s/it]

Processing: ResNet50V2


 35%|███████████████████████████▉                                                   | 6/17 [00:06<00:12,  1.12s/it]

Processing: InceptionResNetV2


 41%|████████████████████████████████▌                                              | 7/17 [00:09<00:17,  1.78s/it]

Processing: InceptionV3


 47%|█████████████████████████████████████▏                                         | 8/17 [00:10<00:14,  1.61s/it]

Processing: MobileNet


 53%|█████████████████████████████████████████▊                                     | 9/17 [00:10<00:09,  1.22s/it]

Processing: MobileNetV2


 59%|█████████████████████████████████████████████▉                                | 10/17 [00:11<00:07,  1.05s/it]

Processing: Xception


 65%|██████████████████████████████████████████████████▍                           | 11/17 [00:12<00:06,  1.02s/it]

Processing: DenseNet121


 71%|███████████████████████████████████████████████████████                       | 12/17 [00:14<00:06,  1.21s/it]

Processing: DenseNet169


 76%|███████████████████████████████████████████████████████████▋                  | 13/17 [00:16<00:05,  1.50s/it]

Processing: DenseNet201


 82%|████████████████████████████████████████████████████████████████▏             | 14/17 [00:19<00:05,  1.92s/it]

Processing: EfficientNetV2S


 88%|████████████████████████████████████████████████████████████████████▊         | 15/17 [00:21<00:04,  2.01s/it]

Processing: EfficientNetV2M


 94%|█████████████████████████████████████████████████████████████████████████▍    | 16/17 [00:24<00:02,  2.42s/it]

Processing: EfficientNetV2L


100%|██████████████████████████████████████████████████████████████████████████████| 17/17 [00:29<00:00,  1.75s/it]


In [5]:
# Process our models
# (delete the last layer, make all remaining layers untrainable, and add our own trainable layer)
names = ["3003", "3004", "3021", "6091"]
NUM_CLASSES = len(names)

processed_models = {}
for name, pre_model in preprocessed_models.items():
    print("processing: " + name)
    # Create our empty model (look up sequential vs functional)
    model = keras.models.Sequential()
    
    # Add all layers from our pre-trained model (last layer already deleted from include_top=False)
    model.add(pre_model)

    # Make all remaining layers untrainable and add our last trainable layer
    for layer in model.layers:
        layer.trainable = False
        
    # Add our layers
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(1024, activation='relu')) # Dense layer for combining features model recognized in img
    model.add(layers.Dropout(0.5)) # Dropout layer which may prevent overfitting and improve generalization ability (test-set)
    model.add(layers.Dense(NUM_CLASSES, activation='softmax')) # Final classification layer equal to number of classes
    
    # Loss and optimizer functions
    loss = keras.losses.CategoricalCrossentropy()
    optim = keras.optimizers.Adam(learning_rate=0.001)
    
    # Might try mean average precision for metric because we are categorizing so many classes
    metrics = ["categorical_accuracy"]
    
    # Used to do accuracy but someone online recommended MAP
    # https://www.reddit.com/r/learnmachinelearning/comments/xpyv8j/data_set_for_lego_image_classification_800000/
    # metrics = ["accuracy"]

    # Compile our model
    model.compile(optimizer=optim, loss=loss, metrics=metrics)
    
    # Add model to our dict
    processed_models[name] = model

processing: VGG16
processing: VGG19
processing: ResNet101V2
processing: ResNet152V2
processing: ResNet50
processing: ResNet50V2
processing: InceptionResNetV2
processing: InceptionV3
processing: MobileNet
processing: MobileNetV2
processing: Xception
processing: DenseNet121
processing: DenseNet169
processing: DenseNet201
processing: EfficientNetV2S
processing: EfficientNetV2M
processing: EfficientNetV2L


In [6]:
# Generate tensor image data batches with model specific preprocessing
BASE_DIR = 'data/fakeData/'


tf.random.set_seed(1)

ppDict = {}
ppDict["VGG16"] = tf.keras.applications.vgg16.preprocess_input
ppDict["VGG19"] = tf.keras.applications.vgg19.preprocess_input
ppDict["ResNet101V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["ResNet152V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["ResNet50"] = tf.keras.applications.resnet50.preprocess_input
ppDict["ResNet50V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["InceptionResNetV2"] = tf.keras.applications.inception_resnet_v2.preprocess_input
ppDict["InceptionV3"] = tf.keras.applications.inception_v3.preprocess_input
ppDict["MobileNet"] = tf.keras.applications.mobilenet.preprocess_input
ppDict["MobileNetV2"] = tf.keras.applications.mobilenet_v2.preprocess_input
ppDict["Xception"] = tf.keras.applications.xception.preprocess_input
ppDict["DenseNet121"] = tf.keras.applications.densenet.preprocess_input
ppDict["DenseNet169"] = tf.keras.applications.densenet.preprocess_input
ppDict["DenseNet201"] = tf.keras.applications.densenet.preprocess_input
ppDict["EfficientNetV2S"] = tf.keras.applications.efficientnet_v2.preprocess_input
ppDict["EfficientNetV2M"] = tf.keras.applications.efficientnet_v2.preprocess_input
ppDict["EfficientNetV2L"] = tf.keras.applications.efficientnet_v2.preprocess_input

fitModels = {}

allMetrics = []

numIteration = 3

for name, model in processed_models.items():
    
    modelMetric = [name]
    
    for iteration in range(numIteration):
        train_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name],
                                                                horizontal_flip = True,
                                                                vertical_flip = True,
                                                                rotation_range = 360,
                                                                channel_shift_range = 50.0,
                                                                fill_mode = 'wrap')
        valid_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])
        test_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])

        train_batches = train_gen.flow_from_directory(
            BASE_DIR + 'train',
            target_size=(224, 224),
            batch_size=4,
            shuffle=True,
            color_mode="rgb",
            classes=names   
        )

        val_batches = valid_gen.flow_from_directory(
            BASE_DIR + 'val',
            target_size=(224, 224),
            batch_size=4,
            shuffle=True,
            color_mode="rgb",
            classes=names
        )

        test_batches = test_gen.flow_from_directory(
            BASE_DIR + 'test',
            target_size=(224, 224),
            batch_size=4,
            shuffle=False,
            color_mode="rgb",
            classes=names
        )

        epochs = 25

        early_stopping = keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=5,
            verbose=2
        )

        print("Start training of: " + name)
        #model.summary()
        model.fit(train_batches, validation_data=val_batches,
                  callbacks=[early_stopping],
                  epochs=epochs, verbose=0)
        print("Performance of" + name + ": ")
        results = model.evaluate(test_batches, verbose=2)
    
    
        modelMetric.append(results[1])
    
    avg = np.mean(modelMetric[1:])
    modelMetric.append(avg)
    
    allMetrics.append(modelMetric)

    
columns = ["name"]    
for num in range(numIteration):
    columns.append('categorical_accuracy_it_' + str(num+1))
columns.append('avg')

df_results = pd.DataFrame(allMetrics, 
                          columns = columns)
df_results.sort_values(by='avg', ascending=False, inplace=True)
df_results.reset_index(inplace=True,drop=True)
df_results

Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: VGG16
Epoch 8: early stopping
Performance ofVGG16: 
351/351 - 7s - loss: 1.0302 - categorical_accuracy: 0.7380 - 7s/epoch - 19ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: VGG16
Epoch 7: early stopping
Performance ofVGG16: 
351/351 - 6s - loss: 1.2795 - categorical_accuracy: 0.7509 - 6s/epoch - 17ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: VGG16
Epoch 9: early stopping
Performance ofVGG16: 
351/351 - 6s - loss: 0.9152 - categorical_accuracy: 0.8166 - 6s/epoch - 17ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: VGG19
Epoch 14: ea

Start training of: MobileNetV2
Epoch 7: early stopping
Performance ofMobileNetV2: 
351/351 - 2s - loss: 1.0092 - categorical_accuracy: 0.7016 - 2s/epoch - 5ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: MobileNetV2
Epoch 16: early stopping
Performance ofMobileNetV2: 
351/351 - 2s - loss: 1.6122 - categorical_accuracy: 0.6296 - 2s/epoch - 5ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: Xception
Epoch 18: early stopping
Performance ofXception: 
351/351 - 4s - loss: 0.8519 - categorical_accuracy: 0.7031 - 4s/epoch - 11ms/step
Found 3000 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.
Found 1401 images belonging to 4 classes.
Start training of: Xception
Epoch 13: early stopping
Performance ofXception: 
351/351 - 4s - loss: 0.9524 - categorical_accuracy: 0.6

Unnamed: 0,name,categorical_accuracy_it_1,categorical_accuracy_it_2,categorical_accuracy_it_3,avg
0,ResNet50,0.79015,0.79586,0.8601,0.81537
1,EfficientNetV2S,0.788009,0.8601,0.783726,0.810611
2,ResNet50V2,0.824411,0.697359,0.809422,0.777064
3,VGG16,0.738044,0.750892,0.81656,0.768499
4,ResNet101V2,0.829408,0.698073,0.770878,0.766119
5,EfficientNetV2L,0.704497,0.794433,0.793719,0.764216
6,MobileNet,0.895789,0.589579,0.756602,0.747323
7,EfficientNetV2M,0.820842,0.685225,0.679515,0.728527
8,VGG19,0.738758,0.658101,0.780157,0.725672
9,InceptionV3,0.673804,0.716631,0.778729,0.723055


<br></br>
**Results:**

In [16]:
preResults = df_results
preResults

Unnamed: 0,name,categorical_accuracy_it_1,categorical_accuracy_it_2,categorical_accuracy_it_3,avg
0,ResNet50,0.79015,0.79586,0.8601,0.81537
1,EfficientNetV2S,0.788009,0.8601,0.783726,0.810611
2,ResNet50V2,0.824411,0.697359,0.809422,0.777064
3,VGG16,0.738044,0.750892,0.81656,0.768499
4,ResNet101V2,0.829408,0.698073,0.770878,0.766119
5,EfficientNetV2L,0.704497,0.794433,0.793719,0.764216
6,MobileNet,0.895789,0.589579,0.756602,0.747323
7,EfficientNetV2M,0.820842,0.685225,0.679515,0.728527
8,VGG19,0.738758,0.658101,0.780157,0.725672
9,InceptionV3,0.673804,0.716631,0.778729,0.723055


From our model, we can see that our top 5 performers without fine-tuning are:

1. ResNet50
2. EfficientNetV2S
3. ResNet50V2
4. VGG16
5. ResNet101V2

From these, we can see lots of ResNet combined with EfficientNetV2S and VGG16 (expected top contender). Going forward, we will be comparing ResNet50 and EfficientNetV2S with fine-tuning testing to see which one will perform better.

<br></br>
# Fine Tuning Testing

Comparing ResNet50 and EfficientNetV2S by fine-tuning on our real-world data set.

Since this data set is unbalanced, we will be using class weights.

In [15]:
# ResNet50 Fine-Tuning Layers: conv4 and conv5
# EfficientNetV2S Fine-Tuning Layers: block5 and block 6

preprocessed_models["ResNet50"].trainable = True
for layer in preprocessed_models["ResNet50"].layers:
    trainable = ('conv4' in layer.name or 'conv5' in layer.name)
    layer.trainable = trainable

preprocessed_models["EfficientNetV2S"].trainable = True
for layer in preprocessed_models["EfficientNetV2S"].layers:
    trainable = ('block5' in layer.name or 'block6' in layer.name)
    layer.trainable = trainable
    
# Loss and optimizer functions
loss_fine = keras.losses.CategoricalCrossentropy()
optim_fine = keras.optimizers.Adam(learning_rate=0.00001) #Lower learning rate since fine tuning
    
#Metrics:
metrics_fine = ['categorical_accuracy']

processed_models["ResNet50"].compile(optimizer=optim_fine, loss=loss_fine, metrics=metrics_fine)
processed_models["EfficientNetV2S"].compile(optimizer=optim_fine, loss=loss_fine, metrics=metrics_fine)

In [21]:
# Generate tensor image data batches with model specific preprocessing
BASE_DIR = 'data/realData/'

models = ["ResNet50", "EfficientNetV2S"]

tf.random.set_seed(1)

ppDict = {}
ppDict["ResNet50"] = tf.keras.applications.resnet50.preprocess_input
ppDict["EfficientNetV2S"] = tf.keras.applications.efficientnet_v2.preprocess_input

fitModels = {}

allMetrics = []

numIteration = 3

for name, model in processed_models.items():
    if name in models:

        modelMetric = [name]

        for iteration in range(numIteration):
            train_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name],
                                                                    horizontal_flip = True,
                                                                    vertical_flip = True,
                                                                    rotation_range = 360,
                                                                    channel_shift_range = 50.0,
                                                                    fill_mode = 'wrap')
            valid_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])
            test_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])

            train_batches = train_gen.flow_from_directory(
                BASE_DIR + 'train',
                target_size=(224, 224),
                batch_size=4,
                shuffle=True,
                color_mode="rgb",
                classes=names   
            )

            val_batches = valid_gen.flow_from_directory(
                BASE_DIR + 'val',
                target_size=(224, 224),
                batch_size=4,
                shuffle=True,
                color_mode="rgb",
                classes=names
            )

            test_batches = test_gen.flow_from_directory(
                BASE_DIR + 'test',
                target_size=(224, 224),
                batch_size=4,
                shuffle=False,
                color_mode="rgb",
                classes=names
            )

            epochs = 25

            early_stopping = keras.callbacks.EarlyStopping(
                monitor="val_loss",
                patience=5,
                verbose=2
            )

            #Compute Weights:
            class_weight = compute_class_weight(class_weight='balanced',
                                        classes=np.unique(train_batches.classes),
                                        y=train_batches.classes)
            class_weight = {i : class_weight[i] for i in range(len(names))}


            print("Start fine-tuning of: " + name)

            history_fine = model.fit(train_batches,
                             validation_data=val_batches,
                             callbacks=[early_stopping],
                             epochs=epochs,
                             verbose=0,
                             shuffle=True,
                            class_weight=class_weight)

            print("Performance of" + name + ": ")
            results = model.evaluate(test_batches, verbose=2)


            modelMetric.append(results[1])

        avg = np.mean(modelMetric[1:])
        modelMetric.append(avg)

        allMetrics.append(modelMetric)

    
columns = ["name"]    
for num in range(numIteration):
    columns.append('categorical_accuracy_it_' + str(num+1))
columns.append('avg')

df_results = pd.DataFrame(allMetrics, 
                          columns = columns)
df_results.sort_values(by='avg', ascending=False, inplace=True)
df_results.reset_index(inplace=True,drop=True)
df_results

Found 841 images belonging to 4 classes.
Found 351 images belonging to 4 classes.
Found 209 images belonging to 4 classes.
Start fine-tuning of: ResNet50
Epoch 23: early stopping
Performance ofResNet50: 
53/53 - 1s - loss: 0.2177 - categorical_accuracy: 0.9378 - 769ms/epoch - 15ms/step
Found 841 images belonging to 4 classes.
Found 351 images belonging to 4 classes.
Found 209 images belonging to 4 classes.
Start fine-tuning of: ResNet50
Epoch 8: early stopping
Performance ofResNet50: 
53/53 - 1s - loss: 0.1615 - categorical_accuracy: 0.9569 - 780ms/epoch - 15ms/step
Found 841 images belonging to 4 classes.
Found 351 images belonging to 4 classes.
Found 209 images belonging to 4 classes.
Start fine-tuning of: ResNet50
Epoch 6: early stopping
Performance ofResNet50: 
53/53 - 1s - loss: 0.2415 - categorical_accuracy: 0.9569 - 758ms/epoch - 14ms/step
Found 841 images belonging to 4 classes.
Found 351 images belonging to 4 classes.
Found 209 images belonging to 4 classes.
Start fine-tuning 

Unnamed: 0,name,categorical_accuracy_it_1,categorical_accuracy_it_2,categorical_accuracy_it_3,avg
0,ResNet50,0.937799,0.956938,0.956938,0.950558
1,EfficientNetV2S,0.894737,0.961722,0.961722,0.939394


In [22]:
final_results = df_results
final_results

Unnamed: 0,name,categorical_accuracy_it_1,categorical_accuracy_it_2,categorical_accuracy_it_3,avg
0,ResNet50,0.937799,0.956938,0.956938,0.950558
1,EfficientNetV2S,0.894737,0.961722,0.961722,0.939394


# Final Results: ResNet50 WILL BE OUR MODEL!!!!