# This is the notebook for optimizing the model for categorizing our Lego bricks. Categories will be defined by Tom Alphin's Lego Brick Labels (v39).

In [1]:
#Import packages
import os  
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import math
import random
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import inspect
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

2023-02-14 15:05:36.244954: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print(tf.config.list_physical_devices('GPU'))

tf.Tensor(617.16956, shape=(), dtype=float32)
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Comparing Transfer Learning Models

We will be using transfer learning techniques on Keras pre-trained models to create our categorization model. We will first need to test out all of the different Keras Applications and discover which one will best work for our needs.



**Training Dataset:**

We will be using the 447 class training data set referenced in this paper:https://www.iccs-meeting.org/archive/iccs2022/papers/133520608.pdf

**Testing:**

We will be selecting 20 random classes of Lego brick to train our models on. Models will then be compared with each other to determine what will best work for our purposes

**Data Augmentation:**

In order to save time for finding an optimal model, no data augmentation other than resizing our images (without preserving aspect ratio) to match the input of the models will be utilized.

In [3]:
# Create a dictionary of models we will test
model_dict = {}
model_dict["VGG16"] = tf.keras.applications.VGG16
model_dict["VGG19"] = tf.keras.applications.VGG19
model_dict["ResNet101V2"] = tf.keras.applications.ResNet101V2
# model_dict["ResNet152"] = tf.keras.applications.ResNet152
model_dict["ResNet152V2"] = tf.keras.applications.ResNet152V2
model_dict["ResNet50"] = tf.keras.applications.ResNet50
model_dict["ResNet50V2"] = tf.keras.applications.ResNet50V2
model_dict["InceptionResNetV2"] = tf.keras.applications.InceptionResNetV2
model_dict["InceptionV3"] = tf.keras.applications.InceptionV3
model_dict["MobileNet"] = tf.keras.applications.MobileNet
model_dict["MobileNetV2"] = tf.keras.applications.MobileNetV2
# model_dict["MobileNetV3"] = tf.keras.applications.MobileNetV3 <- weird exceptions
model_dict["Xception"] = tf.keras.applications.Xception
model_dict["DenseNet121"] = tf.keras.applications.DenseNet121
model_dict["DenseNet169"] = tf.keras.applications.DenseNet169
model_dict["DenseNet201"] = tf.keras.applications.DenseNet201
model_dict["EfficientNetV2S"] = tf.keras.applications.EfficientNetV2S
model_dict["EfficientNetV2M"] = tf.keras.applications.EfficientNetV2M
model_dict["EfficientNetV2L"] = tf.keras.applications.EfficientNetV2L




In [4]:
# Load in our preprocessed models into a dictionary
preprocessed_models = {}
for model_name, model in tqdm(model_dict.items()):
    print("Processing: " + model_name)
    preprocessed_models[model_name] = model(include_top=False)

  0%|                                                    | 0/17 [00:00<?, ?it/s]

Processing: VGG16


  6%|██▌                                         | 1/17 [00:00<00:04,  3.84it/s]

Processing: VGG19


 12%|█████▏                                      | 2/17 [00:00<00:04,  3.66it/s]

Processing: ResNet101V2


 18%|███████▊                                    | 3/17 [00:02<00:13,  1.03it/s]

Processing: ResNet152V2


 24%|██████████▎                                 | 4/17 [00:05<00:21,  1.65s/it]

Processing: ResNet50


 29%|████████████▉                               | 5/17 [00:05<00:16,  1.39s/it]

Processing: ResNet50V2


 35%|███████████████▌                            | 6/17 [00:06<00:13,  1.24s/it]

Processing: InceptionResNetV2


 41%|██████████████████                          | 7/17 [00:10<00:19,  1.99s/it]

Processing: InceptionV3


 47%|████████████████████▋                       | 8/17 [00:11<00:16,  1.80s/it]

Processing: MobileNet


 53%|███████████████████████▎                    | 9/17 [00:12<00:10,  1.37s/it]

Processing: MobileNetV2


 59%|█████████████████████████▎                 | 10/17 [00:13<00:08,  1.18s/it]

Processing: Xception


 65%|███████████████████████████▊               | 11/17 [00:14<00:06,  1.12s/it]

Processing: DenseNet121


 71%|██████████████████████████████▎            | 12/17 [00:15<00:06,  1.31s/it]

Processing: DenseNet169


 76%|████████████████████████████████▉          | 13/17 [00:18<00:06,  1.66s/it]

Processing: DenseNet201


 82%|███████████████████████████████████▍       | 14/17 [00:21<00:06,  2.04s/it]

Processing: EfficientNetV2S


 88%|█████████████████████████████████████▉     | 15/17 [00:23<00:04,  2.11s/it]

Processing: EfficientNetV2M


 94%|████████████████████████████████████████▍  | 16/17 [00:26<00:02,  2.51s/it]

Processing: EfficientNetV2L


100%|███████████████████████████████████████████| 17/17 [00:32<00:00,  1.88s/it]


In [12]:
# Process our models
# (delete the last layer, make all remaining layers untrainable, and add our own trainable layer)

NUM_CLASSES = 5

processed_models = {}
for name, pre_model in preprocessed_models.items():
    print("processing: " + name)
    # Create our empty model (look up sequential vs functional)
    model = keras.models.Sequential()
    
    # Add all layers from our pre-trained model (last layer already deleted from include_top=False)
    model.add(pre_model)

    # Make all remaining layers untrainable and add our last trainable layer
    for layer in model.layers:
        layer.trainable = False
        
    # model.summary()
        
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(NUM_CLASSES))
    
    # Loss and optimizer functions
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optim = keras.optimizers.Adam(learning_rate=0.001)
    
    # Might try mean average precision for metric because we are categorizing so many classes
    metrics = ["accuracy"]
    
    # Used to do accuracy but someone online recommended MAP
    # https://www.reddit.com/r/learnmachinelearning/comments/xpyv8j/data_set_for_lego_image_classification_800000/
    # metrics = ["accuracy"]

    # Compile our model
    model.compile(optimizer=optim, loss=loss, metrics=metrics)
    
    # Add model to our dict
    processed_models[name] = model

processing: VGG16
processing: VGG19
processing: ResNet101V2
processing: ResNet152V2
processing: ResNet50
processing: ResNet50V2
processing: InceptionResNetV2
processing: InceptionV3
processing: MobileNet
processing: MobileNetV2
processing: Xception
processing: DenseNet121
processing: DenseNet169
processing: DenseNet201
processing: EfficientNetV2S
processing: EfficientNetV2M
processing: EfficientNetV2L


In [13]:
# Generate tensor image data batches with model specific preprocessing
BASE_DIR = 'images/'
names = ["3003", "3004", "3021", "6091", "852929"]

tf.random.set_seed(1)

ppDict = {}
ppDict["VGG16"] = tf.keras.applications.vgg16.preprocess_input
ppDict["VGG19"] = tf.keras.applications.vgg19.preprocess_input
ppDict["ResNet101V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["ResNet152V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["ResNet50"] = tf.keras.applications.resnet50.preprocess_input
ppDict["ResNet50V2"] = tf.keras.applications.resnet_v2.preprocess_input
ppDict["InceptionResNetV2"] = tf.keras.applications.inception_resnet_v2.preprocess_input
ppDict["InceptionV3"] = tf.keras.applications.inception_v3.preprocess_input
ppDict["MobileNet"] = tf.keras.applications.mobilenet.preprocess_input
ppDict["MobileNetV2"] = tf.keras.applications.mobilenet_v2.preprocess_input
ppDict["Xception"] = tf.keras.applications.xception.preprocess_input
ppDict["DenseNet121"] = tf.keras.applications.densenet.preprocess_input
ppDict["DenseNet169"] = tf.keras.applications.densenet.preprocess_input
ppDict["DenseNet201"] = tf.keras.applications.densenet.preprocess_input
ppDict["EfficientNetV2S"] = tf.keras.applications.efficientnet_v2.preprocess_input
ppDict["EfficientNetV2M"] = tf.keras.applications.efficientnet_v2.preprocess_input
ppDict["EfficientNetV2L"] = tf.keras.applications.efficientnet_v2.preprocess_input

fitModels = {}

for name, model in processed_models.items():
    train_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])
    valid_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])
    test_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=ppDict[name])
    
    train_batches = train_gen.flow_from_directory(
        BASE_DIR + 'train',
        target_size=(224, 224),
        class_mode='sparse',
        batch_size=4,
        shuffle=True,
        color_mode="rgb",
        classes=names   
    )

    val_batches = valid_gen.flow_from_directory(
        BASE_DIR + 'val',
        target_size=(224, 224),
        class_mode='sparse',
        batch_size=4,
        shuffle=True,
        color_mode="rgb",
        classes=names
    )

    test_batches = test_gen.flow_from_directory(
        BASE_DIR + 'test',
        target_size=(224, 224),
        class_mode='sparse',
        batch_size=4,
        shuffle=False,
        color_mode="rgb",
        classes=names
    )
    
    epochs = 30
    
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        verbose=2
    )

    print("Start training of: " + name)
    model.summary()
    model.fit(train_batches, validation_data=val_batches,
              callbacks=[early_stopping],
              epochs=epochs, verbose=2)
    print("Performance of" + name + ": ")
    model.evaluate(test_batches, verbose=2)
    
    
    fitModels[name] = model
    

Found 8463 images belonging to 5 classes.
Found 3528 images belonging to 5 classes.
Found 2115 images belonging to 5 classes.
Start training of: VGG16
Model: "sequential_41"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, None, None, 512)   14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_38 (Dense)            (None, 5)                 2565      
                                                                 
Total params: 14,717,253
Trainable params: 2,565
Non-trainable params: 14,714,688
_________________________________________________________________
Epoch 1/30
2116/2116 - 54s - loss: 0.5436 - accuracy: 0.8139 - val_loss: 0.2961 - va

Start training of: ResNet152V2
Model: "sequential_44"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet152v2 (Functional)    (None, None, None, 2048)  58331648  
                                                                 
 global_average_pooling2d_3   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_41 (Dense)            (None, 5)                 10245     
                                                                 
Total params: 58,341,893
Trainable params: 10,245
Non-trainable params: 58,331,648
_________________________________________________________________
Epoch 1/30
2116/2116 - 86s - loss: 0.2763 - accuracy: 0.8968 - val_loss: 0.1392 - val_accuracy: 0.9515 - 86s/epoch - 41ms/step
Epoch 2/30
2116/2116 - 80s - loss: 0.1291 - accuracy: 0.9532 - val_loss: 0.1

 (GlobalAveragePooling2D)                                        
                                                                 
 dense_44 (Dense)            (None, 5)                 7685      
                                                                 
Total params: 54,344,421
Trainable params: 7,685
Non-trainable params: 54,336,736
_________________________________________________________________
Epoch 1/30
2116/2116 - 81s - loss: 0.3440 - accuracy: 0.8698 - val_loss: 0.2722 - val_accuracy: 0.9039 - 81s/epoch - 38ms/step
Epoch 2/30
2116/2116 - 74s - loss: 0.2209 - accuracy: 0.9154 - val_loss: 0.1715 - val_accuracy: 0.9385 - 74s/epoch - 35ms/step
Epoch 3/30
2116/2116 - 73s - loss: 0.1780 - accuracy: 0.9345 - val_loss: 0.1862 - val_accuracy: 0.9376 - 73s/epoch - 34ms/step
Epoch 4/30
2116/2116 - 73s - loss: 0.1639 - accuracy: 0.9395 - val_loss: 0.1843 - val_accuracy: 0.9382 - 73s/epoch - 34ms/step
Epoch 5/30
2116/2116 - 73s - loss: 0.1433 - accuracy: 0.9467 - val_loss: 0.1295 

Epoch 14/30
2116/2116 - 14s - loss: 0.0313 - accuracy: 0.9895 - val_loss: 0.1203 - val_accuracy: 0.9589 - 14s/epoch - 6ms/step
Epoch 15/30
2116/2116 - 14s - loss: 0.0291 - accuracy: 0.9887 - val_loss: 0.0890 - val_accuracy: 0.9688 - 14s/epoch - 6ms/step
Epoch 16/30
2116/2116 - 14s - loss: 0.0271 - accuracy: 0.9902 - val_loss: 0.0992 - val_accuracy: 0.9691 - 14s/epoch - 6ms/step
Epoch 17/30
2116/2116 - 13s - loss: 0.0282 - accuracy: 0.9904 - val_loss: 0.1049 - val_accuracy: 0.9668 - 13s/epoch - 6ms/step
Epoch 17: early stopping
Performance ofMobileNet: 
529/529 - 2s - loss: 0.1162 - accuracy: 0.9669 - 2s/epoch - 5ms/step
Found 8463 images belonging to 5 classes.
Found 3528 images belonging to 5 classes.
Found 2115 images belonging to 5 classes.
Start training of: MobileNetV2
Model: "sequential_50"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, None, None, 1280)  2257

Epoch 3/30
2116/2116 - 55s - loss: 0.1470 - accuracy: 0.9464 - val_loss: 0.1309 - val_accuracy: 0.9535 - 55s/epoch - 26ms/step
Epoch 4/30
2116/2116 - 53s - loss: 0.1269 - accuracy: 0.9568 - val_loss: 0.1483 - val_accuracy: 0.9453 - 53s/epoch - 25ms/step
Epoch 5/30
2116/2116 - 53s - loss: 0.1156 - accuracy: 0.9592 - val_loss: 0.1090 - val_accuracy: 0.9646 - 53s/epoch - 25ms/step
Epoch 6/30
2116/2116 - 53s - loss: 0.1057 - accuracy: 0.9618 - val_loss: 0.1208 - val_accuracy: 0.9575 - 53s/epoch - 25ms/step
Epoch 7/30
2116/2116 - 53s - loss: 0.0990 - accuracy: 0.9643 - val_loss: 0.1155 - val_accuracy: 0.9598 - 53s/epoch - 25ms/step
Epoch 8/30
2116/2116 - 53s - loss: 0.0913 - accuracy: 0.9663 - val_loss: 0.1047 - val_accuracy: 0.9606 - 53s/epoch - 25ms/step
Epoch 9/30
2116/2116 - 53s - loss: 0.0867 - accuracy: 0.9693 - val_loss: 0.1103 - val_accuracy: 0.9609 - 53s/epoch - 25ms/step
Epoch 10/30
2116/2116 - 53s - loss: 0.0812 - accuracy: 0.9721 - val_loss: 0.1144 - val_accuracy: 0.9589 - 53s/e

Epoch 18/30
2116/2116 - 83s - loss: 0.0355 - accuracy: 0.9872 - val_loss: 0.1150 - val_accuracy: 0.9677 - 83s/epoch - 39ms/step
Epoch 19/30
2116/2116 - 83s - loss: 0.0329 - accuracy: 0.9878 - val_loss: 0.1080 - val_accuracy: 0.9685 - 83s/epoch - 39ms/step
Epoch 19: early stopping
Performance ofDenseNet201: 
529/529 - 15s - loss: 0.1101 - accuracy: 0.9683 - 15s/epoch - 28ms/step
Found 8463 images belonging to 5 classes.
Found 3528 images belonging to 5 classes.
Found 2115 images belonging to 5 classes.
Start training of: EfficientNetV2S
Model: "sequential_55"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetv2-s (Functiona  (None, None, None, 1280)  20331360 
 l)                                                              
                                                                 
 global_average_pooling2d_14  (None, 1280)             0         
  (GlobalAveragePooling2D)             

Epoch 14/30
2116/2116 - 76s - loss: 0.1172 - accuracy: 0.9595 - val_loss: 0.1180 - val_accuracy: 0.9572 - 76s/epoch - 36ms/step
Epoch 15/30
2116/2116 - 75s - loss: 0.1200 - accuracy: 0.9585 - val_loss: 0.1073 - val_accuracy: 0.9637 - 75s/epoch - 36ms/step
Epoch 16/30
2116/2116 - 76s - loss: 0.1130 - accuracy: 0.9603 - val_loss: 0.1059 - val_accuracy: 0.9663 - 76s/epoch - 36ms/step
Epoch 17/30
2116/2116 - 75s - loss: 0.1136 - accuracy: 0.9586 - val_loss: 0.1062 - val_accuracy: 0.9651 - 75s/epoch - 35ms/step
Epoch 18/30
2116/2116 - 75s - loss: 0.1144 - accuracy: 0.9582 - val_loss: 0.1032 - val_accuracy: 0.9640 - 75s/epoch - 35ms/step
Epoch 19/30
2116/2116 - 75s - loss: 0.1076 - accuracy: 0.9624 - val_loss: 0.1026 - val_accuracy: 0.9643 - 75s/epoch - 36ms/step
Epoch 20/30
2116/2116 - 76s - loss: 0.1051 - accuracy: 0.9644 - val_loss: 0.1008 - val_accuracy: 0.9660 - 76s/epoch - 36ms/step
Epoch 21/30
2116/2116 - 75s - loss: 0.1045 - accuracy: 0.9629 - val_loss: 0.1074 - val_accuracy: 0.9612 