# Improved CNN

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import cv2
import matplotlib.pyplot as plt

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

2025-04-08 23:44:35.148433: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-08 23:44:35.184736: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744136075.229099  314565 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744136075.241444  314565 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744136075.278101  314565 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

### Data preprocessing 

In [None]:
def preprocess_image_fn(img):
    """
    Preprocess an input image:
      - Resize to 128x128 using bilinear interpolation,
      - Convert to grayscale (if needed),
      - Invert the image (i.e. subtract from 255),
      - Rescale pixel values to [0, 1].
    (Assumes the input image is loaded as RGB or grayscale.)
    """
    # If image has 3 channels, convert to grayscale.
    if len(img.shape) == 3 and img.shape[-1] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Resize to 128x128
    img_resized = cv2.resize(img, (128, 128), interpolation=cv2.INTER_LINEAR)
    # Invert the image so background becomes white (i.e., pixel value 1)
    img_inverted = 255 - img_resized
    # Normalize pixel values to [0,1]
    img_norm = img_inverted.astype("float32") / 255.0
    # Expand dims to make shape (128,128,1)
    img_norm = np.expand_dims(img_norm, axis=-1)
    return img_norm

# Custom preprocessing function for ImageDataGenerator
def preprocessing_function(img):
    # Input img is a NumPy array with shape (H, W, C) in [0,255]
    return preprocess_image_fn(img)


### Residual Block Definition

In [None]:
def residual_block(input_tensor, filters, stride=1):
    """
    A standard residual block:
      Conv (3x3, filters, stride) -> BN -> ReLU -> Conv (3x3, filters, stride=1) -> BN
      plus a shortcut connection (with projection if necessary).
    """
    x = layers.Conv2D(filters, kernel_size=3, strides=stride, padding='same',
                      kernel_initializer='he_normal')(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, kernel_size=3, strides=1, padding='same',
                      kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)

    shortcut = input_tensor
    # If input has different shape, project it
    if stride != 1 or input_tensor.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding='same',
                                 kernel_initializer='he_normal')(input_tensor)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.add([x, shortcut])
    x = layers.ReLU()(x)
    return x


### Model Architecture

In [None]:
def build_model(num_classes=250, input_shape=(128, 128, 1)):
    """
    Build the residual convolutional network with the following layers:
      - Input dropout,
      - 7x7 conv (64 filters, stride 2) -> BN -> ReLU,
      - 3 residual units (64),
      - Dropout,
      - 3 residual units (128, first with stride 2),
      - Dropout,
      - 3 residual units (256, first with stride 2),
      - Dropout,
      - 3 residual units (512, first with stride 2),
      - 8x8 Average Pooling, Dropout,
      - Fully connected (dense) layer to num_classes with softmax.
    """
    inputs = Input(shape=input_shape)
    
    # Input dropout (if desired, e.g., 0.2)
    x = layers.Dropout(0.2)(inputs)
    
    # 7x7 Convolution, 64 filters, stride 2, padding same => output: 64x64x64
    x = layers.Conv2D(64, kernel_size=7, strides=2, padding='same', kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    
    # Three residual units with 64 filters (64x64x64)
    x = residual_block(x, 64, stride=1)
    x = residual_block(x, 64, stride=1)
    x = residual_block(x, 64, stride=1)
    
    # Dropout
    x = layers.Dropout(0.2)(x)
    
    # Residual unit with 128 filters, stride 2 => 32x32x128
    x = residual_block(x, 128, stride=2)
    x = residual_block(x, 128, stride=1)
    x = residual_block(x, 128, stride=1)
    
    # Dropout
    x = layers.Dropout(0.2)(x)
    
    # Residual unit with 256 filters, stride 2 => 16x16x256
    x = residual_block(x, 256, stride=2)
    x = residual_block(x, 256, stride=1)
    x = residual_block(x, 256, stride=1)
    
    # Dropout
    x = layers.Dropout(0.2)(x)
    
    # Residual unit with 512 filters, stride 2 => 8x8x512
    x = residual_block(x, 512, stride=2)
    x = residual_block(x, 512, stride=1)
    x = residual_block(x, 512, stride=1)
    
    # 8x8 Average Pooling -> results in a 512 vector
    x = layers.GlobalAveragePooling2D()(x)
    
    # Dropout before the final classification
    x = layers.Dropout(0.2)(x)
    
    # Fully connected layer to num_classes with softmax
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = models.Model(inputs, outputs)
    return model


### Data Preparation using ImageDataGenerator

In [None]:
def prepare_data(data_dir, batch_size=64, target_size=(128,128)):
    """
    Create training, validation, and test generators from a directory.
    Assumes subfolders correspond to classes.
    """
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocessing_function,
        horizontal_flip=True,
        validation_split=0.25  # Assume 60% training, 20% val, 20% test (or adjust as needed)
    )
    
    # Create training generator (subset of training data)
    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode="categorical",
        subset="training",
        shuffle=True,
        seed=42
    )
    
    # Create validation generator
    val_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode="categorical",
        subset="validation",
        shuffle=False,
        seed=42
    )
    
    # For testing, you might have a separate directory or use the validation generator as a proxy.
    # Here, we use the same validation generator as a test set.
    test_generator = val_generator
    
    return train_generator, val_generator, test_generator

### Training and evaluation

In [None]:
def train_model(data_dir, model_save_path="best_ann_model_4.h5", epochs=100, batch_size=64):
    """
    Train the model on the dataset located in data_dir using ImageDataGenerator.
    Saves the best model and prints the training and validation accuracy.
    """
    train_gen, val_gen, test_gen = prepare_data(data_dir, batch_size=batch_size)
    
    num_classes = len(train_gen.class_indices)
    print("Number of classes:", num_classes)
    
    model = build_model(num_classes=num_classes, input_shape=(128,128,1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
        ModelCheckpoint(model_save_path, monitor='val_accuracy', save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, verbose=1)
    ]
    
    history = model.fit(
        train_gen,
        epochs=epochs,
        validation_data=val_gen,
        callbacks=callbacks,
        verbose=1
    )
    
    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_gen, verbose=1)
    print(f"Final Test Accuracy: {test_acc:.4f}")
    
    return model, history

### Test Image Prediction

In [None]:
def predict_single_image(image_path, model, debug=True):
    """
    Load and preprocess a single test image, predict its label using the trained model,
    and display debugging information.
    """
    # Load image and preprocess
    img = preprocess_image_fn(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE))
    # Expand dims to form a batch of 1
    img_batch = np.expand_dims(img, axis=0)
    # Predict
    predictions = model.predict(img_batch)
    predicted_index = np.argmax(predictions)
    if debug:
        print("Raw prediction probabilities:", predictions)
        print("Predicted index:", predicted_index)
    return predicted_index

### Main execution

In [None]:
if __name__ == "__main__":
    
    data_dir = "../sketches"  
    
    # Train the model
    model, history = train_model(data_dir, model_save_path="best_ann_model_4.h5", epochs=100, batch_size=64)

    # Assuming the training generator was used:
    train_gen, _, _ = prepare_data(data_dir, batch_size=64)
    label_dict = {v: k for k, v in train_gen.class_indices.items()}
    print("Label mapping:", label_dict)
    
    # Single image testing
    test_image_path = "../sketches/fan/6321.png"  
    predicted_idx = predict_single_image(test_image_path, model, debug=True)
    predicted_label = label_dict.get(predicted_idx, predicted_idx)
    print("Final predicted label for the test image:", predicted_label)

Found 900 images belonging to 15 classes.
Found 300 images belonging to 15 classes.
Number of classes: 15


2025-04-08 23:44:58.241824: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


  self._warn_if_super_not_called()


Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12s/step - accuracy: 0.1885 - loss: 3.6413 



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 13s/step - accuracy: 0.1912 - loss: 3.5919 - val_accuracy: 0.0667 - val_loss: 32656.3535 - learning_rate: 0.0010
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 11s/step - accuracy: 0.3891 - loss: 1.8990 - val_accuracy: 0.0667 - val_loss: 6177.1094 - learning_rate: 0.0010
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 9s/step - accuracy: 0.3952 - loss: 1.8007 - val_accuracy: 0.0667 - val_loss: 402.4196 - learning_rate: 0.0010
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 9s/step - accuracy: 0.4373 - loss: 1.6512 - val_accuracy: 0.0667 - val_loss: 354.1013 - learning_rate: 0.0010
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 10s/step - accuracy: 0.4521 - loss: 1.6522 - val_accuracy: 0.0667 - val_loss: 60.7942 - learning_rate: 0.0010
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 10s/step - accuracy: 0.5030 - loss: 1.4449 - val_accuracy: 0.0833 - val_loss: 15.5139 - learning_rate: 0.0010
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - accuracy: 0.5825 - loss: 1.2819



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 10s/step - accuracy: 0.5829 - loss: 1.2796 - val_accuracy: 0.1133 - val_loss: 9.5200 - learning_rate: 0.0010
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 9s/step - accuracy: 0.5517 - loss: 1.3302 - val_accuracy: 0.0700 - val_loss: 13.0403 - learning_rate: 0.0010
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 10s/step - accuracy: 0.5724 - loss: 1.3135 - val_accuracy: 0.0800 - val_loss: 19.8623 - learning_rate: 0.0010
Epoch 10/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.6017 - loss: 1.2074



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 11s/step - accuracy: 0.6023 - loss: 1.2055 - val_accuracy: 0.1833 - val_loss: 11.5679 - learning_rate: 0.0010
Epoch 11/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - accuracy: 0.6394 - loss: 0.9820



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 10s/step - accuracy: 0.6402 - loss: 0.9830 - val_accuracy: 0.2633 - val_loss: 6.3972 - learning_rate: 0.0010
Epoch 12/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 9s/step - accuracy: 0.6527 - loss: 1.1009 - val_accuracy: 0.1933 - val_loss: 7.0457 - learning_rate: 0.0010
Epoch 13/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 11s/step - accuracy: 0.6691 - loss: 1.0592 - val_accuracy: 0.2267 - val_loss: 8.2684 - learning_rate: 0.0010
Epoch 14/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 11s/step - accuracy: 0.6797 - loss: 0.9465 - val_accuracy: 0.1733 - val_loss: 7.4485 - learning_rate: 0.0010
Epoch 15/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 11s/step - accuracy: 0.6929 - loss: 0.9006 - val_accuracy: 0.2233 - val_loss: 5.7706 - learning_rate: 0.0010
Epoch 16/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 11s/step - accuracy: 0.7798 - loss: 0.6942 - val_accuracy: 0.2867 - val_loss: 5.1330 - learning_rate: 0.0010
Epoch 18/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 12s/step - accuracy: 0.7178 - loss: 0.7808 - val_accuracy: 0.1700 - val_loss: 7.3076 - learning_rate: 0.0010
Epoch 19/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.8120 - loss: 0.5873 



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 12s/step - accuracy: 0.8126 - loss: 0.5868 - val_accuracy: 0.3933 - val_loss: 3.1680 - learning_rate: 0.0010
Epoch 20/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 11s/step - accuracy: 0.8624 - loss: 0.4639 - val_accuracy: 0.3167 - val_loss: 4.1290 - learning_rate: 0.0010
Epoch 21/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 11s/step - accuracy: 0.8381 - loss: 0.4836 - val_accuracy: 0.2967 - val_loss: 5.2097 - learning_rate: 0.0010
Epoch 22/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 11s/step - accuracy: 0.8015 - loss: 0.5944 - val_accuracy: 0.1400 - val_loss: 12.2255 - learning_rate: 0.0010
Epoch 23/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.8138 - loss: 0.5470 
Epoch 23: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 13s/step - accuracy: 0.7744 - loss: 0.6395 - val_accuracy: 0.5300 - val_loss: 2.3975 - learning_rate: 5.0000e-04
Epoch 25/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.8881 - loss: 0.3831 



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 12s/step - accuracy: 0.8874 - loss: 0.3842 - val_accuracy: 0.5467 - val_loss: 2.6108 - learning_rate: 5.0000e-04
Epoch 26/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.8427 - loss: 0.4168 



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 12s/step - accuracy: 0.8435 - loss: 0.4157 - val_accuracy: 0.6233 - val_loss: 1.6234 - learning_rate: 5.0000e-04
Epoch 27/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 11s/step - accuracy: 0.8754 - loss: 0.3349 - val_accuracy: 0.3567 - val_loss: 3.0212 - learning_rate: 5.0000e-04
Epoch 28/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 10s/step - accuracy: 0.8526 - loss: 0.4991 - val_accuracy: 0.5367 - val_loss: 2.3218 - learning_rate: 5.0000e-04
Epoch 29/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 10s/step - accuracy: 0.9129 - loss: 0.3044 - val_accuracy: 0.5900 - val_loss: 2.0887 - learning_rate: 5.0000e-04
Epoch 30/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - accuracy: 0.8967 - loss: 0.3492
Epoch 30: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 10s/step - accuracy: 0.9580 - loss: 0.1629 - val_accuracy: 0.6733 - val_loss: 1.2709 - learning_rate: 2.5000e-04
Epoch 34/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 12s/step - accuracy: 0.9001 - loss: 0.2643 - val_accuracy: 0.6233 - val_loss: 1.3417 - learning_rate: 2.5000e-04
Epoch 35/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 12s/step - accuracy: 0.9363 - loss: 0.2255 - val_accuracy: 0.5567 - val_loss: 1.7872 - learning_rate: 2.5000e-04
Epoch 36/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.9474 - loss: 0.1941 



[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 12s/step - accuracy: 0.9465 - loss: 0.1959 - val_accuracy: 0.7433 - val_loss: 0.9489 - learning_rate: 2.5000e-04
Epoch 37/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 12s/step - accuracy: 0.9347 - loss: 0.2241 - val_accuracy: 0.6033 - val_loss: 1.6841 - learning_rate: 2.5000e-04
Epoch 38/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 12s/step - accuracy: 0.9578 - loss: 0.1590 - val_accuracy: 0.6933 - val_loss: 1.1697 - learning_rate: 2.5000e-04
Epoch 39/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 14s/step - accuracy: 0.8836 - loss: 0.3248 - val_accuracy: 0.6967 - val_loss: 1.1914 - learning_rate: 2.5000e-04
Epoch 40/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - accuracy: 0.9349 - loss: 0.2425 
Epoch 40: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [None]:
# Checking the Labels

print(label_dict)

{0: 'airplane', 1: 'book', 2: 'cup', 3: 'envelope', 4: 'fan', 5: 'fork', 6: 'hat', 7: 'key', 8: 'laptop', 9: 'leaf', 10: 'moon', 11: 'pizza', 12: 't-shirt', 13: 'traffic light', 14: 'wineglass'}


## Prediction Testing on single test image

In [9]:
# Now test on a single image:
test_image_path = "../sketches/airplane/1.png"  
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step
Raw prediction probabilities: [[9.7189695e-01 1.2792053e-03 3.8687911e-04 8.3770276e-05 1.3407930e-02
  1.3203414e-05 9.2167444e-05 1.0362328e-03 2.2585116e-05 5.9619052e-03
  7.7419746e-04 3.4823690e-03 1.3418053e-03 2.2056031e-04 1.9228851e-07]]
Predicted index: 0
Final predicted label for the test image: airplane


In [None]:
# Now test on a single image:
test_image_path = "../sketches/book/1921.png" 
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
Raw prediction probabilities: [[9.2505542e-07 1.5721206e-02 2.3156232e-08 2.5347651e-07 7.2228645e-09
  2.5507077e-10 1.3958944e-08 3.4905780e-09 9.8427618e-01 4.9782656e-10
  7.8907031e-10 3.3236284e-07 5.1675681e-11 9.8530916e-07 6.6851214e-08]]
Predicted index: 8
Final predicted label for the test image: laptop


In [None]:
# Now test on a single image:
test_image_path = "../sketches/pizza/12721.png"  
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
Raw prediction probabilities: [[1.9292008e-06 4.0985230e-02 1.1537861e-06 4.0071674e-05 4.7934773e-05
  2.4568913e-06 2.0000019e-08 4.1385362e-04 9.0569658e-05 6.7924564e-03
  2.9350865e-06 8.7089455e-01 3.2595926e-04 8.0400884e-02 1.1945906e-08]]
Predicted index: 11
Final predicted label for the test image: pizza


In [None]:
# Now test on a single image:
test_image_path = "../sketches/moon/10561.png"  
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
Raw prediction probabilities: [[2.98563964e-06 3.94970755e-07 3.37967982e-08 1.22728409e-06
  7.90879540e-07 7.77143810e-04 8.91997388e-07 3.70922171e-05
  1.06445036e-07 1.52605535e-05 9.99130905e-01 7.30344937e-07
  1.33874467e-09 3.12777920e-05 1.10539054e-06]]
Predicted index: 10
Final predicted label for the test image: moon


In [13]:
# Now test on a single image:
test_image_path = "../sketches/envelope/6010.png"  
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
Raw prediction probabilities: [[1.1003969e-09 2.4435083e-05 6.2260717e-08 9.9989223e-01 8.6026536e-10
  1.5367917e-10 3.0775393e-09 3.3262065e-08 8.2488950e-05 4.2947562e-07
  3.8885868e-09 2.8305158e-09 3.3216260e-10 1.2731278e-08 2.7071476e-07]]
Predicted index: 3
Final predicted label for the test image: envelope


In [None]:
# Now test on a single image:
test_image_path = "../sketches/key/9211.png" 
predicted_idx = predict_single_image(test_image_path, model, debug=True)
predicted_label = label_dict.get(predicted_idx, predicted_idx)
print("Final predicted label for the test image:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
Raw prediction probabilities: [[6.3178306e-03 2.7785525e-03 2.3789036e-05 9.5946398e-03 1.5399069e-02
  1.1002890e-02 1.9134462e-04 9.4813484e-01 6.7922592e-05 3.6629525e-03
  8.0508710e-04 6.5661094e-04 5.3421000e-04 7.9502870e-04 3.5143974e-05]]
Predicted index: 7
Final predicted label for the test image: key
