Install & import

In [None]:
!pip -q install tensorflow kagglehub matplotlib

In [None]:
import os, shutil, random, re, pathlib
from pathlib import Path
import numpy as np
import tensorflow as tf

print("TensorFlow:", tf.__version__)

TensorFlow: 2.19.0


Base Directory


In [None]:
WORKDIR = Path("/content/work")
DATA_RAW = WORKDIR / "raw"
DATA_SMALL = WORKDIR / "small"
ARTIFACTS = WORKDIR / "artifacts"
for p in [WORKDIR, DATA_RAW, DATA_SMALL, ARTIFACTS]:
    p.mkdir(parents=True, exist_ok=True)

Config


In [None]:
IMG_SIZE = 96         # more detail than 96
BATCH_SIZE = 32
EPOCHS = 40
MAX_IMAGES_PER_CLASS = 400   # allow more data per class
TOP_K_CLASSES = 10            # how many brands to keep (4–6 is good)
SEED = 123
tf.random.set_seed(SEED); random.seed(SEED); np.random.seed(SEED)


Download dataset

In [None]:
import kagglehub

path = kagglehub.dataset_download("ahmedelsany/car-brand-classification-dataset")
print("Dataset path:", path)

DATASET_DIR = "/kaggle/input/car-brand-classification-dataset/Car Brand Classification Dataset"
TRAIN_SRC = Path(DATASET_DIR + "/train")
TEST_SRC = Path(DATASET_DIR + "/test")

# Show classes
print("Train classes:", [d.name for d in TRAIN_SRC.iterdir() if d.is_dir()])

Using Colab cache for faster access to the 'car-brand-classification-dataset' dataset.
Dataset path: /kaggle/input/car-brand-classification-dataset
Train classes: ['Nissan', 'Ford', 'Hyundai', 'Mercedes-Benz', 'Chrysler', 'GMC', 'Lincoln', 'FIAT', 'Aston Martin', 'Volkswagen', 'Land Rover', 'Buick', 'Honda', 'Dodge', 'Porsche', 'Kia', 'Toyota', 'Jeep', 'Volvo', 'Mitsubishi', 'Cadillac', 'Chevrolet', 'Jaguar', 'Bentley', 'BMW', 'Lexus', 'Audi', 'INFINITI', 'Acura', 'Subaru', 'Mazda', 'Ram', 'MINI']


Build a smaller subset

In [None]:
from collections import Counter

def ensure_empty_dir(p: Path):
    if p.exists(): shutil.rmtree(p)
    p.mkdir(parents=True, exist_ok=True)

def class_counts(root: Path):
    cnt = {}
    for d in sorted([x for x in root.iterdir() if x.is_dir()], key=lambda p: p.name.lower()):
        imgs = list(d.glob("*.jpg")) + list(d.glob("*.png")) + list(d.glob("*.jpeg"))
        cnt[d.name] = len(imgs)
    return cnt

# count classes in original train
train_cnt = class_counts(TRAIN_SRC)
#top = [c for c,_ in Counter(train_cnt).most_common()][:TOP_K_CLASSES]
top = ['Mercedes-Benz', 'Porsche', 'MINI']
print("Top classes:", top)

def collect_images_for(cls_name, src_root: Path):
    d = src_root/cls_name
    return list(d.glob("*.jpg")) + list(d.glob("*.png")) + list(d.glob("*.jpeg"))

ensure_empty_dir(DATA_SMALL)
for sp in ["train","val","test"]:
    (DATA_SMALL/sp).mkdir(parents=True, exist_ok=True)

for cname in top:
    # gather
    train_imgs = collect_images_for(cname, TRAIN_SRC)
    test_imgs  = collect_images_for(cname, TEST_SRC)

    # cap & split train→(train/val/test)
    random.shuffle(train_imgs)
    train_imgs = train_imgs[:MAX_IMAGES_PER_CLASS*3]
    n_train = int(0.7*len(train_imgs))
    n_val = int(0.15*len(train_imgs))
    splits = {
        "train": train_imgs[:n_train],
        "val": train_imgs[n_train:n_train+n_val],
        "test": train_imgs[n_train+n_val:]
    }
    # add extra test from original TEST
    if test_imgs:
        extra = random.sample(test_imgs, min(len(test_imgs), MAX_IMAGES_PER_CLASS))
        splits["test"] += extra

    # copy
    for sp, arr in splits.items():
        out = DATA_SMALL/sp/cname
        out.mkdir(parents=True, exist_ok=True)
        for src in arr:
            shutil.copy2(src, out/src.name)

for sp in ["train","val","test"]:
    print(sp, "classes:", [d.name for d in (DATA_SMALL/sp).iterdir()])


Top classes: ['Mercedes-Benz', 'Porsche', 'MINI']
train classes: ['MINI', 'Mercedes-Benz', 'Porsche']
val classes: ['MINI', 'Mercedes-Benz', 'Porsche']
test classes: ['MINI', 'Mercedes-Benz', 'Porsche']


Pre-processing


In [None]:
IMG_SHAPE = (IMG_SIZE, IMG_SIZE)
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_SMALL/"train", image_size=IMG_SHAPE, batch_size=BATCH_SIZE,
    label_mode="int", seed=SEED, shuffle=True)
val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_SMALL/"val", image_size=IMG_SHAPE, batch_size=BATCH_SIZE,
    label_mode="int", seed=SEED, shuffle=False)
test_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_SMALL/"test", image_size=IMG_SHAPE, batch_size=BATCH_SIZE,
    label_mode="int", seed=SEED, shuffle=False)

class_names = train_ds.class_names
NUM_CLASSES = len(class_names)
print("Classes:", class_names)

normalizer = tf.keras.layers.Rescaling(1./255)
augment = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.08),
    tf.keras.layers.RandomZoom(0.15),
    tf.keras.layers.RandomContrast(0.15),
])

def prepare(ds, training=False):
    ds = ds.map(lambda x,y: (normalizer(x), y), num_parallel_calls=AUTOTUNE)
    if training:
        ds = ds.map(lambda x,y: (augment(x, training=True), y), num_parallel_calls=AUTOTUNE)
    return ds.shuffle(1024) if training else ds

train_ds2 = prepare(train_ds, True).prefetch(AUTOTUNE)
val_ds2   = prepare(val_ds, False).cache().prefetch(AUTOTUNE)
test_ds2  = prepare(test_ds, False).cache().prefetch(AUTOTUNE)


Found 732 files belonging to 3 classes.
Found 156 files belonging to 3 classes.
Found 384 files belonging to 3 classes.
Classes: ['MINI', 'Mercedes-Benz', 'Porsche']


Model

In [None]:
from tensorflow.keras import layers, models

def smooth_sparse_labels(y_true, num_classes, smoothing=0.05):
    y_true = tf.one_hot(tf.cast(y_true, tf.int32), num_classes)
    smooth = y_true * (1 - smoothing) + (smoothing / num_classes)
    return smooth

loss_fn = lambda y_true, y_pred: tf.keras.losses.categorical_crossentropy(
    smooth_sparse_labels(y_true, NUM_CLASSES, 0.05), y_pred
)

base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet",
    alpha=0.5  # 0.35–0.5 keeps it small; raise to 0.75 if you can afford size
)

base.trainable = False  # first warmup
inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = inputs
x = tf.keras.applications.mobilenet_v2.preprocess_input(x * 255.0)  # expects [-1,1]
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = models.Model(inputs, outputs)

# cosine decay, label smoothing helps
steps_per_epoch = tf.data.experimental.cardinality(train_ds2).numpy()
lr = tf.keras.optimizers.schedules.CosineDecayRestarts(1e-3, first_decay_steps=max(steps_per_epoch*5, 100))
model.compile(optimizer=tf.keras.optimizers.Adam(lr),
              loss=loss_fn,
              metrics=["accuracy"])
model.summary()

Train

In [None]:
ckpt_path = ARTIFACTS/"best_tl.keras"
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(ckpt_path, save_best_only=True, monitor="val_accuracy"),
    tf.keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True, monitor="val_accuracy")
]
history = model.fit(train_ds2, validation_data=val_ds2, epochs=12, callbacks=callbacks)


Epoch 1/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.3172 - loss: 1.4602

    smooth_sparse_labels(y_true, NUM_CLASSES, 0.05), y_pred
)

  return {key: serialize_keras_object(value) for key, value in obj.items()}


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 253ms/step - accuracy: 0.3179 - loss: 1.4566 - val_accuracy: 0.4103 - val_loss: 1.1578
Epoch 2/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 132ms/step - accuracy: 0.4738 - loss: 1.0918 - val_accuracy: 0.4359 - val_loss: 1.1401
Epoch 3/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 182ms/step - accuracy: 0.5386 - loss: 1.0142 - val_accuracy: 0.4808 - val_loss: 1.0830
Epoch 4/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 139ms/step - accuracy: 0.5402 - loss: 0.9940 - val_accuracy: 0.4872 - val_loss: 1.0713
Epoch 5/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 132ms/step - accuracy: 0.5734 - loss: 0.9695 - val_accuracy: 0.4936 - val_loss: 1.0684
Epoch 6/12
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 166ms/step - accuracy: 0.5255 - loss: 0.9705 - va

In [None]:
for layer in base.layers:
    if "block_13" in layer.name or "block_14" in layer.name or "block_15" in layer.name:
        layer.trainable = True
    else:
        layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=loss_fn,
              metrics=["accuracy"])

ft_ckpt = ARTIFACTS/"best_finetune.keras"
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(ft_ckpt, save_best_only=True, monitor="val_accuracy"),
    tf.keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True, monitor="val_accuracy"),
    tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-5, monitor="val_loss")
]
history2 = model.fit(train_ds2, validation_data=val_ds2, epochs=EPOCHS, callbacks=callbacks)


Epoch 1/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 300ms/step - accuracy: 0.5975 - loss: 0.9744 - val_accuracy: 0.5256 - val_loss: 1.0077 - learning_rate: 1.0000e-04
Epoch 2/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 197ms/step - accuracy: 0.6424 - loss: 0.8641 - val_accuracy: 0.5256 - val_loss: 0.9904 - learning_rate: 1.0000e-04
Epoch 3/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 174ms/step - accuracy: 0.7117 - loss: 0.7594 - val_accuracy: 0.5769 - val_loss: 0.9587 - learning_rate: 1.0000e-04
Epoch 4/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 159ms/step - accuracy: 0.7281 - loss: 0.6947 - val_accuracy: 0.5641 - val_loss: 0.9519 - learning_rate: 1.0000e-04
Epoch 5/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 148ms/step - accuracy: 0.6819 - loss: 0.7694 - val_accuracy: 0.5705 - val_loss: 0.9514 - learning_rate: 1.0000e-04
Epoch 6/40
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━

In [None]:
test_loss, test_acc = model.evaluate(test_ds2)
print(f"✅ Test accuracy: {test_acc:.3f}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step - accuracy: 0.7742 - loss: 0.6632
✅ Test accuracy: 0.695


Convert to TFLITE file for uploading to Edge Impulse

In [31]:
def representative_data_gen():
    for images, _ in train_ds.take(100):
        # MobileNetV2 training used preprocess_input([-1,1]),
        # but for full INT8 with uint8 I/O we feed [0..1] then scale in post-training
        yield [tf.cast(images, tf.float32)/255.0]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model = converter.convert()

tflite_path = ARTIFACTS/"carbrand_mnv2_3Classes_int8.tflite"
with open(tflite_path,"wb") as f: f.write(tflite_model)
print("TFLite size (bytes):", tflite_path.stat().st_size)


Saved artifact at '/tmp/tmpzlq7w1_i'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 96, 96, 3), dtype=tf.float32, name='keras_tensor_488')
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  133911349044304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349045840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349045648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349045072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349046416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349044496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349046032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349046224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349045264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349047376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133911349046



TFLite size (bytes): 975960
