In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import utils
from tensorflow.keras import mixed_precision
import os
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
mixed_precision.set_global_policy('mixed_float16')

2024-03-23 11:50:10.944529: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-23 11:50:10.969786: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-23 11:50:10.969824: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-23 11:50:10.969852: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-23 11:50:10.975683: I tensorflow/core/platform/cpu_feature_g

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9


2024-03-23 11:50:12.612716: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-23 11:50:12.633987: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-23 11:50:12.634213: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [2]:
train_data_path = "BIRDS1_split/train"
test_data_path = "BIRDS1_split/test"
IMG_HEIGHT, IMG_WIDTH = 224, 224
BATCH_SIZE = 32
bird_classes = sorted(os.listdir(train_data_path))

In [25]:
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    labels='inferred',
    label_mode='categorical',
    batch_size=BATCH_SIZE,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    shuffle=True,
    seed=123,
    validation_split=0.3,
    subset='training'
)

validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    labels='inferred',
    label_mode='categorical',
    batch_size=BATCH_SIZE,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    shuffle=False,
    seed=123,
    validation_split=0.3,
    subset='validation'
)
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    test_data_path,
    labels='inferred',
    label_mode='categorical',
    batch_size=BATCH_SIZE,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    shuffle=False,
    seed=123,

)

Found 25744 files belonging to 167 classes.
Using 18021 files for training.
Found 25744 files belonging to 167 classes.
Using 7723 files for validation.
Found 11545 files belonging to 167 classes.


In [26]:
preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input
train_dataset = train_dataset.map(lambda x, y: (preprocess_input(x), y))
validation_dataset = validation_dataset.map(lambda x, y: (preprocess_input(x), y))
test_dataset = test_dataset.map(lambda x, y: (preprocess_input(x), y))

In [5]:
base_model = tf.keras.applications.MobileNetV3Small(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)
)
base_model.trainable = False

for layer in base_model.layers:
    print(layer.name)

input_1
rescaling
Conv
Conv/BatchNorm
tf.math.add
re_lu
tf.math.multiply
multiply
expanded_conv/depthwise/pad
expanded_conv/depthwise
expanded_conv/depthwise/BatchNorm
re_lu_1
expanded_conv/squeeze_excite/AvgPool
expanded_conv/squeeze_excite/Conv
expanded_conv/squeeze_excite/Relu
expanded_conv/squeeze_excite/Conv_1
tf.math.add_1
re_lu_2
tf.math.multiply_1
expanded_conv/squeeze_excite/Mul
expanded_conv/project
expanded_conv/project/BatchNorm
expanded_conv_1/expand
expanded_conv_1/expand/BatchNorm
re_lu_3
expanded_conv_1/depthwise/pad
expanded_conv_1/depthwise
expanded_conv_1/depthwise/BatchNorm
re_lu_4
expanded_conv_1/project
expanded_conv_1/project/BatchNorm
expanded_conv_2/expand
expanded_conv_2/expand/BatchNorm
re_lu_5
expanded_conv_2/depthwise
expanded_conv_2/depthwise/BatchNorm
re_lu_6
expanded_conv_2/project
expanded_conv_2/project/BatchNorm
expanded_conv_2/Add
expanded_conv_3/expand
expanded_conv_3/expand/BatchNorm
tf.math.add_2
re_lu_7
tf.math.multiply_2
multiply_1
expanded_conv

In [6]:
from keras.src import regularizers

inputs = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="input_layer")

x = base_model(inputs, training=False)
x = layers.GlobalAvgPool2D()(x)

x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
x = tf.keras.layers.Dropout(0.3)(x)

outputs = layers.Dense(len(bird_classes), activation='softmax', dtype=tf.float32, kernel_regularizer=regularizers.l2(0.005))(x)

model1 = tf.keras.Model(inputs, outputs)

In [7]:
model1.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 224, 224, 3)]     0         
                                                                 
 MobilenetV3small (Function  (None, 7, 7, 576)         939120    
 al)                                                             
                                                                 
 global_average_pooling2d (  (None, 576)               0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 1024)              590848    
                                                                 
 dense_1 (Dense)             (None, 1024)              1049600   
                                                                 
 dropout (Dropout)           (None, 1024)              0     

In [8]:
model1.compile(loss='categorical_crossentropy',
                           optimizer=Adam(),
                           metrics=['accuracy'])

checkpointer = ModelCheckpoint(filepath='Best_models/best_mobilenetv3small.hdf5', verbose=1, save_best_only=True)

history_eff_b0 = model1.fit(train_dataset,
                                        validation_data=validation_dataset,
                                        epochs=10,
                                        verbose=1,
                                        callbacks=[checkpointer]) 

Epoch 1/10


2024-03-23 11:50:17.377980: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8600
2024-03-23 11:50:17.430280: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-03-23 11:50:18.119762: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x74ca68003500 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-23 11:50:18.119786: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
2024-03-23 11:50:18.125285: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-03-23 11:50:18.192674: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 1: val_loss improved from inf to 3.71443, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_loss improved from 3.71443 to 3.05981, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 3/10
Epoch 3: val_loss improved from 3.05981 to 2.66200, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 4/10
Epoch 4: val_loss improved from 2.66200 to 2.44850, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 5/10
Epoch 5: val_loss improved from 2.44850 to 2.14514, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 6/10
Epoch 6: val_loss improved from 2.14514 to 1.71723, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 7/10
Epoch 7: val_loss improved from 1.71723 to 1.63202, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 8/10
Epoch 8: val_loss improved from 1.63202 to 1.32422, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 9/10
Epoch 9: val_loss improved from 1.32422 to 1.22979, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 10/10
Epoch 10: val_loss improved from 1.22979 to 1.09985, sa

In [9]:

for layer in base_model.layers[-100:]:
    layer.trainable = True


In [10]:
for layer in base_model.layers:
    print(layer.name, layer.trainable)

input_1 False
rescaling False
Conv False
Conv/BatchNorm False
tf.math.add False
re_lu False
tf.math.multiply False
multiply False
expanded_conv/depthwise/pad False
expanded_conv/depthwise False
expanded_conv/depthwise/BatchNorm False
re_lu_1 False
expanded_conv/squeeze_excite/AvgPool False
expanded_conv/squeeze_excite/Conv False
expanded_conv/squeeze_excite/Relu False
expanded_conv/squeeze_excite/Conv_1 False
tf.math.add_1 False
re_lu_2 False
tf.math.multiply_1 False
expanded_conv/squeeze_excite/Mul False
expanded_conv/project False
expanded_conv/project/BatchNorm False
expanded_conv_1/expand False
expanded_conv_1/expand/BatchNorm False
re_lu_3 False
expanded_conv_1/depthwise/pad False
expanded_conv_1/depthwise False
expanded_conv_1/depthwise/BatchNorm False
re_lu_4 False
expanded_conv_1/project False
expanded_conv_1/project/BatchNorm False
expanded_conv_2/expand False
expanded_conv_2/expand/BatchNorm False
re_lu_5 False
expanded_conv_2/depthwise False
expanded_conv_2/depthwise/BatchNo

In [11]:
model1.compile(loss='categorical_crossentropy',
                           optimizer=Adam(),
                           metrics=['accuracy'])
start_epoch = 10

model1.fit(train_dataset,
                                        validation_data=validation_dataset,
                                        epochs=start_epoch+15,
                                        initial_epoch=start_epoch, 
                                        verbose=1,
                                        callbacks=[checkpointer]) 

Epoch 11/25
Epoch 11: val_loss improved from 1.09985 to 0.93692, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 12/25
Epoch 12: val_loss did not improve from 0.93692
Epoch 13/25
Epoch 13: val_loss improved from 0.93692 to 0.89186, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 14/25
Epoch 14: val_loss did not improve from 0.89186
Epoch 15/25
Epoch 15: val_loss did not improve from 0.89186
Epoch 16/25
Epoch 16: val_loss improved from 0.89186 to 0.80969, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 17/25
Epoch 17: val_loss improved from 0.80969 to 0.80166, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 18/25
Epoch 18: val_loss improved from 0.80166 to 0.77836, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 19/25
Epoch 19: val_loss improved from 0.77836 to 0.71057, saving model to Best_models/best_mobilenetv3small.hdf5
Epoch 20/25
Epoch 20: val_loss did not improve from 0.71057
Epoch 21/25
Epoch 21: val_loss did not i

<keras.src.callbacks.History at 0x74cb281248e0>

In [12]:
len(base_model.layers)

229

In [13]:

for layer in base_model.layers[-150:]:
    layer.trainable = True


In [23]:
model1.compile(loss='categorical_crossentropy',
                           optimizer=Adam(0.0001, beta_1=0.8),
                           metrics=['accuracy'])
start_epoch = 115

model1.fit(train_dataset,
                                        
                                        validation_data=validation_dataset,
                                        
                                        epochs=start_epoch+15,
                                        initial_epoch=start_epoch, 
                                        verbose=1,
                                        callbacks=[checkpointer]) 

Epoch 116/130
Epoch 116: val_loss did not improve from 0.34074
Epoch 117/130
Epoch 117: val_loss did not improve from 0.34074
Epoch 118/130
Epoch 118: val_loss did not improve from 0.34074
Epoch 119/130
Epoch 119: val_loss did not improve from 0.34074
Epoch 120/130
Epoch 120: val_loss did not improve from 0.34074
Epoch 121/130
Epoch 121: val_loss did not improve from 0.34074
Epoch 122/130
Epoch 122: val_loss did not improve from 0.34074
Epoch 123/130
Epoch 123: val_loss did not improve from 0.34074
Epoch 124/130
Epoch 124: val_loss did not improve from 0.34074
Epoch 125/130
Epoch 125: val_loss did not improve from 0.34074
Epoch 126/130

KeyboardInterrupt: 

In [24]:
loaded = tf.keras.models.load_model("Best_models/best_mobilenetv3small.hdf5")




In [28]:
loaded.evaluate(test_dataset)



[0.5539168119430542, 0.8965786099433899]

In [29]:
tf.keras.models.save_model(loaded, "Best_models/best_mobilenet_small_89.65")

INFO:tensorflow:Assets written to: Best_models/best_mobilenet_small_89.65/assets


INFO:tensorflow:Assets written to: Best_models/best_mobilenet_small_89.65/assets
