In [1]:
# Rare Species Family Classifier - Deep Learning Project

import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [8]:
# === 1. Paths ===
# Root folder that holds the 'train', 'val' and 'test' sub-directories.
# The original machine-specific location is kept as the default so existing
# runs behave exactly as before; set the RARE_SPECIES_DATA_DIR environment
# variable to point the notebook at a dataset stored elsewhere.
DEFAULT_DATA_DIR = r'C:\Users\inesm\OneDrive\Documentos\Mestrado\1º ano\2ºS\DeepLearning\Project DL\dataset_split'
DATA_DIR = os.environ.get('RARE_SPECIES_DATA_DIR', DEFAULT_DATA_DIR)

# One directory per split, each joined onto the dataset root.
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VAL_DIR = os.path.join(DATA_DIR, 'val')
TEST_DIR = os.path.join(DATA_DIR, 'test')

In [9]:
# === 2. Image Parameters ===
# (height, width) passed as the generators' target_size, and the number of
# images per batch.
IMG_HEIGHT = IMG_WIDTH = 224
IMG_SIZE = (IMG_HEIGHT, IMG_WIDTH)
BATCH_SIZE = 32


In [10]:
# === 3. Image Data Generators ===
# The ImageNet MobileNetV2 weights were trained on pixels scaled to [-1, 1].
# The architecture's own preprocess_input performs that scaling, so it is used
# here instead of rescale=1./255, which would feed [0, 1] inputs to the
# pretrained backbone.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Training images receive light augmentation on top of the preprocessing.
train_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess,
                                   rotation_range=15,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   zoom_range=0.1,
                                   horizontal_flip=True)
# Validation and test images are only preprocessed, never augmented.
val_test_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)

# Arguments shared by every split so the three iterators cannot drift apart.
flow_kwargs = dict(target_size=IMG_SIZE,
                   batch_size=BATCH_SIZE,
                   class_mode='categorical')

train_gen = train_datagen.flow_from_directory(TRAIN_DIR, **flow_kwargs)
val_gen = val_test_datagen.flow_from_directory(VAL_DIR, **flow_kwargs)
# shuffle=False keeps test batches in directory order, so predictions can be
# compared against test_gen.classes.
test_gen = val_test_datagen.flow_from_directory(TEST_DIR, shuffle=False, **flow_kwargs)

num_classes = train_gen.num_classes
class_indices = train_gen.class_indices
inv_class_indices = {v: k for k, v in class_indices.items()}

# Test labels are decoded with the training mapping, so every split must
# assign the same index to the same class folder; fail early if they do not.
for split_name, split_gen in (('val', val_gen), ('test', test_gen)):
    if split_gen.class_indices != class_indices:
        raise ValueError(
            f"Class folders in the '{split_name}' split do not match the training split."
        )

Found 8388 images belonging to 202 classes.
Found 1797 images belonging to 202 classes.
Found 1798 images belonging to 202 classes.


In [11]:
# === 4. Build Model ===
# Pretrained MobileNetV2 backbone without its ImageNet classification head.
# The input shape is derived from IMG_SIZE (plus 3 colour channels) so the
# network always matches the size the generators resize images to.
base_model = MobileNetV2(include_top=False, weights='imagenet',
                         input_shape=tuple(IMG_SIZE) + (3,))
base_model.trainable = False  # Freeze base

# Classification head: pool the backbone's feature maps, apply dropout, pass
# through a small dense layer, then a softmax over the dataset's classes.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# categorical_crossentropy pairs with the generators' class_mode='categorical'
# (one-hot labels).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
# === 5. Training ===
# Stop once val_loss has gone 5 epochs without improving and roll back to the
# best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.3 when val_loss has stalled for 3 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=3,
)

training_callbacks = [early_stop, reduce_lr]
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=25,
    callbacks=training_callbacks,
)

  self._warn_if_super_not_called()


Epoch 1/25
[1m 22/263[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m6:13[0m 2s/step - accuracy: 0.0318 - loss: 5.4867



[1m 33/263[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m6:06[0m 2s/step - accuracy: 0.0319 - loss: 5.4274

KeyboardInterrupt: 

In [None]:
# === 6. Evaluation ===
# The model was compiled with a single metric, so evaluate() yields the loss
# followed by the accuracy.
test_metrics = model.evaluate(test_gen)
loss, acc = test_metrics
print(f"Test accuracy: {acc:.4f}")

In [None]:
# === 7. Classification Report and Confusion Matrix ===
# Predicted class = index of the highest softmax probability for each image.
# test_gen was created with shuffle=False, so the prediction order lines up
# with test_gen.classes.
preds = model.predict(test_gen)
y_true = test_gen.classes
y_pred = np.argmax(preds, axis=1)

# Class indices in ascending order and their folder names in the same order.
all_labels = sorted(inv_class_indices)
target_names = [inv_class_indices[i] for i in all_labels]

# Passing labels explicitly keeps the report aligned with target_names even if
# some class never appears in y_true or y_pred (otherwise scikit-learn raises
# a ValueError about a class-count mismatch). zero_division=0 reports 0 for
# classes that were never predicted instead of emitting one warning per class.
print(classification_report(y_true, y_pred,
                            labels=all_labels,
                            target_names=target_names,
                            zero_division=0))

# Explicit labels also pin the matrix to one row/column per class.
cm = confusion_matrix(y_true, y_pred, labels=all_labels)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=False, cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

In [None]:
# === 8. Save Model ===
# Write the model to a single file in the current working directory.
# NOTE(review): the .h5 extension selects the HDF5 format; confirm this is the
# format downstream loaders expect before changing it.
MODEL_PATH = 'best_species_classifier.h5'
model.save(MODEL_PATH)
