# 1. Imports & Config

In [8]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from numpy import ndarray
from matplotlib.pyplot import subplots, show
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, top_k_accuracy_score

# bias
from collections import Counter


# model building imports
import keras
from keras import Model, Sequential, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.layers import LeakyReLU
from keras.ops import add
from keras.utils import to_categorical
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy, AUC, F1Score, TopKCategoricalAccuracy
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler ,ReduceLROnPlateau, EarlyStopping
from keras.utils import Sequence, load_img, img_to_array


# image filtering
from tensorflow.keras.applications import ResNet50, EfficientNetB6,EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input, decode_predictions
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.utils import load_img, img_to_array
import numpy as np
from tqdm import tqdm

#import keras_tuner
# augmentation operations
from keras.layers import RandomBrightness, RandomFlip, RandomRotation, RandomContrast
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Dense, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2
from keras.layers import Pipeline
from keras.layers import BatchNormalization
from keras.layers import Dropout
from keras.applications import ResNet50
from keras.applications import EfficientNetV2S
from keras.models import Model
from keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from keras.initializers import Constant

In [9]:
import tensorflow as tf

# List all physical GPUs
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:", gpus)


Available GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [10]:
print('num gpus:', len(tf.config.list_physical_devices('GPU')))

num gpus: 1


In [11]:
tf.config.run_functions_eagerly(True)

In [12]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth set")
    except RuntimeError as e:
        print(e)

Memory growth set


In [13]:
# importing the dataset
# metadata = pd.read_csv('../rare_species 1/metadata.csv')
# metadata = pd.read_csv('../Data/rare_species/metadata.csv')
# metadata.sample(5)

In [14]:

from google.colab import drive
drive.mount('/content/drive')
metadata = pd.read_csv('drive/MyDrive/rare_species/metadata.csv')
metadata.sample(5)

Mounted at /content/drive


Unnamed: 0,rare_species_id,eol_content_id,eol_page_id,kingdom,phylum,family,file_path
11271,18354fcd-a947-4c6d-833a-4a7759dd2629,28175168,45516020,animalia,chordata,phasianidae,chordata_phasianidae/28175168_45516020_eol-ful...
2330,abcd8d7a-1bd5-489d-b71c-2cba6e03a7e1,20743471,1052882,animalia,chordata,thraupidae,chordata_thraupidae/20743471_1052882_eol-full-...
4895,1d4c7201-d8bc-4722-83c0-25cbc2603ad6,22961896,1041309,animalia,arthropoda,triopsidae,arthropoda_triopsidae/22961896_1041309_eol-ful...
9565,f17ff9c5-b6b6-4bc5-9f19-bf62c3528c4f,21471724,2865857,animalia,arthropoda,lucanidae,arthropoda_lucanidae/21471724_2865857_eol-full...
10314,404917cc-f4c9-4bc9-ab3c-07943a953b8b,20900190,1048038,animalia,chordata,hynobiidae,chordata_hynobiidae/20900190_1048038_eol-full-...


In [15]:
# applying the filepath
# metadata['file_path'] = metadata['file_path'].apply(lambda x: os.path.join('../rare_species 1', x))

metadata['file_path'] = metadata['file_path'].apply(lambda x: os.path.join('drive/MyDrive/rare_species', x))

In [16]:
metadata.sample(5)

Unnamed: 0,rare_species_id,eol_content_id,eol_page_id,kingdom,phylum,family,file_path
11101,c420a3b0-1981-4fdf-a2ab-0446ad5bcefa,29638450,476649,animalia,arthropoda,formicidae,drive/MyDrive/rare_species/arthropoda_formicid...
8742,ac8deea5-c087-4697-bd6d-4a71611b39af,29727661,1049004,animalia,chordata,ardeidae,drive/MyDrive/rare_species/chordata_ardeidae/2...
7585,7c4e8014-01f3-437d-97f8-4dcc464b9709,21606543,45513587,animalia,chordata,scolopacidae,drive/MyDrive/rare_species/chordata_scolopacid...
7083,7cca432a-3d26-43c2-9224-712fb05e6d0f,20298388,4792502,animalia,mollusca,haliotidae,drive/MyDrive/rare_species/mollusca_haliotidae...
11660,0a6a879a-d41a-439f-86cd-b45c0ae9ec40,20502110,46560555,animalia,chordata,rajidae,drive/MyDrive/rare_species/chordata_rajidae/20...


# 2.  Preprocessing

In [17]:
metadata.phylum.unique()

array(['mollusca', 'chordata', 'arthropoda', 'echinodermata', 'cnidaria'],
      dtype=object)

In [18]:
print(metadata['family'].value_counts())

family
dactyloidae        300
cercopithecidae    300
formicidae         291
carcharhinidae     270
salamandridae      270
                  ... 
cyprinodontidae     30
alligatoridae       30
balaenidae          30
goodeidae           30
siluridae           29
Name: count, Length: 202, dtype: int64


In [19]:
num_classes = metadata['family'].nunique()
num_classes

202

our dataset is inbalaced and has a length of 202 classes

In [20]:
# Transforming our target and feature into a int
label_encoder = LabelEncoder()
metadata['target'] = label_encoder.fit_transform(metadata['family'])


In [21]:
metadata['target'].sample(10)

Unnamed: 0,target
10,123
1544,183
3388,141
3298,32
5140,183
8057,13
1702,76
10947,15
1125,2
7213,153


## Cheking for misslabeled images/ images that are not supposed to be in our dataset

Even with False Positives the objective is to not discard more than 5% of our dataset

In [23]:
model = EfficientNetB0(weights="imagenet", include_top=True)

#From imagenet class index:https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
animal_prefixes = ('n014', 'n015', 'n016', 'n017', 'n018', 'n019', 'n020', 'n021', 'n022', 'n023', 'n024')


weird_samples = []

for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
    path = row['file_path']
    true_label = row['family']

    try:
        img = load_img(path, target_size=(224, 224))
        x = img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        preds = model.predict(x, verbose=0)
        top_pred = decode_predictions(preds, top=1)[0][0]  # (id, label, prob)

        class_id = top_pred[0]
        prob = top_pred[2]

        # Check if class_id is among animal-related prefixes
        if not class_id.startswith(animal_prefixes) and prob > 0.85:
            print(f"Not animal (class_id={class_id}) → removed: labeled '{true_label}'")
            weird_samples.append((path, class_id, prob, true_label))

    except Exception as e:
        print(f"Error reading {path}: {e}")




  0%|          | 0/11983 [00:00<?, ?it/s]

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
[1m35363/35363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step


  1%|▏         | 170/11983 [02:53<3:49:33,  1.17s/it]

Not animal (class_id=n04275548) → removed: labeled 'parulidae'


  2%|▏         | 287/11983 [05:13<3:42:12,  1.14s/it]

Not animal (class_id=n04275548) → removed: labeled 'motacillidae'


  4%|▍         | 477/11983 [09:10<4:09:37,  1.30s/it]

Not animal (class_id=n04118776) → removed: labeled 'bufonidae'


  4%|▍         | 502/11983 [09:39<3:43:56,  1.17s/it]

Not animal (class_id=n04118776) → removed: labeled 'anguidae'


  5%|▍         | 566/11983 [11:02<3:40:22,  1.16s/it]

Not animal (class_id=n04275548) → removed: labeled 'pittidae'


  5%|▌         | 622/11983 [12:08<3:22:57,  1.07s/it]

Not animal (class_id=n03794056) → removed: labeled 'cryptobranchidae'


  7%|▋         | 787/11983 [15:27<3:46:57,  1.22s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


  8%|▊         | 945/11983 [18:39<3:54:12,  1.27s/it]

Not animal (class_id=n04118776) → removed: labeled 'pocilloporidae'


 12%|█▏        | 1484/11983 [29:58<3:25:44,  1.18s/it]

Not animal (class_id=n04118776) → removed: labeled 'plethodontidae'


 14%|█▎        | 1644/11983 [33:09<3:32:18,  1.23s/it]

Not animal (class_id=n04118776) → removed: labeled 'faviidae'


 14%|█▍        | 1714/11983 [34:36<3:37:20,  1.27s/it]

Not animal (class_id=n04275548) → removed: labeled 'bovidae'


 14%|█▍        | 1731/11983 [34:57<3:34:03,  1.25s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 15%|█▌        | 1852/11983 [37:27<3:11:32,  1.13s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 17%|█▋        | 2013/11983 [40:41<3:13:47,  1.17s/it]

Not animal (class_id=n04118776) → removed: labeled 'salamandridae'


 18%|█▊        | 2140/11983 [43:19<2:56:21,  1.07s/it]

Not animal (class_id=n04118776) → removed: labeled 'dactyloidae'


 22%|██▏       | 2653/11983 [53:44<3:08:55,  1.21s/it]

Not animal (class_id=n03196217) → removed: labeled 'conidae'


 24%|██▎       | 2837/11983 [57:26<2:58:00,  1.17s/it]

Not animal (class_id=n03840681) → removed: labeled 'diomedeidae'


 26%|██▌       | 3097/11983 [1:02:50<3:14:35,  1.31s/it]

Not animal (class_id=n04275548) → removed: labeled 'hominidae'


 27%|██▋       | 3286/11983 [1:06:41<2:55:47,  1.21s/it]

Not animal (class_id=n03532672) → removed: labeled 'myliobatidae'


 28%|██▊       | 3342/11983 [1:07:47<2:46:10,  1.15s/it]

Not animal (class_id=n03196217) → removed: labeled 'gavialidae'


 31%|███       | 3671/11983 [1:14:45<2:53:35,  1.25s/it]

Not animal (class_id=n02640242) → removed: labeled 'acipenseridae'


 33%|███▎      | 3905/11983 [1:19:29<2:24:21,  1.07s/it]

Not animal (class_id=n04118776) → removed: labeled 'plethodontidae'


 34%|███▎      | 4024/11983 [1:21:53<2:26:11,  1.10s/it]

Not animal (class_id=n04275548) → removed: labeled 'brachypteraciidae'


 35%|███▍      | 4193/11983 [1:25:22<2:38:42,  1.22s/it]

Not animal (class_id=n03196217) → removed: labeled 'phyllomedusidae'


 44%|████▎     | 5214/11983 [1:46:13<2:19:18,  1.23s/it]

Not animal (class_id=n04275548) → removed: labeled 'cercopithecidae'


 44%|████▍     | 5318/11983 [1:48:20<1:57:38,  1.06s/it]

Not animal (class_id=n04118776) → removed: labeled 'rajidae'


 46%|████▌     | 5527/11983 [1:52:51<2:45:42,  1.54s/it]

Not animal (class_id=n04118776) → removed: labeled 'rhyacotritonidae'


 47%|████▋     | 5636/11983 [1:55:10<1:54:52,  1.09s/it]

Not animal (class_id=n04118776) → removed: labeled 'squalidae'


 49%|████▉     | 5869/11983 [1:59:48<2:02:58,  1.21s/it]

Not animal (class_id=n04118776) → removed: labeled 'pocilloporidae'


 50%|████▉     | 5939/11983 [2:01:15<2:00:46,  1.20s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 50%|████▉     | 5961/11983 [2:01:41<2:06:07,  1.26s/it]

Not animal (class_id=n04118776) → removed: labeled 'alopiidae'


 51%|█████     | 6135/11983 [2:05:13<1:50:42,  1.14s/it]

Not animal (class_id=n03196217) → removed: labeled 'rhinodermatidae'


 54%|█████▍    | 6493/11983 [2:12:40<1:43:25,  1.13s/it]

Not animal (class_id=n04275548) → removed: labeled 'psittacidae'


 57%|█████▋    | 6835/11983 [2:19:53<1:47:46,  1.26s/it]

Not animal (class_id=n04275548) → removed: labeled 'ramphastidae'


 59%|█████▉    | 7083/11983 [2:25:06<1:32:27,  1.13s/it]

Not animal (class_id=n02794156) → removed: labeled 'agamidae'


 60%|██████    | 7206/11983 [2:27:39<1:44:54,  1.32s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 61%|██████    | 7282/11983 [2:29:08<1:26:30,  1.10s/it]

Not animal (class_id=n04118776) → removed: labeled 'turdidae'


 62%|██████▏   | 7401/11983 [2:31:40<1:32:01,  1.21s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 62%|██████▏   | 7454/11983 [2:32:47<1:36:24,  1.28s/it]

Not animal (class_id=n04275548) → removed: labeled 'ramphastidae'


 62%|██████▏   | 7458/11983 [2:32:52<1:33:17,  1.24s/it]

Not animal (class_id=n04118776) → removed: labeled 'salamandridae'


 63%|██████▎   | 7574/11983 [2:35:15<1:45:53,  1.44s/it]

Not animal (class_id=n04118776) → removed: labeled 'salamandridae'


 64%|██████▍   | 7670/11983 [2:37:13<1:36:14,  1.34s/it]

Not animal (class_id=n04332243) → removed: labeled 'serranidae'


 66%|██████▌   | 7850/11983 [2:40:57<1:26:08,  1.25s/it]

Not animal (class_id=n02640242) → removed: labeled 'acipenseridae'


 66%|██████▋   | 7963/11983 [2:43:24<1:16:36,  1.14s/it]

Not animal (class_id=n03942813) → removed: labeled 'carcharhinidae'


 69%|██████▉   | 8244/11983 [2:49:25<1:18:53,  1.27s/it]

Not animal (class_id=n09229709) → removed: labeled 'aotidae'


 69%|██████▉   | 8307/11983 [2:50:50<1:17:27,  1.26s/it]

Not animal (class_id=n04347754) → removed: labeled 'sphyrnidae'


 74%|███████▍  | 8909/11983 [3:03:13<1:06:20,  1.29s/it]

Not animal (class_id=n04118776) → removed: labeled 'unionidae'


 75%|███████▍  | 8940/11983 [3:03:52<1:06:36,  1.31s/it]

Not animal (class_id=n04275548) → removed: labeled 'bucerotidae'


 75%|███████▍  | 8951/11983 [3:04:06<1:04:30,  1.28s/it]

Not animal (class_id=n04332243) → removed: labeled 'alopiidae'


 77%|███████▋  | 9228/11983 [3:09:37<50:02,  1.09s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


 78%|███████▊  | 9361/11983 [3:12:23<47:19,  1.08s/it]

Not animal (class_id=n03530642) → removed: labeled 'colubridae'


 81%|████████  | 9691/11983 [3:19:06<42:33,  1.11s/it]

Not animal (class_id=n04118776) → removed: labeled 'plethodontidae'


 88%|████████▊ | 10499/11983 [3:35:50<31:22,  1.27s/it]

Not animal (class_id=n04275548) → removed: labeled 'chamaeleonidae'


 91%|█████████ | 10899/11983 [3:43:55<21:15,  1.18s/it]

Not animal (class_id=n09229709) → removed: labeled 'atelidae'


 93%|█████████▎| 11189/11983 [3:49:55<15:56,  1.20s/it]

Not animal (class_id=n04118776) → removed: labeled 'agariciidae'


100%|██████████| 11983/11983 [4:05:51<00:00,  1.23s/it]


In [24]:
len(weird_samples)

55

In [25]:
len(metadata) * 0.05

599.15

Less than 5% like we wanted

In [26]:
# creating a clean dataset
bad_photos = [sample[0] for sample in weird_samples]

clean_metadata = metadata[~metadata['file_path'].isin(bad_photos)]

In [27]:
print(len(clean_metadata))
print(len(metadata))

11928
11983


In [None]:
# clean_metadata = pd.read_csv('drive/MyDrive/rare_species/clean_metadata.csv')
# clean_metadata['file_path'] = clean_metadata['file_path'].str.replace('../rare_species 1/', 'drive/MyDrive/rare_species/', regex=False)

# clean_metadata['target'] = le.transform(metadata_clean['family'])



In [28]:
clean_metadata.sample(5)

Unnamed: 0,rare_species_id,eol_content_id,eol_page_id,kingdom,phylum,family,file_path,target
7291,799923be-276b-4017-9483-9ebd5da7e60e,28675220,795885,animalia,chordata,dactyloidae,drive/MyDrive/rare_species/chordata_dactyloida...,60
2855,c4180b46-b6c1-43ee-ac7f-b8d8d859ead6,29932515,46578271,animalia,chordata,chaetodontidae,drive/MyDrive/rare_species/chordata_chaetodont...,41
2255,b6a4e9db-8075-4f41-99cd-7cfa98efee35,22986449,791049,animalia,chordata,chelidae,drive/MyDrive/rare_species/chordata_chelidae/2...,45
1427,32290981-2c0f-4b20-bd92-668d5b72d046,14159842,1181772,animalia,arthropoda,theraphosidae,drive/MyDrive/rare_species/arthropoda_therapho...,185
6429,e68d8202-e788-4f50-bd7b-6347fa19ce6d,20769923,1050315,animalia,chordata,thraupidae,drive/MyDrive/rare_species/chordata_thraupidae...,186


Now let's compare both Df families

In [29]:
# Get value counts per class
old_counts = metadata['family'].value_counts().rename("before_cleaning")
new_counts = clean_metadata['family'].value_counts().rename("after_cleaning")

# Combine into a DataFrame
comparison_df = pd.concat([old_counts, new_counts], axis=1).fillna(0).astype(int)

# Add difference column
comparison_df['removed'] = comparison_df['before_cleaning'] - comparison_df['after_cleaning']

# Sort by most affected classes
comparison_df = comparison_df.sort_values(by='removed', ascending=False)


pd.set_option('display.max_rows', None)
display(comparison_df)


Unnamed: 0_level_0,before_cleaning,after_cleaning,removed
family,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
agariciidae,120,112,8
salamandridae,270,267,3
plethodontidae,270,267,3
acipenseridae,90,88,2
pocilloporidae,60,58,2
alopiidae,60,58,2
ramphastidae,30,28,2
dactyloidae,300,299,1
diomedeidae,180,179,1
cercopithecidae,300,299,1


### Spliting the data

In [30]:
# Spliting the data while keeping the same distribuition of classes(target) - 70%/20%/10%

train, temp = train_test_split(clean_metadata, test_size=0.3,stratify=clean_metadata['target'],random_state=1)

val, test = train_test_split(temp,test_size=1/3,stratify=temp['target'],random_state=1)

In [31]:
train.shape , val.shape , test.shape

((8349, 8), (2386, 8), (1193, 8))

Loading the images

In [32]:
IMG_SIZE = (256, 256)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

def df_to_dataset(dataframe):
    filepaths = dataframe['file_path'].values
    labels = dataframe['target'].values

    ds = tf.data.Dataset.from_tensor_slices((filepaths, labels))

    def process(file_path, label):
        image = tf.io.read_file(file_path)
        image = tf.image.decode_image(image, channels=3, expand_animations=False)
        image = tf.image.resize(image, IMG_SIZE)
        image = tf.cast(image, tf.float32) / 255.0
        return image, tf.one_hot(label, depth=num_classes)

    ds = ds.map(process, num_parallel_calls=AUTOTUNE)

    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)


In [33]:
train_df = df_to_dataset(train)
val_df = df_to_dataset(val)
test_df = df_to_dataset(test)



# 3. Models

In [34]:
augmentation_layer = Sequential(
    [
        RandomBrightness(factor=0.2, value_range=(0.0, 1.0)),
        RandomFlip(),
        RandomRotation(factor=0.2, fill_mode="reflect"),
        RandomContrast(factor = 0.4, value_range=(0, 1), seed=1)
    ],
    name="augmentation_layer")


In [35]:
lr_reduction = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0, verbose = 1)

In [None]:
# During the training we will keep the weights with the lowest validation loss
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights = True)

In [None]:
# we will use the top 1 accuracy to choose the best model and use f1-score and top 5 accuracy to monitor the models performance. 
loss = CategoricalCrossentropy(name="loss")

# metrics
categorical_accuracy = CategoricalAccuracy(name="accuracy")
f1_score = F1Score(average="macro", name="f1_score")
top_5_accuracy = TopKCategoricalAccuracy(k=5, name="top_5_accuracy")


metrics = [categorical_accuracy]

In [38]:
input_shape = (256, 256,3)

epochs = 200
batch_size = 32

## Model 1

In [39]:
def build_CNN(input_shape, num_classes):
    weight_decay = 1e-4

    model = Sequential([
        Input(shape=input_shape),
        augmentation_layer,

        Conv2D(64, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(256, (3, 3), activation="relu", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.2),

        Conv2D(256, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(512, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Conv2D(256, (3, 3), activation="relu", padding="same", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),
        Dropout(0.4),

        Conv2D(128, (3, 3), activation="relu", padding="same", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),

        GlobalAveragePooling2D(),

        Dense(1024, activation="relu"),
        Dropout(0.5),


        Dense(512, activation="relu"),
        Dropout(0.5),

        Dense(num_classes, activation="softmax")
    ], name="CNN_1")

    model.summary()
    return model




In [40]:
model_1 = build_transfg_like(input_shape=input_shape, num_classes=num_classes)

optimizer = SGD(learning_rate=0.01, name="optimizer")

model_1.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [41]:
# train the model
_1 = model_1.fit(
    train_df,
    validation_data=val_df,
    epochs=epochs,
    callbacks=[lr_reduction, early_stop],
    verbose=1
)

Epoch 1/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 395ms/step - accuracy: 0.0100 - loss: 5.3668 - val_accuracy: 0.0260 - val_loss: 5.2864 - learning_rate: 0.0100
Epoch 2/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 378ms/step - accuracy: 0.0497 - loss: 5.2325 - val_accuracy: 0.0541 - val_loss: 5.1017 - learning_rate: 0.0100
Epoch 3/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 378ms/step - accuracy: 0.0636 - loss: 5.0874 - val_accuracy: 0.0608 - val_loss: 5.0185 - learning_rate: 0.0100
Epoch 4/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 381ms/step - accuracy: 0.0706 - loss: 5.0001 - val_accuracy: 0.0725 - val_loss: 4.9204 - learning_rate: 0.0100
Epoch 5/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 381ms/step - accuracy: 0.0803 - loss: 4.9272 - val_accuracy: 0.0662 - val_loss: 4.9077 - learning_rate: 0.0100
Epoch 6/200
[1m261/261[0m [32m━━━━━━━━━━━━━━

# Model 2

In [42]:
def build_CNN(input_shape, num_classes):
    weight_decay = 1e-4

    model = Sequential([
        Input(shape=input_shape),
        augmentation_layer,

        Conv2D(64, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(256, (3, 3), activation="relu", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.2),

        Conv2D(256, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(512, (3, 3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        Conv2D(512, (3, 3), activation="relu", padding="same", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),
        Dropout(0.4),

        Conv2D(512, (3, 3), activation="relu", padding="same", kernel_regularizer=l2(weight_decay)),
        BatchNormalization(),

        GlobalAveragePooling2D(),

        Dense(1024, activation="relu"),
        Dropout(0.5),


        Dense(512, activation="relu"),
        Dropout(0.5),

        Dense(num_classes, activation="softmax")
    ], name="CNN_2")

    model.summary()
    return model




In [43]:
model_2 = build_transfg_like(input_shape=input_shape, num_classes=num_classes)

optimizer = Adam(learning_rate= 1e-4, name="optimizer")

model_2.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# train the model
_2 = model_2.fit(
    train_df,
    validation_data=val_df,
    epochs=epochs,
    callbacks=[lr_reduction, early_stop],
    verbose=1
)

Epoch 1/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 397ms/step - accuracy: 0.0781 - loss: 5.2827 - val_accuracy: 0.0176 - val_loss: 5.4084 - learning_rate: 1.0000e-04
Epoch 2/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 384ms/step - accuracy: 0.0737 - loss: 4.9635 - val_accuracy: 0.0616 - val_loss: 5.0673 - learning_rate: 1.0000e-04
Epoch 3/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 385ms/step - accuracy: 0.0882 - loss: 4.8019 - val_accuracy: 0.0943 - val_loss: 4.7498 - learning_rate: 1.0000e-04
Epoch 4/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 384ms/step - accuracy: 0.0947 - loss: 4.6913 - val_accuracy: 0.0989 - val_loss: 4.6132 - learning_rate: 1.0000e-04
Epoch 5/200
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 384ms/step - accuracy: 0.0998 - loss: 4.5996 - val_accuracy: 0.1069 - val_loss: 4.5119 - learning_rate: 1.0000e-04
Epoch 6/200
[1m261/261[

In [None]:
plt.figure(figsize=(14, 5))

plt.plot(_2.history['loss'], label='Train Loss')
plt.plot(_2.history['val_loss'], label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()


In [None]:
plt.figure(figsize=(14, 5))

# Plot accuracy
plt.plot(_2.history['accuracy'], label='Train Accuracy')
plt.plot(_2.history['val_accuracy'], label='Val Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()


plt.show()

## Eficient Net V2S

### First Model

In [None]:
def build_efficientnetv2s_model(input_shape=input_shape, num_classes=num_classes):
    inputs = Input(shape=input_shape, name="input_layer")
    x = augmentation_layer(inputs)

    base_model = EfficientNetV2S(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
        include_preprocessing=True
    )
    base_model.trainable = True

    x = base_model(x)

    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=output, name="EfficientNetV2S_Custom")
    return model



In [None]:
# This is a model that learns faster than the others so we will define a lower patience

lr_reduction = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0, verbose = 1)
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, restore_best_weights = True)

In [None]:
effnet = build_efficientnetv2s_model(input_shape=input_shape, num_classes=num_classes)
effnet.summary()

metrics = [
    categorical_accuracy,
    f1_score,
    top_5_accuracy,
]

effnet.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=CategoricalCrossentropy(),
    metrics=metrics)


In [None]:
checkpoint_cb = ModelCheckpoint(
    filepath='efficientnetv2s_best_model.weights.h5',  # keras or .h5
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,                   # True = only weights, False = full model
    verbose=1
)


In [None]:
_3 = effnet.fit(
    train_df,
    validation_data=val_df,
    epochs=100,
    callbacks=[early_stop, lr_reduction, checkpoint_cb],
    verbose=1
)

In [None]:
ef_eva1 = _3.evaluate(val_df, verbose=1)

In [None]:

plt.figure(figsize=(14, 5))

# Plot loss
plt.plot(_3.history['loss'], label='Train Loss')
plt.plot(_3.history['val_loss'], label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
y_prob = effnet.predict(test_df, verbose=1)      # shape: (N, 202)
y_pred = np.argmax(y_prob, axis=1)               # predicted class ids

In [None]:
y_true = test['target'].values

assert len(y_true) == len(y_pred), "mismatch in lengths!"

In [None]:
target_names = label_encoder.classes_

report_df = (
    pd.DataFrame(
        classification_report(
            y_true,
            y_pred,
            target_names=target_names,
            output_dict=True,
            zero_division=0
        )
    )
    .transpose()
    .drop(index=["accuracy", "macro avg", "weighted avg"])
    .sort_values("recall")
)

pd.set_option("display.max_rows", None)
display(report_df)


### Second Model

With bias

In [None]:
# adding bias as seen in the tensorflow website https://www.tensorflow.org/tutorials/structured_data/imbalanced_data

# Count images per class
counts = clean_metadata['target'].value_counts().sort_index()  # must be sorted to match class indices
total = np.sum(counts)

# Compute log priors
initial_bias = np.log(counts / total)


In [None]:
def build_efficientnetv2s_model(input_shape=input_shape, num_classes=num_classes):
    inputs = Input(shape=input_shape, name="input_layer")
    x = augmentation_layer(inputs)
    base_model = EfficientNetV2S(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
        include_preprocessing=True
    )

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation="softmax",bias_initializer= Constant(initial_bias))(x)

    base_model.trainable = True  

    model = Model(inputs=base_model.input, outputs=output, name="EfficientNetV2S_Custom")
    return model


In [None]:
effnet_v2 = build_efficientnetv2s_model(input_shape=input_shape, num_classes=num_classes)
#effnet_v2.summary()

effnet_v2.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=CategoricalCrossentropy(),
    metrics=metrics)


In [None]:
_4 = effnet_v2.fit(
    train_df,
    validation_data=val_df,
    epochs=200,
    callbacks=[early_stop, lr_reduction, checkpoint_cb],
    verbose=1,
)

In [None]:

plt.figure(figsize=(14, 5))

# Plot loss
plt.plot(_4.history['loss'], label='Train Loss')
plt.plot(_4.history['val_loss'], label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
y_prob = effnet_v2.predict(test_df, verbose=1)      # shape: (N, 202)
y_pred = np.argmax(y_prob, axis=1)               # predicted class ids

In [None]:
y_true = test['target'].values

assert len(y_true) == len(y_pred), "mismatch in lengths!"

In [None]:
report = classification_report(
    y_true,
    y_pred,
    target_names=label_encoder.classes_,
    output_dict=True,
    zero_division=0
)

# Convert to DataFrame
report_df = pd.DataFrame(report).transpose()

# Calculate top-5 accuracy separately
top5_acc = top_k_accuracy_score(y_true, y_prob, k=5)

# Keep only 'f1-score' column
report_df = report_df[['f1-score']]

# Insert top-5 manually
report_df.loc['top_5_accuracy'] = [top5_acc]

# Insert overall accuracy manually (global accuracy)
overall_accuracy = (y_true == y_pred).mean()
report_df.loc['accuracy'] = [overall_accuracy]

# Clean display
pd.set_option('display.max_rows', None)
display(report_df)

## ResNet50

In [None]:
# https://keras.io/guides/transfer_learning/

In [None]:
lr_reduction = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0, verbose = 1)
# During the training we will keep the weights with the lowest validation loss
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights = True)

In [None]:
inputs = keras.Input(shape=input_shape)
x = augmentation_layer(inputs)

base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=x
)
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)
x = Dense(256)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)

outputs = Dense(num_classes, activation='softmax', bias_initializer=Constant(initial_bias))(x)

resnet50 = Model(inputs, outputs)



In [None]:
resnet50.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=CategoricalCrossentropy(),
    metrics=metrics)


In [None]:
_6 = resnet50.fit(
    train_df,
    validation_data=val_df,
    epochs=40,
    callbacks=[early_stop, lr_reduction],
    verbose=1,
    #class_weight=class_weights_dict
)

Stopped at epoch 40 so that we can train with unfreezed layers

Now we are going to make the layers trainable and start training from where we stopped before

In [None]:
# Unfreezing some layers of ResNet50
for layer in model.layers:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

resnet50.compile(
    optimizer=Adam(learning_rate=1e-5),
    metrics=metrics
)


resnet50.fit(
    train_df,
    validation_data=val_df,
    epochs=epochs,
    callbacks=[early_stop, lr_reduction],
    verbose=1,
)


In [None]:
rn50_eva = resnet50.evaluate(val_df, verbose=1)

In [None]:
y_prob = resnet50.predict(val_df, verbose=1)      # shape: (N, 202)
y_pred = np.argmax(y_prob, axis=1)               # predicted class ids

In [None]:
y_true = val['target'].values

assert len(y_true) == len(y_pred), "mismatch in lengths!"

In [None]:
target_names = label_encoder.classes_

report_df = (
    pd.DataFrame(
        classification_report(
            y_true,
            y_pred,
            target_names=target_names,
            output_dict=True,
            zero_division=0
        )
    )
    .transpose()
    .drop(index=["accuracy", "macro avg", "weighted avg"])
    .sort_values("recall")
)

pd.set_option("display.max_rows", None)
display(report_df.head(202))


### EfficientNetV2S with transfer learning

In [None]:

inputs = keras.Input(shape=input_shape)

# Augmentation
x = augmentation_layer(inputs)

base_model = EfficientNetV2S(weights='imagenet',include_top=False,input_tensor=x)

# Freeze the base model
base_model.trainable = False

x = base_model.output


x = GlobalAveragePooling2D()(x)
x = Dense(512)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)
x = Dense(256)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)

# Output layer
outputs = Dense(num_classes, activation="softmax", bias_initializer=Constant(initial_bias))(x)

# Build the full model
effnetv2s_tl = Model(inputs, outputs)

In [None]:
effnetv2s_tl.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=CategoricalCrossentropy(),
    metrics=metrics)


In [None]:
_7 = effnetv2s_tl.fit(
    train_df,
    validation_data=val_df,
    epochs=50,
    callbacks=[early_stop, lr_reduction],
    verbose=1,
    #class_weight=class_weights_dict
)

In [None]:
# Unfreezing some layers of EffNetB6
for layer in model.layers:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

effnetv2s_tl.compile(
    optimizer=Adam(learning_rate=1e-5),
    metrics=metrics
)


effnetv2s_tl.fit(
    train_df,
    validation_data=val_df,
    epochs=epochs,
    callbacks=[early_stop, lr_reduction],
    verbose=1,
)

In [None]:
efb6_eva = effnetv2s_tl.evaluate(val_df, verbose=1)

In [None]:
y_prob = effnetv2s_tl.predict(val_df, verbose=1)
y_pred = np.argmax(y_prob, axis=1)

In [None]:
y_true = val['target'].values

assert len(y_true) == len(y_pred), "mismatch in lengths!"

In [None]:
target_names = label_encoder.classes_

report_df = (
    pd.DataFrame(
        classification_report(
            y_true,
            y_pred,
            target_names=target_names,
            output_dict=True,
            zero_division=0
        )
    )
    .transpose()
    .drop(index=["accuracy", "macro avg", "weighted avg"])
    .sort_values("recall")
)

pd.set_option("display.max_rows", None)
display(report_df.head(202))
