# Imports

In [15]:
%load_ext autoreload
%autoreload 2

import skimage
import os
import glob
import importlib
import wandb
import numpy as np
import matplotlib.pyplot as plt
import egg_class_functions as ecf
import tensorflow as tf
import albumentations as A
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow import keras

importlib.reload(ecf)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<module 'egg_class_functions' from '/home/tibor/Documents/Python/mosquito-egg-identification/egg_class_functions.py'>

# Variable setup

In [10]:
seg_data_path = "Data/processed/predicted_segmentation_data.csv"
image_paths = sorted(glob.glob("Data/raw/microscope/**/*.*", recursive=True), key=lambda x: (os.path.dirname(x), os.path.basename(x)))
BATCH_SIZE = 32
EPOCHS = 100
AUTOTUNE = tf.data.AUTOTUNE

In [3]:
df_pred = ecf.segmented_image_import(seg_data_path)
single_egg_df = df_pred.loc[df_pred["single"] == 1].reset_index(drop=True)
single_egg_df = single_egg_df.dropna()
single_egg_df["segment"] = single_egg_df.apply(ecf.rotate_and_pad_rgb_segment, axis=1)
single_egg_df['species'] = single_egg_df['species'].replace("aegypti_old", "aegypti")
single_egg_df['species'] = single_egg_df['species'].replace("albopictus_old", "albopictus")
train, test = train_test_split(single_egg_df, test_size=0.1)

X_train = train['segment']
X_train = np.stack(X_train.to_list()).astype(np.float32)
y_train = train['species']
X_test = test['segment']
X_test = np.stack(X_test.to_list()).astype(np.float32)
y_test = test['species']
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_train_onehot = to_categorical(y_train_encoded)

X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train, y_train_onehot, test_size=0.1, random_state=42)

In [4]:
print(f"albopictus: {sum(single_egg_df['species'] == 'albopictus')}")
print(f"aegypti: {sum(single_egg_df['species'] == 'aegypti')}")

albopictus: 298
aegypti: 268


### Albumentation image augmenation

In [7]:
transforms = A.Compose([
    # A.HorizontalFlip(p=0.5),
    # A.VerticalFlip(p=0.5),
    # A.ShiftScaleRotate(shift_limit=(0, 0.0625), scale_limit=0.0, rotate_limit=(-10, 10), p=0.5),
    # A.RandomBrightnessContrast(p=0.5),
    # A.RandomGamma(p=0.5),
    # A.RandomToneCurve(scale=0.1, per_channel=True, p=0.5),
    # A.RandomFog(p=0.5)
])

def albumentations_augment(image):
    """
    Image: Numpy-Array [H,W,3] float32 (0-255)
    Output: Augmentiertes Image (float32, 0-1)
    """
    image = image.astype(np.uint8)  # Albumentations erwartet uint8-Bilder
    augmented = transforms(image=image)
    aug_image = augmented['image'].astype(np.float32) / 255.0
    return aug_image


def tf_albumentations_augment(image, label):
    aug_image = tf.numpy_function(albumentations_augment, [image], tf.float32)
    aug_image.set_shape(image.shape)
    return aug_image, label

train_ds = ecf.prepare_dataset_alb(X_train_split, y_train_split, tf_albumentations_augment, BATCH_SIZE)
val_ds = ecf.prepare_dataset_alb(X_val_split, y_val_split, None, BATCH_SIZE, shuffle=False)

### Tensorflow image augmentation

In [5]:
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.0277),
  layers.RandomBrightness(factor=0.2),
  layers.RandomContrast(factor=0.2),
  #layers.RandomColorJitter(
  #    value_range=(0, 1),
  #    brightness_factor=0.2,
  #    contrast_factor=0.2,
  #    saturation_factor=0.5,
  #    hue_factor=(0.5, 0.5)
  #    ),
  #layers.RandomColorDegeneration(0.2),
  #layers.RandomHue(factor=(0.5, 0.5), value_range=(0, 1)),
  # layers.RandomSaturation(factor=0.5, value_range=(0, 1)),
  # layers.RandomGaussianBlur(factor=0.2, sigma=(0.1, 0.4), value_range=(0, 1)),

])

train_ds = ecf.prepare_dataset_tf(X_train_split, y_train_split, data_augmentation, BATCH_SIZE)
val_ds = ecf.prepare_dataset_tf(X_val_split, y_val_split, data_augmentation, BATCH_SIZE, shuffle=False)

I0000 00:00:1751866829.454485     306 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7197 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6


# Model Training

In [6]:
weight_for_albo = (1 / sum(single_egg_df['species'] == 'albopictus')) * (len(single_egg_df))
weight_for_aegy = (1 / sum(single_egg_df['species'] == 'aegypti')) * (len(single_egg_df))

class_weight = {0: weight_for_albo, 1: weight_for_aegy}

In [7]:
print('Weight for class 0: {:.2f}'.format(weight_for_albo))
print('Weight for class 1: {:.2f}'.format(weight_for_aegy))

Weight for class 0: 1.90
Weight for class 1: 2.11


In [16]:
base_model = tf.keras.applications.EfficientNetV2B0(
    input_shape=(200, 200, 3),
    include_top=False,
    weights="imagenet"
)
base_model.trainable = False

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dense(2, activation='softmax')
])

In [17]:
wandb.init(project="egg-classification", config={
    "architecture": "EfficientNetV2B0",
    "input_shape": (200, 200, 3),
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "num_classes": 2
})


local_checkpoint = ModelCheckpoint(
    filepath="models/model.{epoch:02d}.h5",
    save_best_only=True,
    save_weights_only=False
)

wandb_checkpoint = WandbModelCheckpoint(
    filepath="models-wandb/model-{epoch:02d}.keras",
    save_best_only=True
)

optimizer = keras.optimizers.Adam(learning_rate=0.0001)

model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'],
              )

history = model.fit(train_ds,
                    epochs=EPOCHS,
                    validation_data=val_ds,
                    callbacks=[WandbMetricsLogger(),
                               wandb_checkpoint                               
                                ],
                    class_weight=class_weight
                    )

y_true = []
y_pred = []

for x, y in val_ds:
    preds = model.predict(x)
    y_true.extend(np.argmax(y.numpy(), axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
wandb.log({"confusion_matrix": wandb.plot.confusion_matrix(
    probs=None,
    y_true=y_true,
    preds=y_pred,
    class_names=["aegypti", "albopictus"]
)})

wandb.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 671ms/step - accuracy: 0.5720 - loss: 1.3377 - val_accuracy: 0.5882 - val_loss: 0.6714
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.6167 - loss: 1.3011 - val_accuracy: 0.5490 - val_loss: 0.6885
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.6300 - loss: 1.3149 - val_accuracy: 0.4706 - val_loss: 0.6877
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step - accuracy: 0.6331 - loss: 1.2475 - val_accuracy: 0.6667 - val_loss: 0.6254
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.7064 - loss: 1.2183 - val_accuracy: 0.6275 - val_loss: 0.6462
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.6710 - loss: 1.2371 - val_accuracy: 0.6275 - val_loss: 0.6764
Epoch 7/100
[1m15/15[0m 

0,1
epoch/accuracy,▁▁▁▂▂▃▄▅▅▄▅▅▄▅▄▄▅▆▆▄▆▆▇▆▆▇▇▇█▆▇██▆█▅▇███
epoch/epoch,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▇▆▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▄▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▁▂▁▁▁▁
epoch/val_accuracy,▁▅▄▅▆▅▆▅▆▅▅▇▆▇▅▆▇▆▆▆▅▆▇▇▇▆▆█▆▆▇█▇▆█▇▆▇█▆
epoch/val_loss,▇█▆▇▆▅▃▇▅▅▃▆▅▄▄▄▄▄▅▇▃▄▇▄▄▂▄▄▂▁▁▃▅▂▂▃▄▃▄▂

0,1
epoch/accuracy,0.80568
epoch/epoch,99.0
epoch/learning_rate,0.0001
epoch/loss,0.87063
epoch/val_accuracy,0.80392
epoch/val_loss,0.52901
