#### The model3 is developed upon the model2. I added oversampling method into the data processing step to try to balance label and see if there is any improvement in the model.

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import cv2

from sklearn.utils import class_weight
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import keras
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import EfficientNetB2
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, concatenate, Flatten, Input, Concatenate
from keras import layers, models, regularizers, Model, optimizers
from keras.losses import CategoricalCrossentropy, CategoricalFocalCrossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow.keras.backend as K

from keras.utils import Sequence
from imblearn.over_sampling import RandomOverSampler
from imblearn.tensorflow import balanced_batch_generator

In [2]:
# Image Size
target_size = (260, 260)

# Batch Size
batch_size = 32

# Number of predictors
num_classes = 21

## Read data frame and Image Sampling

In [3]:
path = "../bttai-ajl-2025/train/train"
df = pd.read_csv("../bttai-ajl-2025/train.csv")

df['md5hash'] = df['md5hash'].astype(str) + '.jpg'

# Combine label and md5hash to form the correct path
df['file_path'] = df['label'] + '/' + df['md5hash']

In [4]:
df.columns

Index(['md5hash', 'fitzpatrick_scale', 'fitzpatrick_centaur', 'label',
       'nine_partition_label', 'three_partition_label', 'qc', 'ddi_scale',
       'file_path'],
      dtype='object')

In [5]:
df.groupby(['three_partition_label', 'nine_partition_label', 'label']).size()

three_partition_label  nine_partition_label          label                             
benign                 benign-dermal                 dermatofibroma                         55
                                                     pyogenic-granuloma                     79
                       benign-epidermal              epidermal-nevus                        64
                                                     prurigo-nodularis                     119
                                                     seborrheic-keratosis                   48
malignant              malignant-cutaneous-lymphoma  mycosis-fungoides                     127
                       malignant-dermal              kaposi-sarcoma                        109
                       malignant-epidermal           actinic-keratosis                     122
                                                     basal-cell-carcinoma                  328
                                                     basa

In [6]:
# Drop the image labelled wrongly
df = df[df["qc"] != "3 Wrongly labelled"].copy()


In [7]:
# Map the values in the qc to integers

df.qc = df.qc.map({
    "1 Diagnostic": 4,
    "5 Potentially": 3,
    "2 Caracteristic": 2,
    "4 Other": 1
}).fillna(0).astype(int)


In [8]:
df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,fd06d13de341cc75ad679916c5d7e6a6.jpg,4,4,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/fd06d13de341cc75ad679916c5d7...
1,a4bb4e5206c4e89a303f470576fc5253.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,12,basal-cell-carcinoma-morpheiform/a4bb4e5206c4e...
2,c94ce27e389f96bda998e7c3fa5c4a2e.jpg,5,5,keloid,inflammatory,non-neoplastic,4,56,keloid/c94ce27e389f96bda998e7c3fa5c4a2e.jpg
3,ebcf2b50dd943c700d4e2b586fcd4425.jpg,3,3,basal-cell-carcinoma,malignant-epidermal,malignant,0,34,basal-cell-carcinoma/ebcf2b50dd943c700d4e2b586...
4,c77d6c895f05fea73a8f3704307036c0.jpg,1,1,prurigo-nodularis,benign-epidermal,benign,0,12,prurigo-nodularis/c77d6c895f05fea73a8f37043070...


In [9]:
# Since images with a number of 4, 3, 2, 1 are verified by experts already, we should put all these images into training

prioritized_df = df[df.qc.isin([4, 3, 2, 1])].copy()
prioritized_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
2,c94ce27e389f96bda998e7c3fa5c4a2e.jpg,5,5,keloid,inflammatory,non-neoplastic,4,56,keloid/c94ce27e389f96bda998e7c3fa5c4a2e.jpg
26,e99ba7397c33ba169192ffdb25b66ccf.jpg,5,2,seborrheic-keratosis,benign-epidermal,benign,4,56,seborrheic-keratosis/e99ba7397c33ba169192ffdb2...
75,6be528e219f8ce45c9782e2b05ae3c24.jpg,3,2,basal-cell-carcinoma,malignant-epidermal,malignant,4,34,basal-cell-carcinoma/6be528e219f8ce45c9782e2b0...
93,31749f92677e70999c28fe49fbc6dafc.jpg,2,2,eczema,inflammatory,non-neoplastic,4,12,eczema/31749f92677e70999c28fe49fbc6dafc.jpg
193,67c9271813f07f5311e2cb435a817403.jpg,2,3,basal-cell-carcinoma,malignant-epidermal,malignant,4,12,basal-cell-carcinoma/67c9271813f07f5311e2cb435...


In [10]:
# Get the unverified images

unpriporitized_df = df[~df.qc.isin([4, 3, 2, 1])].copy()
unpriporitized_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,fd06d13de341cc75ad679916c5d7e6a6.jpg,4,4,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/fd06d13de341cc75ad679916c5d7...
1,a4bb4e5206c4e89a303f470576fc5253.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,12,basal-cell-carcinoma-morpheiform/a4bb4e5206c4e...
3,ebcf2b50dd943c700d4e2b586fcd4425.jpg,3,3,basal-cell-carcinoma,malignant-epidermal,malignant,0,34,basal-cell-carcinoma/ebcf2b50dd943c700d4e2b586...
4,c77d6c895f05fea73a8f3704307036c0.jpg,1,1,prurigo-nodularis,benign-epidermal,benign,0,12,prurigo-nodularis/c77d6c895f05fea73a8f37043070...
5,9d5a90fa3f6934608add10e698001760.jpg,3,5,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/9d5a90fa3f6934608add10e69800...


In [11]:
# shuffle the data frame

prioritized_df = prioritized_df.sample(frac=1).reset_index(drop=True)
prioritized_df

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,ae5dbb5d80c5ccff3104ffe0221639e8.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,4,12,basal-cell-carcinoma-morpheiform/ae5dbb5d80c5c...
1,29a86d89608e63ccfc57b1e40c827ead.jpg,4,3,melanoma,malignant-melanoma,malignant,4,34,melanoma/29a86d89608e63ccfc57b1e40c827ead.jpg
2,203f72e12bfd9132ba8f843eaa9a894f.jpg,2,1,melanoma,malignant-melanoma,malignant,4,12,melanoma/203f72e12bfd9132ba8f843eaa9a894f.jpg
3,9dccb89ae8b15e7b9063318022608d70.jpg,2,2,acne-vulgaris,inflammatory,non-neoplastic,4,12,acne-vulgaris/9dccb89ae8b15e7b9063318022608d70...
4,e502602902bb69fde51dd8185b272992.jpg,1,1,squamous-cell-carcinoma,malignant-epidermal,malignant,4,12,squamous-cell-carcinoma/e502602902bb69fde51dd8...
...,...,...,...,...,...,...,...,...,...
79,ab6f88d273564516c297046fe9ad1c81.jpg,5,4,kaposi-sarcoma,malignant-dermal,malignant,4,56,kaposi-sarcoma/ab6f88d273564516c297046fe9ad1c8...
80,0d8457afebb67905127da37baece43d8.jpg,5,4,melanoma,malignant-melanoma,malignant,4,56,melanoma/0d8457afebb67905127da37baece43d8.jpg
81,0cff6f3c9bb267f68c77740fc9c58587.jpg,4,1,acne,inflammatory,non-neoplastic,4,34,acne/0cff6f3c9bb267f68c77740fc9c58587.jpg
82,518264d189db5695195a516892b7a3f0.jpg,2,2,basal-cell-carcinoma,malignant-epidermal,malignant,4,12,basal-cell-carcinoma/518264d189db5695195a51689...


In [12]:
# Create a new col named skinColor_combined_label
# this new column is combined with different skin colors asscoiated with different type of skin disease
# As we train test split datatset by stratify skinColor_combined_label, this ensures we don't have only white skin in the training, and black skin in validation

unpriporitized_df["skinColor_combined_label"] = unpriporitized_df["fitzpatrick_scale"].astype(str) + "_" + unpriporitized_df["three_partition_label"] + "_" + unpriporitized_df["nine_partition_label"] + "_" + unpriporitized_df["label"]

In [13]:
# Get rarer skin colors asscoiated with a skin disease
indices = unpriporitized_df.skinColor_combined_label.value_counts() == 1
rare_disease = list(unpriporitized_df.skinColor_combined_label.value_counts().index[indices])

In [14]:
rare_disease

['-1_benign_benign-epidermal_seborrheic-keratosis',
 '6_malignant_malignant-melanoma_superficial-spreading-melanoma-ssm',
 '5_malignant_malignant-epidermal_basal-cell-carcinoma-morpheiform',
 '-1_benign_benign-epidermal_prurigo-nodularis',
 '6_malignant_malignant-melanoma_malignant-melanoma',
 '-1_malignant_malignant-epidermal_basal-cell-carcinoma-morpheiform',
 '-1_benign_benign-dermal_dermatofibroma']

In [15]:
small_df = unpriporitized_df[unpriporitized_df.skinColor_combined_label.isin(rare_disease)].copy()
unpriporitized_df = unpriporitized_df[~unpriporitized_df.skinColor_combined_label.isin(rare_disease)].copy()

In [16]:
small_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path,skinColor_combined_label
27,71b67be202663f843f95b5f409b358e4.jpg,-1,-1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,-1,basal-cell-carcinoma-morpheiform/71b67be202663...,-1_malignant_malignant-epidermal_basal-cell-ca...
80,e815861f140e305baf441814e6dbda48.jpg,6,1,malignant-melanoma,malignant-melanoma,malignant,0,56,malignant-melanoma/e815861f140e305baf441814e6d...,6_malignant_malignant-melanoma_malignant-melanoma
472,31a8db3e9da7907512beaa575d638be4.jpg,5,3,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,56,basal-cell-carcinoma-morpheiform/31a8db3e9da79...,5_malignant_malignant-epidermal_basal-cell-car...
748,9e8595b4c1edec4b70653523997c267f.jpg,-1,1,seborrheic-keratosis,benign-epidermal,benign,0,-1,seborrheic-keratosis/9e8595b4c1edec4b706535239...,-1_benign_benign-epidermal_seborrheic-keratosis
1177,02f11821915d67f20fb15a8e8b96c1d3.jpg,6,1,superficial-spreading-melanoma-ssm,malignant-melanoma,malignant,0,56,superficial-spreading-melanoma-ssm/02f11821915...,6_malignant_malignant-melanoma_superficial-spr...


In [17]:
# Train Test Split

unpriporitized_train_df, unpriporitized_validation_df = train_test_split(unpriporitized_df, 
                                                                         test_size = 0.2, 
                                                                         stratify=unpriporitized_df.skinColor_combined_label,
                                                                         random_state=42)

In [18]:
# Concatenate training data

train_df = pd.concat([prioritized_df, small_df, unpriporitized_train_df]).copy().reset_index(drop=True)
train_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path,skinColor_combined_label
0,ae5dbb5d80c5ccff3104ffe0221639e8.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,4,12,basal-cell-carcinoma-morpheiform/ae5dbb5d80c5c...,
1,29a86d89608e63ccfc57b1e40c827ead.jpg,4,3,melanoma,malignant-melanoma,malignant,4,34,melanoma/29a86d89608e63ccfc57b1e40c827ead.jpg,
2,203f72e12bfd9132ba8f843eaa9a894f.jpg,2,1,melanoma,malignant-melanoma,malignant,4,12,melanoma/203f72e12bfd9132ba8f843eaa9a894f.jpg,
3,9dccb89ae8b15e7b9063318022608d70.jpg,2,2,acne-vulgaris,inflammatory,non-neoplastic,4,12,acne-vulgaris/9dccb89ae8b15e7b9063318022608d70...,
4,e502602902bb69fde51dd8185b272992.jpg,1,1,squamous-cell-carcinoma,malignant-epidermal,malignant,4,12,squamous-cell-carcinoma/e502602902bb69fde51dd8...,


In [19]:
train_df.label.value_counts()

label
squamous-cell-carcinoma               326
basal-cell-carcinoma                  265
folliculitis                          191
acne-vulgaris                         188
melanoma                              146
eczema                                115
acne                                  103
mycosis-fungoides                     103
actinic-keratosis                      97
prurigo-nodularis                      96
kaposi-sarcoma                         89
keloid                                 88
dermatomyositis                        86
superficial-spreading-melanoma-ssm     69
pyogenic-granuloma                     64
malignant-melanoma                     62
epidermal-nevus                        52
dyshidrotic-eczema                     46
dermatofibroma                         44
seborrheic-keratosis                   38
basal-cell-carcinoma-morpheiform       35
Name: count, dtype: int64

In [20]:
unpriporitized_validation_df.label.value_counts()

label
squamous-cell-carcinoma               81
basal-cell-carcinoma                  63
acne-vulgaris                         46
folliculitis                          46
melanoma                              35
eczema                                28
actinic-keratosis                     25
acne                                  24
mycosis-fungoides                     24
prurigo-nodularis                     23
keloid                                21
kaposi-sarcoma                        20
dermatomyositis                       20
malignant-melanoma                    16
pyogenic-granuloma                    15
superficial-spreading-melanoma-ssm    14
dyshidrotic-eczema                    12
epidermal-nevus                       12
dermatofibroma                        11
seborrheic-keratosis                   9
basal-cell-carcinoma-morpheiform       8
Name: count, dtype: int64

## ImageDataGenerator

In [None]:
# Create image data geneerator to read images and apply data agumentation to only training dataset

# Modified code from Melissa
train_datagen = ImageDataGenerator(
    # we have to use efficient net preprocessing lib to be consistent
    preprocessing_function=keras.applications.efficientnet.preprocess_input,
    rotation_range = 30,
    shear_range=0.2,
    zoom_range=0.2,
    # we may not need this if we use efficient net since we have to resize anyway
    # rescale = 1./255,
    horizontal_flip = True,
    vertical_flip = True,
    brightness_range=[0.7, 1.2],
    height_shift_range = 0.1,
    width_shift_range = 0.1
)

validation_datagen = ImageDataGenerator(
    preprocessing_function=keras.applications.efficientnet.preprocess_input
)

In [None]:
class BalancedDataGenerator(Sequence):
    """
    
    Borrow code from: https://github.com/farhantandia/Tricks-for-Handling-Imbalanced-Dataset-Image-Classification/blob/main/notebook.ipynb

    This function is to oversample my image dataset. 
    The oversampling method tries to balance number of obseration for each type of label by taking a bootrsap sampling method.

    """
    def __init__(self, x, y, datagen, num_classes=num_classes, batch_size=batch_size):
        self.datagen = datagen
        self.num_classes = num_classes
        self.batch_size = min(batch_size, x.shape[0])
        self.data_shape = x.shape[1:]

        # Modified Part: to add index to iterate my batches
        self.current_index = 0 
        
        self.datagen.fit(x)
        self.gen, self.steps_per_epoch = balanced_batch_generator(
                x.reshape(x.shape[0], -1), y,
                sampler=RandomOverSampler(),
                batch_size=self.batch_size,
                keep_sparse=False
            )
    
    # Modified
    def __iter__(self):
        self.current_index = 0
        return self

    # Modified
    def __next__(self):
        if self.current_index < len(self):
            batch = self[self.current_index]
            self.current_index += 1
            return batch
        else:
            self.current_index = 0
            raise StopIteration

    def __len__(self):
        return self.steps_per_epoch

    # Modified: fix the problems of incorrect input and output dimension
    def __getitem__(self, idx):
    
        x_batch, y_batch = next(self.gen)
        x_batch = x_batch.reshape(-1, *self.data_shape)
        y_batch = keras.utils.to_categorical(y_batch, num_classes = self.num_classes)
        
        return next(self.datagen.flow(x_batch, y_batch, batch_size = self.batch_size))

In [None]:
def read_images(file_path):
    """
    
    Given a list of file path to read a list of images

    """
    images = []
    for path in file_path:
        # Load image
        path  = "../bttai-ajl-2025/train/train/" + path
         
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image at {path}")
            continue
        
        # Resize
        resized_image = cv2.resize(image, target_size)
        
        # Convert BGR to RGB (if needed)
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
        
        images.append(resized_image)

    # Convert to numpy array (optional)
    return np.array(images)

In [None]:
# read images

train_data = read_images(train_df.file_path)
y_train = train_df.label.copy()

validation_data = read_images(unpriporitized_validation_df.file_path)
y_validation = unpriporitized_validation_df.label

In [None]:
# encode my labels

label_encoder = LabelEncoder()

y_train = label_encoder.fit_transform(y_train)

In [None]:
val_data = validation_datagen.flow_from_dataframe(
    dataframe = unpriporitized_validation_df,
    directory = path,
    x_col = "file_path",
    y_col = "label",
    target_size = target_size,
    batch_size = batch_size,
    class_mode = "categorical",
    shuffle = False,
    seed=42
)

Found 553 validated image filenames belonging to 21 classes.


In [27]:
val_data.class_indices

{'acne': 0,
 'acne-vulgaris': 1,
 'actinic-keratosis': 2,
 'basal-cell-carcinoma': 3,
 'basal-cell-carcinoma-morpheiform': 4,
 'dermatofibroma': 5,
 'dermatomyositis': 6,
 'dyshidrotic-eczema': 7,
 'eczema': 8,
 'epidermal-nevus': 9,
 'folliculitis': 10,
 'kaposi-sarcoma': 11,
 'keloid': 12,
 'malignant-melanoma': 13,
 'melanoma': 14,
 'mycosis-fungoides': 15,
 'prurigo-nodularis': 16,
 'pyogenic-granuloma': 17,
 'seborrheic-keratosis': 18,
 'squamous-cell-carcinoma': 19,
 'superficial-spreading-melanoma-ssm': 20}

In [28]:
balanced_gen_train = BalancedDataGenerator(train_data, y_train, train_datagen, batch_size=batch_size)
steps_per_epoch = balanced_gen_train.steps_per_epoch

## Compute Class Weights

In [None]:
# Compute weights to mitigate imbalance issues

# Author: Melissa
train_classes = train_df.label
cw = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_classes),
    y=train_classes
)
class_weights = dict(enumerate(cw))

print("Class Weights:", class_weights)

Class Weights: {0: 1.064724919093851, 1: 0.5833333333333334, 2: 1.1305841924398625, 3: 0.4138364779874214, 4: 3.1333333333333333, 5: 2.492424242424242, 6: 1.2751937984496124, 7: 2.3840579710144927, 8: 0.9536231884057971, 9: 2.108974358974359, 10: 0.5741710296684118, 11: 1.2322097378277153, 12: 1.246212121212121, 13: 1.7688172043010753, 14: 0.7511415525114156, 15: 1.064724919093851, 16: 1.1423611111111112, 17: 1.7135416666666667, 18: 2.8859649122807016, 19: 0.33640081799591004, 20: 1.5893719806763285}


## Modeling

In [None]:
# Use efficientnetB2 

# Author: Melissa
base_model = EfficientNetB2(
    include_top=False,
    weights = "imagenet",
    input_shape=(260, 260, 3)
)

# directly fine tune the model for our case
base_model.trainable = True

In [None]:
# Model Structure
# Author: Melissa

x = base_model.output
x = layers.GlobalAveragePooling2D()(x)

# First Layer
x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)

outputs = layers.Dense(num_classes, activation="softmax")(x)

# make/define the model
model = models.Model(inputs=base_model.input, outputs=outputs)

In [None]:
class LossLearningRateScheduler(tf.keras.callbacks.History):
    """

    Function is from: https://github.com/farhantandia/Tricks-for-Handling-Imbalanced-Dataset-Image-Classification/blob/main/notebook.ipynb

    I modified function to adpot the cuurent tensorflow version

    A learning rate scheduler that relies on changes in loss function value to dictate whether learning rate is decayed or not.

    LossLearningRateScheduler has the following properties:
    base_lr: the starting learning rate

    lookback_epochs: the number of epochs in the past to compare with the loss function at the current epoch to determine if progress is being made.

    decay_threshold / decay_multiple: if loss function has not improved by a factor of decay_threshold * lookback_epochs, then decay_multiple will be applied to the learning rate.

    spike_epochs: list of the epoch numbers where you want to spike the learning rate.
    
    spike_multiple: the multiple applied to the current learning rate for a spike.

    """

    def __init__(self, base_lr, lookback_epochs, 
                 spike_epochs = None, spike_multiple = 10, 
                 decay_threshold = 0.002, decay_multiple = 0.7, 
                 loss_type = 'val_loss'):

        super(LossLearningRateScheduler, self).__init__()

        self.base_lr = base_lr
        self.lookback_epochs = lookback_epochs
        self.spike_epochs = spike_epochs
        self.spike_multiple = spike_multiple
        self.decay_threshold = decay_threshold
        self.decay_multiple = decay_multiple
        self.loss_type = loss_type


    def on_epoch_begin(self, epoch, logs=None):

        if len(self.epoch) > self.lookback_epochs:

            current_lr = tf.keras.backend.get_value(self.model.optimizer.learning_rate)

            target_loss = self.history[self.loss_type] 

            loss_diff =  target_loss[-int(self.lookback_epochs)] - target_loss[-1]

            if loss_diff <= np.abs(target_loss[-1]) * (self.decay_threshold * self.lookback_epochs):

                print(' '.join(('Changing learning rate from', str(current_lr), 'to', str(current_lr * self.decay_multiple))))
                # Modified
                self.model.optimizer.learning_rate.assign(current_lr * self.decay_multiple)
                current_lr = current_lr * self.decay_multiple

            else:

                print(' '.join(('Learning rate:', str(current_lr))))

            if self.spike_epochs is not None and len(self.epoch) in self.spike_epochs:
                print(' '.join(('Spiking learning rate from', str(current_lr), 'to', str(current_lr * self.spike_multiple))))
                # Modified
                self.model.optimizer.learning_rate.assign(self.base_lr)

        else:

            print(' '.join(('Setting learning rate to', str(self.base_lr))))
            # Modified
            self.model.optimizer.learning_rate.assign(self.base_lr)


        return tf.keras.backend.get_value(self.model.optimizer.learning_rate)

In [None]:
# Use catgorical Focal Cross entropy to handle multiclass problems


loss = CategoricalFocalCrossentropy(label_smoothing=0.1)

optimizer = optimizers.AdamW(learning_rate=0.0002)

model.compile(optimizer=optimizer, 
              loss=loss, 
              metrics=['accuracy']
             )

In [41]:
callbacks = [
    # Early stopping
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),

    # Reduce Palteau
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7),

    # Adaptive Decay Learning Rate
    LossLearningRateScheduler(base_lr=0.0002, lookback_epochs=3)
]

In [42]:
history = model.fit(
    balanced_gen_train,
    epochs = 10,
    steps_per_epoch = steps_per_epoch,
    validation_data = val_data,
    callbacks = callbacks,
    class_weight = class_weights,
    shuffle = True
)

Setting learning rate to 0.0002
Epoch 1/10
[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.2448 - loss: 1.3340

  self._warn_if_super_not_called()


[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1212s[0m 6s/step - accuracy: 0.2453 - loss: 1.3332 - val_accuracy: 0.2875 - val_loss: 0.9448 - learning_rate: 2.0000e-04
Setting learning rate to 0.0002
Epoch 2/10
[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1132s[0m 5s/step - accuracy: 0.5393 - loss: 0.9127 - val_accuracy: 0.3707 - val_loss: 0.9323 - learning_rate: 2.0000e-04
Setting learning rate to 0.0002
Epoch 3/10
[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1075s[0m 5s/step - accuracy: 0.6641 - loss: 0.7876 - val_accuracy: 0.4105 - val_loss: 0.8977 - learning_rate: 2.0000e-04
Setting learning rate to 0.0002
Epoch 4/10
[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m820s[0m 4s/step - accuracy: 0.7312 - loss: 0.7184 - val_accuracy: 0.4430 - val_loss: 0.8481 - learning_rate: 2.0000e-04
Learning rate: 0.0002
Epoch 5/10
[1m213/213[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m788s[0m 4s/step - accuracy: 0.7900 - loss: 0.6699 - val

In [None]:
# model.save("All_Label_Model3.keras")

## Testing

In [44]:
test_df = pd.read_csv("../bttai-ajl-2025/test.csv")

test_path = "../bttai-ajl-2025/test/test/"

# Combine label and md5hash to form the correct path
test_df['file_path'] = test_path + test_df['md5hash'] + '.jpg'

In [45]:
test_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,qc,ddi_scale,file_path
0,0844ae634f0e6e7ef1f73c2aeecbae0e,2,2,,12,../bttai-ajl-2025/test/test/0844ae634f0e6e7ef1...
1,3b290d262098f761d719aa07cf36c040,4,3,,34,../bttai-ajl-2025/test/test/3b290d262098f761d7...
2,cf561d08ac46d0fda678bff6621005ee,2,3,,12,../bttai-ajl-2025/test/test/cf561d08ac46d0fda6...
3,e6371069be05c6b0a95b4b3f1bacc9a5,4,3,,34,../bttai-ajl-2025/test/test/e6371069be05c6b0a9...
4,f76cddb37265f97508f159078dcc7e7c,5,5,,56,../bttai-ajl-2025/test/test/f76cddb37265f97508...


In [46]:
# For submission, you have to follow image order in test.csv

test_datagen = ImageDataGenerator(preprocessing_function=keras.applications.efficientnet.preprocess_input)

# Load test images (Ensure they are all inside a single folder, not subdirectories)
test_data = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="file_path",      # Column containing file paths
    target_size=target_size, # Resize images (adjust for your model)
    batch_size=batch_size,          # Batch size
    class_mode=None,        # No labels for test set
    shuffle=False           # Keep order consistent
)

Found 1227 validated image filenames.


In [49]:
predictions = model.predict(test_data)

predicted_classes = np.argmax(predictions, axis=1)

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 660ms/step


In [52]:
predicted_classes_names = label_encoder.inverse_transform(predicted_classes)

In [53]:
test_submission = test_df.drop(columns=['fitzpatrick_scale', 'fitzpatrick_centaur', 'qc',
       'ddi_scale', 'file_path']).copy()

test_submission["label"] = predicted_classes_names

In [54]:
test_submission.head()

Unnamed: 0,md5hash,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,prurigo-nodularis
1,3b290d262098f761d719aa07cf36c040,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,acne-vulgaris
4,f76cddb37265f97508f159078dcc7e7c,folliculitis


In [55]:
test_submission.to_csv("../Submission/test3.csv", index=False)

In [56]:
test_submission.head()

Unnamed: 0,md5hash,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,prurigo-nodularis
1,3b290d262098f761d719aa07cf36c040,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,acne-vulgaris
4,f76cddb37265f97508f159078dcc7e7c,folliculitis
