#### I apply the same thing to here. Instead, we used oversample obersavtions in each label using combination skin color and label as our target variable in the oversampling function.

## Import Libraries

In [83]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import cv2
import os

from sklearn.utils import class_weight
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import keras
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import EfficientNetB2
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, concatenate, Flatten, Input, Concatenate
from keras import layers, models, regularizers, Model, optimizers
from keras.losses import CategoricalCrossentropy, CategoricalFocalCrossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow.keras.backend as K

from keras.utils import Sequence
from imblearn.over_sampling import RandomOverSampler
from imblearn.tensorflow import balanced_batch_generator

In [84]:
# Image Size
target_size = (260, 260)

# Batch Size
batch_size = 32

# Number of predictors
num_classes = 21

## Read data frame and Image Sampling

In [85]:
path = "../bttai-ajl-2025/train/train"
df = pd.read_csv("../bttai-ajl-2025/train.csv")

df['md5hash'] = df['md5hash'].astype(str) + '.jpg'

# Combine label and md5hash to form the correct path
df['file_path'] = df['label'] + '/' + df['md5hash']

In [86]:
df.columns

Index(['md5hash', 'fitzpatrick_scale', 'fitzpatrick_centaur', 'label',
       'nine_partition_label', 'three_partition_label', 'qc', 'ddi_scale',
       'file_path'],
      dtype='object')

In [87]:
df.groupby(['three_partition_label', 'nine_partition_label', 'label']).size()

three_partition_label  nine_partition_label          label                             
benign                 benign-dermal                 dermatofibroma                         55
                                                     pyogenic-granuloma                     79
                       benign-epidermal              epidermal-nevus                        64
                                                     prurigo-nodularis                     119
                                                     seborrheic-keratosis                   48
malignant              malignant-cutaneous-lymphoma  mycosis-fungoides                     127
                       malignant-dermal              kaposi-sarcoma                        109
                       malignant-epidermal           actinic-keratosis                     122
                                                     basal-cell-carcinoma                  328
                                                     basa

In [88]:
# Drop the image labelled wrongly
df = df[df["qc"] != "3 Wrongly labelled"].copy()


In [89]:
# Map the values in the qc to integers

df.qc = df.qc.map({
    "1 Diagnostic": 4,
    "5 Potentially": 3,
    "2 Caracteristic": 2,
    "4 Other": 1
}).fillna(0).astype(int)


In [90]:
df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,fd06d13de341cc75ad679916c5d7e6a6.jpg,4,4,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/fd06d13de341cc75ad679916c5d7...
1,a4bb4e5206c4e89a303f470576fc5253.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,12,basal-cell-carcinoma-morpheiform/a4bb4e5206c4e...
2,c94ce27e389f96bda998e7c3fa5c4a2e.jpg,5,5,keloid,inflammatory,non-neoplastic,4,56,keloid/c94ce27e389f96bda998e7c3fa5c4a2e.jpg
3,ebcf2b50dd943c700d4e2b586fcd4425.jpg,3,3,basal-cell-carcinoma,malignant-epidermal,malignant,0,34,basal-cell-carcinoma/ebcf2b50dd943c700d4e2b586...
4,c77d6c895f05fea73a8f3704307036c0.jpg,1,1,prurigo-nodularis,benign-epidermal,benign,0,12,prurigo-nodularis/c77d6c895f05fea73a8f37043070...


In [91]:
# Since images with a number of 4, 3, 2, 1 are verified by experts already, we should put all these images into training

prioritized_df = df[df.qc.isin([4, 3, 2, 1])].copy()
prioritized_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
2,c94ce27e389f96bda998e7c3fa5c4a2e.jpg,5,5,keloid,inflammatory,non-neoplastic,4,56,keloid/c94ce27e389f96bda998e7c3fa5c4a2e.jpg
26,e99ba7397c33ba169192ffdb25b66ccf.jpg,5,2,seborrheic-keratosis,benign-epidermal,benign,4,56,seborrheic-keratosis/e99ba7397c33ba169192ffdb2...
75,6be528e219f8ce45c9782e2b05ae3c24.jpg,3,2,basal-cell-carcinoma,malignant-epidermal,malignant,4,34,basal-cell-carcinoma/6be528e219f8ce45c9782e2b0...
93,31749f92677e70999c28fe49fbc6dafc.jpg,2,2,eczema,inflammatory,non-neoplastic,4,12,eczema/31749f92677e70999c28fe49fbc6dafc.jpg
193,67c9271813f07f5311e2cb435a817403.jpg,2,3,basal-cell-carcinoma,malignant-epidermal,malignant,4,12,basal-cell-carcinoma/67c9271813f07f5311e2cb435...


In [92]:
# Get the unverified images

unpriporitized_df = df[~df.qc.isin([4, 3, 2, 1])].copy()
unpriporitized_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,fd06d13de341cc75ad679916c5d7e6a6.jpg,4,4,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/fd06d13de341cc75ad679916c5d7...
1,a4bb4e5206c4e89a303f470576fc5253.jpg,1,1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,12,basal-cell-carcinoma-morpheiform/a4bb4e5206c4e...
3,ebcf2b50dd943c700d4e2b586fcd4425.jpg,3,3,basal-cell-carcinoma,malignant-epidermal,malignant,0,34,basal-cell-carcinoma/ebcf2b50dd943c700d4e2b586...
4,c77d6c895f05fea73a8f3704307036c0.jpg,1,1,prurigo-nodularis,benign-epidermal,benign,0,12,prurigo-nodularis/c77d6c895f05fea73a8f37043070...
5,9d5a90fa3f6934608add10e698001760.jpg,3,5,prurigo-nodularis,benign-epidermal,benign,0,34,prurigo-nodularis/9d5a90fa3f6934608add10e69800...


In [93]:
# shuffle the data frame

prioritized_df = prioritized_df.sample(frac=1).reset_index(drop=True)
prioritized_df

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path
0,1c31b43cce49c0e53349f4f1adb59b1d.jpg,3,1,superficial-spreading-melanoma-ssm,malignant-melanoma,malignant,4,34,superficial-spreading-melanoma-ssm/1c31b43cce4...
1,84993623f4cc766e19b092054f545f6b.jpg,1,1,actinic-keratosis,malignant-epidermal,malignant,4,12,actinic-keratosis/84993623f4cc766e19b092054f54...
2,3c64fc9d1d75d88cc8f4183803edc5f3.jpg,4,4,acne-vulgaris,inflammatory,non-neoplastic,4,34,acne-vulgaris/3c64fc9d1d75d88cc8f4183803edc5f3...
3,5add954298359b4281d027a5e03e2bd6.jpg,1,-1,basal-cell-carcinoma,malignant-epidermal,malignant,4,12,basal-cell-carcinoma/5add954298359b4281d027a5e...
4,e642ea55ce353ec974309c87f4da7373.jpg,5,-1,kaposi-sarcoma,malignant-dermal,malignant,4,56,kaposi-sarcoma/e642ea55ce353ec974309c87f4da737...
...,...,...,...,...,...,...,...,...,...
79,64ad852dc5e267ec279e02f7ec0b7307.jpg,3,4,basal-cell-carcinoma,malignant-epidermal,malignant,4,34,basal-cell-carcinoma/64ad852dc5e267ec279e02f7e...
80,0d8457afebb67905127da37baece43d8.jpg,5,4,melanoma,malignant-melanoma,malignant,4,56,melanoma/0d8457afebb67905127da37baece43d8.jpg
81,e99ba7397c33ba169192ffdb25b66ccf.jpg,5,2,seborrheic-keratosis,benign-epidermal,benign,4,56,seborrheic-keratosis/e99ba7397c33ba169192ffdb2...
82,b0a821a90fc14fb2996e9717ee37ded1.jpg,2,2,acne-vulgaris,inflammatory,non-neoplastic,4,12,acne-vulgaris/b0a821a90fc14fb2996e9717ee37ded1...


In [94]:
# Create a new col named skinColor_combined_label
# this new column is combined with different skin colors asscoiated with different type of skin disease
# As we train test split datatset by stratify skinColor_combined_label, this ensures we don't have only white skin in the training, and black skin in validation

# unpriporitized_df["skinColor_combined_label"] = unpriporitized_df["fitzpatrick_scale"].astype(str) + "_" + unpriporitized_df["three_partition_label"] + "_" + unpriporitized_df["nine_partition_label"] + "_" + unpriporitized_df["label"]
prioritized_df["skinColor_combined_label"] = prioritized_df["fitzpatrick_scale"].astype(str) + "_" + prioritized_df["label"]
unpriporitized_df["skinColor_combined_label"] = unpriporitized_df["fitzpatrick_scale"].astype(str) + "_" + unpriporitized_df["label"]

In [95]:
# Get rarer skin colors asscoiated with a skin disease
indices = unpriporitized_df.skinColor_combined_label.value_counts() == 1
rare_disease = list(unpriporitized_df.skinColor_combined_label.value_counts().index[indices])

In [96]:
rare_disease

['-1_seborrheic-keratosis',
 '6_superficial-spreading-melanoma-ssm',
 '5_basal-cell-carcinoma-morpheiform',
 '-1_prurigo-nodularis',
 '6_malignant-melanoma',
 '-1_basal-cell-carcinoma-morpheiform',
 '-1_dermatofibroma']

In [97]:
small_df = unpriporitized_df[unpriporitized_df.skinColor_combined_label.isin(rare_disease)].copy()
unpriporitized_df = unpriporitized_df[~unpriporitized_df.skinColor_combined_label.isin(rare_disease)].copy()

In [98]:
small_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path,skinColor_combined_label
27,71b67be202663f843f95b5f409b358e4.jpg,-1,-1,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,-1,basal-cell-carcinoma-morpheiform/71b67be202663...,-1_basal-cell-carcinoma-morpheiform
80,e815861f140e305baf441814e6dbda48.jpg,6,1,malignant-melanoma,malignant-melanoma,malignant,0,56,malignant-melanoma/e815861f140e305baf441814e6d...,6_malignant-melanoma
472,31a8db3e9da7907512beaa575d638be4.jpg,5,3,basal-cell-carcinoma-morpheiform,malignant-epidermal,malignant,0,56,basal-cell-carcinoma-morpheiform/31a8db3e9da79...,5_basal-cell-carcinoma-morpheiform
748,9e8595b4c1edec4b70653523997c267f.jpg,-1,1,seborrheic-keratosis,benign-epidermal,benign,0,-1,seborrheic-keratosis/9e8595b4c1edec4b706535239...,-1_seborrheic-keratosis
1177,02f11821915d67f20fb15a8e8b96c1d3.jpg,6,1,superficial-spreading-melanoma-ssm,malignant-melanoma,malignant,0,56,superficial-spreading-melanoma-ssm/02f11821915...,6_superficial-spreading-melanoma-ssm


In [100]:
# Train Test Split

unpriporitized_train_df, unpriporitized_validation_df = train_test_split(unpriporitized_df, 
                                                                         test_size = 0.05, 
                                                                         stratify=unpriporitized_df.skinColor_combined_label,
                                                                         random_state=42)

In [101]:
# Concatenate training data

train_df = pd.concat([prioritized_df, small_df, unpriporitized_train_df, unpriporitized_validation_df]).copy().reset_index(drop=True)
train_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,ddi_scale,file_path,skinColor_combined_label
0,1c31b43cce49c0e53349f4f1adb59b1d.jpg,3,1,superficial-spreading-melanoma-ssm,malignant-melanoma,malignant,4,34,superficial-spreading-melanoma-ssm/1c31b43cce4...,3_superficial-spreading-melanoma-ssm
1,84993623f4cc766e19b092054f545f6b.jpg,1,1,actinic-keratosis,malignant-epidermal,malignant,4,12,actinic-keratosis/84993623f4cc766e19b092054f54...,1_actinic-keratosis
2,3c64fc9d1d75d88cc8f4183803edc5f3.jpg,4,4,acne-vulgaris,inflammatory,non-neoplastic,4,34,acne-vulgaris/3c64fc9d1d75d88cc8f4183803edc5f3...,4_acne-vulgaris
3,5add954298359b4281d027a5e03e2bd6.jpg,1,-1,basal-cell-carcinoma,malignant-epidermal,malignant,4,12,basal-cell-carcinoma/5add954298359b4281d027a5e...,1_basal-cell-carcinoma
4,e642ea55ce353ec974309c87f4da7373.jpg,5,-1,kaposi-sarcoma,malignant-dermal,malignant,4,56,kaposi-sarcoma/e642ea55ce353ec974309c87f4da737...,5_kaposi-sarcoma


In [102]:
train_df.label.value_counts()

label
squamous-cell-carcinoma               407
basal-cell-carcinoma                  328
folliculitis                          237
acne-vulgaris                         234
melanoma                              181
eczema                                143
acne                                  127
mycosis-fungoides                     127
actinic-keratosis                     122
prurigo-nodularis                     119
kaposi-sarcoma                        109
keloid                                109
dermatomyositis                       106
superficial-spreading-melanoma-ssm     83
pyogenic-granuloma                     79
malignant-melanoma                     78
epidermal-nevus                        64
dyshidrotic-eczema                     58
dermatofibroma                         55
seborrheic-keratosis                   47
basal-cell-carcinoma-morpheiform       43
Name: count, dtype: int64

In [103]:
unpriporitized_validation_df.label.value_counts()

label
squamous-cell-carcinoma               21
basal-cell-carcinoma                  16
folliculitis                          12
acne-vulgaris                         11
melanoma                               9
mycosis-fungoides                      7
acne                                   7
keloid                                 6
eczema                                 6
actinic-keratosis                      6
kaposi-sarcoma                         6
dermatomyositis                        5
prurigo-nodularis                      5
malignant-melanoma                     4
pyogenic-granuloma                     4
superficial-spreading-melanoma-ssm     4
dyshidrotic-eczema                     3
dermatofibroma                         2
basal-cell-carcinoma-morpheiform       2
epidermal-nevus                        2
seborrheic-keratosis                   1
Name: count, dtype: int64

## ImageDataGenerator

In [104]:
# Create image data geneerator to read images and apply data agumentation to only training dataset

train_datagen = ImageDataGenerator(
    # we have to use efficient net preprocessing lib to be consistent
    preprocessing_function=keras.applications.efficientnet.preprocess_input,
    rotation_range = 30,
    shear_range=0.2,
    zoom_range=0.2,
    # we may not need this if we use efficient net since we have to resize anyway
    # rescale = 1./255,
    horizontal_flip = True,
    vertical_flip = True,
    brightness_range=[0.7, 1.2],
    height_shift_range = 0.1,
    width_shift_range = 0.1
)

validation_datagen = ImageDataGenerator(
    preprocessing_function=keras.applications.efficientnet.preprocess_input
)

In [105]:
class BalancedDataGenerator(Sequence):
    def __init__(self, x, y, datagen, num_classes=num_classes, batch_size=batch_size):
        self.datagen = datagen
        self.num_classes = num_classes
        self.batch_size = min(batch_size, x.shape[0])
        self.data_shape = x.shape[1:]
        self.current_index = 0 
        
        self.datagen.fit(x)
        self.gen, self.steps_per_epoch = balanced_batch_generator(
                x.reshape(x.shape[0], -1), y,
                sampler=RandomOverSampler(),
                batch_size=self.batch_size,
                keep_sparse=False
            )
        
    def __iter__(self):
        self.current_index = 0
        return self

    def __next__(self):
        if self.current_index < len(self):
            batch = self[self.current_index]
            self.current_index += 1
            return batch
        else:
            self.current_index = 0
            raise StopIteration

    def __len__(self):
        return self.steps_per_epoch

    def __getitem__(self, idx):
    
        x_batch, y_batch = next(self.gen)
        x_batch = x_batch.reshape(-1, *self.data_shape)
        y_batch = keras.utils.to_categorical(y_batch, num_classes = self.num_classes)
        
        return next(self.datagen.flow(x_batch, y_batch, batch_size = self.batch_size))

In [106]:
def read_images(file_path):
    images = []
    for path in file_path:
        # Load image
        path  = "../bttai-ajl-2025/train/train/" + path
         
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image at {path}")
            continue
        
        # Resize
        resized_image = cv2.resize(image, target_size)
        
        # Convert BGR to RGB (if needed)
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
        
        images.append(resized_image)

    # Convert to numpy array (optional)
    return np.array(images)

In [107]:
train_data = read_images(train_df.file_path)
y_train = train_df.label.copy()
# validation_data = read_images(unpriporitized_validation_df.file_path)
# y_validation = unpriporitized_validation_df.label

In [108]:
y_train.value_counts()

label
squamous-cell-carcinoma               407
basal-cell-carcinoma                  328
folliculitis                          237
acne-vulgaris                         234
melanoma                              181
eczema                                143
acne                                  127
mycosis-fungoides                     127
actinic-keratosis                     122
prurigo-nodularis                     119
kaposi-sarcoma                        109
keloid                                109
dermatomyositis                       106
superficial-spreading-melanoma-ssm     83
pyogenic-granuloma                     79
malignant-melanoma                     78
epidermal-nevus                        64
dyshidrotic-eczema                     58
dermatofibroma                         55
seborrheic-keratosis                   47
basal-cell-carcinoma-morpheiform       43
Name: count, dtype: int64

In [109]:
def read_external_data(file_path):
    image_files = [f for f in os.listdir(file_path) 
                        if f.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff'))]
    images = []
    for path in image_files:
        # Load image
        path  = file_path + "/" + path
         
        image = cv2.imread(path)
        if image is None:
            print(f"Warning: Could not read image at {path}")
            continue
        
        # Resize
        resized_image = cv2.resize(image, target_size)
        
        # Convert BGR to RGB (if needed)
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
        
        images.append(resized_image)

    # Convert to numpy array (optional)
    return np.array(images)
y_train.value_counts()

label
squamous-cell-carcinoma               407
basal-cell-carcinoma                  328
folliculitis                          237
acne-vulgaris                         234
melanoma                              181
eczema                                143
acne                                  127
mycosis-fungoides                     127
actinic-keratosis                     122
prurigo-nodularis                     119
kaposi-sarcoma                        109
keloid                                109
dermatomyositis                       106
superficial-spreading-melanoma-ssm     83
pyogenic-granuloma                     79
malignant-melanoma                     78
epidermal-nevus                        64
dyshidrotic-eczema                     58
dermatofibroma                         55
seborrheic-keratosis                   47
basal-cell-carcinoma-morpheiform       43
Name: count, dtype: int64

In [112]:
# Oversampling using skin color and label

sampler = RandomOverSampler(random_state=42)

balanced_train_data, balanced_y_train = sampler.fit_resample(train_data.reshape(train_data.shape[0], -1), y_train)

# balanced_y_train = [disease[-1] for disease in balanced_y_train.str.split("_")]

image_shape = train_data.shape[1:]

balanced_train_data = balanced_train_data.reshape(-1, *image_shape)

balanced_train_data, balanced_y_train = shuffle(balanced_train_data, balanced_y_train, random_state=42)

In [113]:
# balanced_train_data, balanced_y_train = shuffle(added_extra_train_data, added_extra_y_train, random_state=42)

In [114]:
# Encode labels

label_encoder = LabelEncoder()

balanced_encoded_y_train = label_encoder.fit_transform(balanced_y_train)
balanced_onehot_encoded_y_train = keras.utils.to_categorical(balanced_encoded_y_train, num_classes = num_classes)

In [115]:
# apply data agumentation to my training data and make training data into batches

train_datagen.fit(balanced_train_data)

balanced_train_data = train_datagen.flow(balanced_train_data, balanced_onehot_encoded_y_train, batch_size = batch_size)

In [116]:
val_data = validation_datagen.flow_from_dataframe(
    dataframe = unpriporitized_validation_df,
    directory = path,
    x_col = "file_path",
    y_col = "label",
    target_size = target_size,
    batch_size = batch_size,
    class_mode = "categorical",
    shuffle = False,
    seed=42
)

Found 139 validated image filenames belonging to 21 classes.


In [117]:
val_data.class_indices

{'acne': 0,
 'acne-vulgaris': 1,
 'actinic-keratosis': 2,
 'basal-cell-carcinoma': 3,
 'basal-cell-carcinoma-morpheiform': 4,
 'dermatofibroma': 5,
 'dermatomyositis': 6,
 'dyshidrotic-eczema': 7,
 'eczema': 8,
 'epidermal-nevus': 9,
 'folliculitis': 10,
 'kaposi-sarcoma': 11,
 'keloid': 12,
 'malignant-melanoma': 13,
 'melanoma': 14,
 'mycosis-fungoides': 15,
 'prurigo-nodularis': 16,
 'pyogenic-granuloma': 17,
 'seborrheic-keratosis': 18,
 'squamous-cell-carcinoma': 19,
 'superficial-spreading-melanoma-ssm': 20}

## Compute Class Weights

In [119]:
# Compute weights to mitigate imbalance issues
# Author: Melissa

train_classes = y_train
cw = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_classes),
    y=train_classes
)
class_weights = dict(enumerate(cw))

print("Class Weights:", class_weights)

Class Weights: {0: 1.0708661417322836, 1: 0.5811965811965812, 2: 1.1147540983606556, 3: 0.4146341463414634, 4: 3.1627906976744184, 5: 2.4727272727272727, 6: 1.2830188679245282, 7: 2.3448275862068964, 8: 0.951048951048951, 9: 2.125, 10: 0.5738396624472574, 11: 1.2477064220183487, 12: 1.2477064220183487, 13: 1.7435897435897436, 14: 0.7513812154696132, 15: 1.0708661417322836, 16: 1.1428571428571428, 17: 1.7215189873417722, 18: 2.893617021276596, 19: 0.33415233415233414, 20: 1.6385542168674698}


## Modeling

In [120]:
class LossLearningRateScheduler(tf.keras.callbacks.History):
    """

    Function is from: https://github.com/farhantandia/Tricks-for-Handling-Imbalanced-Dataset-Image-Classification/blob/main/notebook.ipynb

    I modified function to adpot the cuurent tensorflow version

    A learning rate scheduler that relies on changes in loss function value to dictate whether learning rate is decayed or not.

    LossLearningRateScheduler has the following properties:
    base_lr: the starting learning rate

    lookback_epochs: the number of epochs in the past to compare with the loss function at the current epoch to determine if progress is being made.

    decay_threshold / decay_multiple: if loss function has not improved by a factor of decay_threshold * lookback_epochs, then decay_multiple will be applied to the learning rate.

    spike_epochs: list of the epoch numbers where you want to spike the learning rate.
    
    spike_multiple: the multiple applied to the current learning rate for a spike.

    """

    def __init__(self, base_lr, lookback_epochs, 
                 spike_epochs = None, spike_multiple = 10, 
                 decay_threshold = 0.002, decay_multiple = 0.7, 
                 loss_type = 'val_loss'):

        super(LossLearningRateScheduler, self).__init__()

        self.base_lr = base_lr
        self.lookback_epochs = lookback_epochs
        self.spike_epochs = spike_epochs
        self.spike_multiple = spike_multiple
        self.decay_threshold = decay_threshold
        self.decay_multiple = decay_multiple
        self.loss_type = loss_type


    def on_epoch_begin(self, epoch, logs=None):

        if len(self.epoch) > self.lookback_epochs:

            current_lr = tf.keras.backend.get_value(self.model.optimizer.learning_rate)

            target_loss = self.history[self.loss_type] 

            loss_diff =  target_loss[-int(self.lookback_epochs)] - target_loss[-1]

            if loss_diff <= np.abs(target_loss[-1]) * (self.decay_threshold * self.lookback_epochs):

                print(' '.join(('Changing learning rate from', str(current_lr), 'to', str(current_lr * self.decay_multiple))))
                # Modified
                self.model.optimizer.learning_rate.assign(current_lr * self.decay_multiple)
                current_lr = current_lr * self.decay_multiple

            else:

                print(' '.join(('Learning rate:', str(current_lr))))

            if self.spike_epochs is not None and len(self.epoch) in self.spike_epochs:
                print(' '.join(('Spiking learning rate from', str(current_lr), 'to', str(current_lr * self.spike_multiple))))
                # Modified
                self.model.optimizer.learning_rate.assign(self.base_lr)

        else:

            print(' '.join(('Setting learning rate to', str(self.base_lr))))
            # Modified
            self.model.optimizer.learning_rate.assign(self.base_lr)


        return tf.keras.backend.get_value(self.model.optimizer.learning_rate)

In [121]:
# Load model2 and unfree all layers to fine tune
model = keras.models.load_model("Fine_Tuned_All_Label_Model2_7.keras")
model.trainable = True

In [122]:
callbacks = [
    # Adaptive Decay Learning Rate
    LossLearningRateScheduler(base_lr=3.4299999356335316e-07, lookback_epochs=10)
]

In [123]:
history = model.fit(
    balanced_train_data,
    epochs = 10,
    validation_data = val_data,
    callbacks = callbacks,
    class_weight = class_weights,
)

  self._warn_if_super_not_called()


Setting learning rate to 3.4299999356335316e-07
Epoch 1/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m850s[0m 3s/step - accuracy: 0.9562 - loss: 0.3068 - val_accuracy: 0.9712 - val_loss: 0.2597
Setting learning rate to 3.4299999356335316e-07
Epoch 2/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m777s[0m 3s/step - accuracy: 0.9546 - loss: 0.3100 - val_accuracy: 0.9712 - val_loss: 0.2596
Setting learning rate to 3.4299999356335316e-07
Epoch 3/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m753s[0m 3s/step - accuracy: 0.9552 - loss: 0.3080 - val_accuracy: 0.9712 - val_loss: 0.2609
Setting learning rate to 3.4299999356335316e-07
Epoch 4/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m767s[0m 3s/step - accuracy: 0.9563 - loss: 0.3061 - val_accuracy: 0.9712 - val_loss: 0.2600
Setting learning rate to 3.4299999356335316e-07
Epoch 5/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m754s[0m 3s/step - accuracy: 0.9551 - 

In [124]:
model.save("Fine_Tuned_All_Label_Model2_9.keras")

## Testing

In [125]:
test_df = pd.read_csv("../bttai-ajl-2025/test.csv")

test_path = "../bttai-ajl-2025/test/test/"

# Combine label and md5hash to form the correct path
test_df['file_path'] = test_path + test_df['md5hash'] + '.jpg'

In [126]:
test_df.head()

Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,qc,ddi_scale,file_path
0,0844ae634f0e6e7ef1f73c2aeecbae0e,2,2,,12,../bttai-ajl-2025/test/test/0844ae634f0e6e7ef1...
1,3b290d262098f761d719aa07cf36c040,4,3,,34,../bttai-ajl-2025/test/test/3b290d262098f761d7...
2,cf561d08ac46d0fda678bff6621005ee,2,3,,12,../bttai-ajl-2025/test/test/cf561d08ac46d0fda6...
3,e6371069be05c6b0a95b4b3f1bacc9a5,4,3,,34,../bttai-ajl-2025/test/test/e6371069be05c6b0a9...
4,f76cddb37265f97508f159078dcc7e7c,5,5,,56,../bttai-ajl-2025/test/test/f76cddb37265f97508...


In [127]:
# For submission, you have to follow image order in test.csv

test_datagen = ImageDataGenerator(preprocessing_function=keras.applications.efficientnet.preprocess_input)

# Load test images (Ensure they are all inside a single folder, not subdirectories)
test_data = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="file_path",      # Column containing file paths
    target_size=target_size, # Resize images (adjust for your model)
    batch_size=batch_size,          # Batch size
    class_mode=None,        # No labels for test set
    shuffle=False           # Keep order consistent
)

Found 1227 validated image filenames.


In [132]:
predictions = model.predict(test_data)

predicted_classes = np.argmax(predictions, axis=1)

[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 572ms/step


In [133]:
predicted_classes_names = label_encoder.inverse_transform(predicted_classes)

In [134]:
test_submission = test_df.drop(columns=['fitzpatrick_scale', 'fitzpatrick_centaur', 'qc',
       'ddi_scale', 'file_path']).copy()

test_submission["label"] = predicted_classes_names

In [135]:
test_submission.head()

Unnamed: 0,md5hash,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,malignant-melanoma
1,3b290d262098f761d719aa07cf36c040,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,acne
4,f76cddb37265f97508f159078dcc7e7c,folliculitis


In [136]:
test_submission.to_csv("../Submission/test7.csv", index=False)

In [137]:
test_submission.head()

Unnamed: 0,md5hash,label
0,0844ae634f0e6e7ef1f73c2aeecbae0e,malignant-melanoma
1,3b290d262098f761d719aa07cf36c040,basal-cell-carcinoma
2,cf561d08ac46d0fda678bff6621005ee,squamous-cell-carcinoma
3,e6371069be05c6b0a95b4b3f1bacc9a5,acne
4,f76cddb37265f97508f159078dcc7e7c,folliculitis
