In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG16, VGG19, ResNet50, ResNet152, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

BATCH_SIZE = 5


In [2]:
data_dir = 'dataset/gatos/clean'

import os
from sklearn.model_selection import train_test_split


# Create DataFrame of all images
filepaths = []
labels = []
list_of_classes = ['Healthy', 'Leve', 'Ligera', 'Moderate', 'Severe']

for klass in os.listdir(data_dir):
    try:
        intklass = int(klass)
        label = list_of_classes[intklass]
        classpath = os.path.join(data_dir, klass)
        
        if os.path.isdir(classpath):
            flist = os.listdir(classpath)
            for f in flist:
                fpath = os.path.join(classpath, f)
                filepaths.append(fpath)
                labels.append(label)
    except (ValueError, IndexError):
        print(f"Skipping directory {klass}: not a valid class index")

# Create DataFrame
all_data_df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})

# Split data into train, validation and test sets (70%, 15%, 15%)
train_df, valid_df = train_test_split(all_data_df, test_size=0.3, random_state=42, stratify=all_data_df['labels'])

print(f"Train samples: {len(train_df)}, Validation samples: {len(valid_df)}")
print("\nClass distribution:")
print("Train:", train_df['labels'].value_counts())
print("Validation:", valid_df['labels'].value_counts())

# Set up data generators
datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    shear_range=0.5
)

test_datagen = ImageDataGenerator(rescale=1./255)  # Only rescaling for validation and test

# Create data generators from DataFrames
train_data = datagen.flow_from_dataframe(
    train_df, 
    x_col='filepaths',
    y_col='labels',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_data = test_datagen.flow_from_dataframe(
    valid_df, 
    x_col='filepaths',
    y_col='labels',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

Train samples: 58, Validation samples: 25

Class distribution:
Train: labels
Leve        21
Healthy     15
Ligera      13
Moderate     8
Severe       1
Name: count, dtype: int64
Validation: labels
Leve        9
Healthy     7
Ligera      5
Moderate    3
Severe      1
Name: count, dtype: int64
Found 58 validated image filenames belonging to 5 classes.
Found 25 validated image filenames belonging to 5 classes.


In [3]:
drop_classes = []
train_df = train_df[~train_df['labels'].isin(drop_classes)]
valid_df = valid_df[~valid_df['labels'].isin(drop_classes)]
list_of_classes = [c for c in list_of_classes if c not in drop_classes]

In [4]:
print(train_df['labels'].value_counts())

labels
Leve        21
Healthy     15
Ligera      13
Moderate     8
Severe       1
Name: count, dtype: int64


In [5]:
datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    shear_range=0.5
)

train_gen = datagen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels', 
                                        target_size=(224, 224), class_mode='categorical', batch_size=BATCH_SIZE)

val_gen = datagen.flow_from_dataframe(valid_df, x_col='filepaths', y_col='labels', 
                                      target_size=(224, 224), class_mode='categorical', batch_size=BATCH_SIZE)



Found 58 validated image filenames belonging to 5 classes.
Found 25 validated image filenames belonging to 5 classes.


In [15]:
#Loading the resnet model
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

inputs = layers.Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(len(list_of_classes), activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)

model = Model(inputs, outputs)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [16]:
model.summary()
for layer in base_model.layers[-20:]:
    layer.trainable = True
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')


In [17]:
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
import wandb

# Compile Model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Learning Rate Scheduler
def lr_scheduler(epoch, lr):
    return lr * 0.1 if epoch > 5 else lr

lr_callback = LearningRateScheduler(lr_scheduler)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=[
        early_stopping, 
        lr_callback,
    ]
)


Epoch 1/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 4s/step - accuracy: 0.2284 - loss: 2.5401 - val_accuracy: 0.1200 - val_loss: 2.3620 - learning_rate: 0.0010
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 4s/step - accuracy: 0.2129 - loss: 2.4523 - val_accuracy: 0.3600 - val_loss: 2.1995 - learning_rate: 0.0010
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 4s/step - accuracy: 0.3429 - loss: 2.1829 - val_accuracy: 0.3600 - val_loss: 2.1722 - learning_rate: 0.0010
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 4s/step - accuracy: 0.2217 - loss: 2.2758 - val_accuracy: 0.3600 - val_loss: 2.1570 - learning_rate: 0.0010
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 4s/step - accuracy: 0.2083 - loss: 2.1972 - val_accuracy: 0.3600 - val_loss: 2.1451 - learning_rate: 0.0010
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 4s/s

In [18]:
val_loss, val_acc = model.evaluate(val_gen)
print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_acc}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 891ms/step - accuracy: 0.3228 - loss: 2.0516
Validation Loss: 2.0404160022735596, Validation Accuracy: 0.36000001430511475
