In [None]:
import os
import shutil

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import EfficientNetV2B0
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
# Configs
# Dataset locations: the image root (scanned for one sub-folder per class)
# and the metadata CSV stored inside it.
DATA_DIR = '/content/drive/MyDrive/multiclass_data'
CSV_PATH = '/content/drive/MyDrive/multiclass_data/HAM10000_metadata.csv'

# Image size handed to the loader and the model input; training loop settings.
IMG_SIZE = (224, 224)
BATCH_SIZE, EPOCHS = 32, 10

In [None]:
# Load metadata
# Read the metadata CSV into a DataFrame; later cells filter it down to the
# images present on disk and encode its columns.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

In [None]:
# Get class labels
# Every sub-directory of DATA_DIR is one class; sorting the names makes the
# name -> index mapping deterministic across runs.
class_names = sorted(entry.name for entry in os.scandir(DATA_DIR) if entry.is_dir())
class_to_idx = dict(zip(class_names, range(len(class_names))))

In [None]:
# Filter metadata to include only images in multiclass_data
# Collect every .jpg file name found in any class folder.
valid_files = [
    fname
    for cls_dir in class_names
    for fname in os.listdir(os.path.join(DATA_DIR, cls_dir))
    if fname.endswith('.jpg')
]
# Drop the 4-character '.jpg' suffix to obtain the ids used in the metadata.
valid_ids = [fname[:-4] for fname in valid_files]
# Keep only metadata rows whose image exists on disk; copy so that later
# column assignments do not operate on a view of the full table.
on_disk = df['image_id'].isin(valid_ids)
df = df[on_disk].copy()

In [None]:
# Assign label indices
df['label'] = df['dx'].map(class_to_idx)

# A dx value without a matching class folder maps to NaN, which would only
# surface later as a broken class index; fail here with a clear message.
unmapped_dx = df.loc[df['label'].isna(), 'dx'].unique()
if len(unmapped_dx) > 0:
    raise ValueError(
        f"No class folder found for dx values: {sorted(map(str, unmapped_dx))}"
    )

# Encode metadata
# NOTE: the columns are overwritten in place, so this cell must run exactly
# once per freshly loaded df (a second run would divide age by 100 again and
# try to one-hot encode the already-encoded localization lists).
#
# The fitted encoders and the imputation mean are kept in named variables so
# the exact same encoding can be re-applied to new samples at inference time.
sex_encoder = LabelEncoder()
df['sex'] = sex_encoder.fit_transform(df['sex'].fillna('unknown'))

age_mean = df['age'].mean()
df['age'] = df['age'].fillna(age_mean) / 100.0  # Normalize

loc_encoder = OneHotEncoder(sparse_output=False)
loc_encoded = loc_encoder.fit_transform(
    df['localization'].fillna('unknown').values.reshape(-1, 1)
)
# Store each row's one-hot vector as a list in a single column.
df['localization'] = loc_encoded.tolist()

In [None]:
# Load data
def load_data(df):
    """Load images and encoded metadata for every row of ``df``.

    Args:
        df: DataFrame with the columns ``dx`` (class folder name),
            ``image_id`` (file name without ``.jpg``), ``age``, ``sex``,
            ``localization`` (one list per row) and ``label`` (class index).

    Returns:
        Tuple ``(images, ages, sexes, locs, labels)``:
            images: stacked image arrays resized to ``IMG_SIZE``, holding raw
                0-255 pixel values.
            ages:   ``age`` column reshaped to ``(N, 1)``.
            sexes:  ``sex`` column reshaped to ``(N, 1)``.
            locs:   array built from the per-row ``localization`` lists.
            labels: one-hot labels with ``len(class_names)`` columns.

    Raises:
        Whatever ``load_img`` raises when an image file is missing or
        unreadable.
    """
    images = []
    for cls, image_id in zip(df['dx'], df['image_id']):
        img_path = os.path.join(DATA_DIR, cls, f"{image_id}.jpg")
        img = load_img(img_path, target_size=IMG_SIZE)
        # Keep raw 0-255 pixel values. EfficientNetV2B0 is built with its
        # default include_preprocessing=True, so it rescales its input
        # internally; dividing by 255 here as well would feed it values that
        # are ~255x too small.
        images.append(img_to_array(img))

    # Metadata columns are read in one shot instead of row by row.
    return (
        np.array(images),
        df['age'].to_numpy().reshape(-1, 1),
        df['sex'].to_numpy().reshape(-1, 1),
        np.array(df['localization'].tolist()),
        tf.keras.utils.to_categorical(df['label'].to_numpy(), num_classes=len(class_names))
    )

In [None]:
# Materialise every sample (images + metadata + one-hot labels) in memory.
images, ages, sexes, locs, labels = load_data(df)

# Train/val split
# Stratify on the integer class id recovered from the one-hot labels so both
# splits keep the same class proportions; fixed seed for a repeatable split.
strat_class_ids = labels.argmax(axis=1)
(
    X_train, X_val,
    age_train, age_val,
    sex_train, sex_val,
    loc_train, loc_val,
    y_train, y_val,
) = train_test_split(
    images, ages, sexes, locs, labels,
    test_size=0.2,
    stratify=strat_class_ids,
    random_state=42,
)

In [None]:
# Build model
# Four inputs: the RGB image plus three metadata tensors (age, sex and the
# one-hot localization vector, whose width is taken from the loaded data).
img_input = layers.Input(shape=(*IMG_SIZE, 3))
age_input = layers.Input(shape=(1,))
sex_input = layers.Input(shape=(1,))
loc_input = layers.Input(shape=(locs.shape[1],))

# Image branch: ImageNet-pretrained EfficientNetV2B0 without its classifier
# head, globally average-pooled to one feature vector per image.
# NOTE(review): include_preprocessing is left at its default, so the backbone
# rescales its input itself and expects raw 0-255 pixel values - confirm the
# images fed to this model are not already scaled to [0, 1].
base = EfficientNetV2B0(weights='imagenet', include_top=False, input_tensor=img_input)
img_features = layers.GlobalAveragePooling2D()(base.output)

# Metadata branch: concatenate the three metadata inputs and project them.
meta_inputs = [age_input, sex_input, loc_input]
meta_features = layers.Concatenate()(meta_inputs)
meta_features = layers.Dense(32, activation='relu')(meta_features)

# Fusion head: join both branches, then classify with dropout regularisation.
fused = layers.Concatenate()([img_features, meta_features])
fused = layers.Dense(64, activation='relu')(fused)
fused = layers.Dropout(0.5)(fused)
output = layers.Dense(len(class_names), activation='softmax')(fused)

model = Model(inputs=[img_input, *meta_inputs], outputs=output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b0_notop.h5
[1m24274472/24274472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Compile and train
# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Input order must match the order of `inputs` given when the model was built:
# image, age, sex, localization.
train_inputs = [X_train, age_train, sex_train, loc_train]
val_inputs = [X_val, age_val, sex_val, loc_val]

model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(val_inputs, y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

Epoch 1/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m447s[0m 8s/step - accuracy: 0.6322 - loss: 0.8355 - val_accuracy: 0.2634 - val_loss: 1.3408
Epoch 2/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m359s[0m 7s/step - accuracy: 0.8190 - loss: 0.5011 - val_accuracy: 0.3146 - val_loss: 1.1976
Epoch 3/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 7s/step - accuracy: 0.8734 - loss: 0.3618 - val_accuracy: 0.3504 - val_loss: 1.0762
Epoch 4/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 8s/step - accuracy: 0.8966 - loss: 0.3106 - val_accuracy: 0.5908 - val_loss: 1.1912
Epoch 5/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 8s/step - accuracy: 0.9113 - loss: 0.2421 - val_accuracy: 0.2583 - val_loss: 1.6136
Epoch 6/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 8s/step - accuracy: 0.9528 - loss: 0.1609 - val_accuracy: 0.2864 - val_loss: 1.5517
Epoch 7/10
[1m49/49[0m [32m━━━━

<keras.src.callbacks.history.History at 0x7be17125fd50>

In [None]:
# Save model
# Export the trained model as an inference artifact into a directory in the
# current working directory.
export_dir = 'multiclass_skin_classifier'
model.export(export_dir)

Saved artifact at 'multiclass_skin_classifier'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor'), TensorSpec(shape=(None, 1), dtype=tf.float32, name='keras_tensor_1'), TensorSpec(shape=(None, 1), dtype=tf.float32, name='keras_tensor_2'), TensorSpec(shape=(None, 13), dtype=tf.float32, name='keras_tensor_3')]
Output Type:
  TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
Captures:
  136208194292816: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  136208215210704: TensorSpec(shape=(1, 1, 1, 3), dtype=tf.float32, name=None)
  136208194291088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136208194293968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136208194294160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136208215209552: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136208215210128: TensorSpec(shape=(), dtype=tf.re

In [None]:
# Bundle the exported model directory into a single zip archive next to it.
# (`shutil` is imported in the notebook's top import cell.)
# The archive path is the cell's last expression so it is displayed as output.
shutil.make_archive(
    base_name='multiclass_skin_classifier',
    format='zip',
    root_dir='multiclass_skin_classifier',
)

'/content/multiclass_skin_classifier.zip'