# Skin Disease Classification - Basic CNN (Kaggle Notebook)

This notebook is a **starter template** for training a simple CNN on an image classification dataset in a Kaggle notebook. To use it:

1. Add your dataset in the right panel (**Add data**)
2. Update `DATASET_DIR` and (if needed) `TRAIN_SUBDIR` / `VAL_SUBDIR`
3. Run all cells


In [None]:
# Core imports
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Fix one seed for the whole notebook and hand it to Keras' global seeding helper.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Report the installed TensorFlow version for reference.
print('TensorFlow version:', tf.__version__)


In [None]:
# === Dataset configuration ===
# Replace with your Kaggle dataset mount path
# Example: '/kaggle/input/skin-disease-dataset'
DATASET_DIR = '/kaggle/input/YOUR_DATASET_FOLDER'

# Option A: dataset has class folders directly under DATASET_DIR
#   DATASET_DIR/class_a/*.jpg
#   DATASET_DIR/class_b/*.jpg
# In this case, keep TRAIN_SUBDIR and VAL_SUBDIR as None.

# Option B: dataset already split into train/val directories
#   DATASET_DIR/train/class_a/*.jpg
#   DATASET_DIR/val/class_a/*.jpg
# Set TRAIN_SUBDIR='train', VAL_SUBDIR='val'.
TRAIN_SUBDIR = None
VAL_SUBDIR = None

IMAGE_SIZE = (224, 224)  # images are resized to this size when loaded
BATCH_SIZE = 32
EPOCHS = 12  # upper bound on the number of training epochs
VALIDATION_SPLIT = 0.2  # only used when VAL_SUBDIR is None

# Fail fast with a real exception instead of `assert`: assert statements are
# removed when Python runs with -O, and the dataset root must be a directory,
# not merely an existing path.
if not os.path.isdir(DATASET_DIR):
    raise FileNotFoundError(f'Dataset directory not found: {DATASET_DIR}')
print('Using dataset directory:', DATASET_DIR)


In [None]:
# Build tf.data datasets
#
# Supported layouts, selected by TRAIN_SUBDIR / VAL_SUBDIR:
#   * both set            -> each split is loaded from its own directory
#   * only TRAIN_SUBDIR   -> VALIDATION_SPLIT is carved out of the train directory
#   * neither set         -> VALIDATION_SPLIT is carved out of DATASET_DIR
# Setting only VAL_SUBDIR is rejected: training on DATASET_DIR itself would
# turn the validation folder into a class label.
if VAL_SUBDIR and not TRAIN_SUBDIR:
    raise ValueError(
        'VAL_SUBDIR is set but TRAIN_SUBDIR is not; '
        'set both, or leave VAL_SUBDIR as None to use VALIDATION_SPLIT.'
    )

# Directory holding the training class folders.
train_dir = os.path.join(DATASET_DIR, TRAIN_SUBDIR) if TRAIN_SUBDIR else DATASET_DIR

# Arguments shared by every loader call, kept in one place so the
# train and validation datasets cannot drift apart.
loader_kwargs = dict(
    label_mode='categorical',
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=SEED,
)

if VAL_SUBDIR:
    # Pre-split dataset: load each directory as-is.
    val_dir = os.path.join(DATASET_DIR, VAL_SUBDIR)

    train_ds = tf.keras.utils.image_dataset_from_directory(train_dir, **loader_kwargs)
    val_ds = tf.keras.utils.image_dataset_from_directory(val_dir, **loader_kwargs)
else:
    # Single directory: both calls use the same seed and validation_split so
    # the 'training' and 'validation' subsets are complementary.
    train_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=VALIDATION_SPLIT,
        subset='training',
        **loader_kwargs,
    )
    val_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=VALIDATION_SPLIT,
        subset='validation',
        **loader_kwargs,
    )

# class_names must be read before the datasets are wrapped below; the
# cache/shuffle/prefetch wrappers do not carry that attribute.
class_names = train_ds.class_names
num_classes = len(class_names)
print('Classes:', class_names)
print('Number of classes:', num_classes)

# Input pipeline: cache decoded batches, reshuffle the training batches,
# and prefetch so data loading overlaps with training.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)


In [None]:
# Sanity check: draw up to nine images from the first training batch in a
# 3x3 grid, each titled with its class name.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    shown = min(9, len(images))
    for i in range(shown):
        ax = plt.subplot(3, 3, i + 1)
        pixels = images[i].numpy().astype('uint8')
        # Labels are one-hot vectors, so argmax recovers the class index.
        class_index = np.argmax(labels[i].numpy())
        ax.imshow(pixels)
        ax.set_title(class_names[class_index])
        ax.axis('off')
plt.show()


In [None]:
# Basic CNN model
# Augmentation stage: random horizontal flip, small rotation, small zoom.
data_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip('horizontal'),
        tf.keras.layers.RandomRotation(0.05),
        tf.keras.layers.RandomZoom(0.1),
    ],
    name='data_augmentation',
)

# Three convolution + max-pooling stages with 32, 64 and 128 filters.
conv_stages = []
for n_filters in (32, 64, 128):
    conv_stages.append(
        tf.keras.layers.Conv2D(n_filters, 3, padding='same', activation='relu')
    )
    conv_stages.append(tf.keras.layers.MaxPooling2D())

# Full network: input -> augmentation -> rescale pixels by 1/255 ->
# conv stages -> dropout -> dense classifier with one softmax unit per class.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(*IMAGE_SIZE, 3)),
    data_augmentation,
    tf.keras.layers.Rescaling(1./255),
    *conv_stages,
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot ('categorical') labels.
adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()


In [None]:
# Training
# Stop once val_loss has gone 3 epochs without improving, restoring the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)
# Halve the learning rate after 2 epochs without val_loss improvement.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2
)
callbacks = [early_stopping, lr_on_plateau]

# Train for at most EPOCHS epochs, evaluating on val_ds after each one.
history = model.fit(
    train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks
)


In [None]:
# Plot train/validation curves side by side: accuracy (left), loss (right).
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# One x-position per recorded epoch.
epochs_range = range(len(acc))

# (title, training series, validation series, legend location) per panel.
panels = [
    ('Accuracy', acc, val_acc, 'lower right'),
    ('Loss', loss, val_loss, 'upper right'),
]

plt.figure(figsize=(12, 4))
for position, (title, train_series, val_series, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_series, label=f'Train {title}')
    plt.plot(epochs_range, val_series, label=f'Val {title}')
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()


In [None]:
# Write the trained model to the Kaggle working directory in .keras format
# and report where it was written.
save_path = '/kaggle/working/basic_skin_cnn.keras'
model.save(filepath=save_path)
print('Model saved to:', save_path)
