In [27]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [28]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("haroonalam16/20-skin-diseases-dataset")

print("Path to dataset files:", path)

Using Colab cache for faster access to the '20-skin-diseases-dataset' dataset.
Path to dataset files: /kaggle/input/20-skin-diseases-dataset


Chuẩn bị data

In [30]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_dir = "/kaggle/input/20-skin-diseases-dataset/Dataset"

# Kiểm tra xem có thư mục train/test không
print(os.listdir(data_dir))

#Tăng đa dạng ảnh
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20, #xoay ảnh
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# --- Train generator ---
train_generator = train_datagen.flow_from_directory(
    os.path.join(data_dir, 'train'),   # đường dẫn train
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'
)

# --- Test generator ---
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    os.path.join(data_dir, 'test'),    # đường dẫn test
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical'  #one-hot
)


['test', 'train']
Found 2609 images belonging to 19 classes.
Found 897 images belonging to 20 classes.


In [34]:
path = '/kaggle/input/20-skin-diseases-dataset/Dataset/train/'

In [35]:
class_names = sorted(os.listdir(path))
num_classes = len(class_names)

img_size = (192, 192, 3) #size,size,màu

print('classes: ', class_names)

classes:  ['Acne and Rosacea Photos', 'Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions', 'Atopic Dermatitis Photos', 'Cellulitis Impetigo and other Bacterial Infections', 'Eczema Photos', 'Exanthems and Drug Eruptions', 'Herpes HPV and other STDs Photos', 'Light Diseases and Disorders of Pigmentation', 'Lupus and other Connective Tissue diseases', 'Melanoma Skin Cancer Nevi and Moles', 'Poison Ivy Photos and other Contact Dermatitis', 'Psoriasis pictures Lichen Planus and related diseases', 'Seborrheic Keratoses and other Benign Tumors', 'Systemic Disease', 'Tinea Ringworm Candidiasis and other Fungal Infections', 'Urticaria Hives', 'Vascular Tumors', 'Vasculitis Photos', 'Warts Molluscum and other Viral Infections']


In [36]:

import cv2

labels = []
images = []

print('images:\n')
for cl in class_names:
    print(cl, end=' -> ')
    for img in os.listdir(path + cl):
        label = np.zeros(num_classes)
        label[class_names.index(cl)] = 1
        labels.append(label)

        image = np.asarray(cv2.resize(cv2.imread(path + cl + '/' + img, cv2.IMREAD_COLOR), img_size[0:2])[:, :, ::-1])#That line remove Blue

        #image = np.asarray(cv2.resize(cv2.imread(data_path + cl + '/' + img, cv2.COLOR_BGR2RGB), img_size[0:2]))
        images.append(image)
    print('done')


labels = np.asarray(labels)
images = np.asarray(images)

print(f'\n\nlabels shape: {labels.shape}')
print(f'images shape: {images.shape}')

images:

Acne and Rosacea Photos -> done
Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions -> done
Atopic Dermatitis Photos -> done
Cellulitis Impetigo and other Bacterial Infections -> done
Eczema Photos -> done
Exanthems and Drug Eruptions -> done
Herpes HPV and other STDs Photos -> done
Light Diseases and Disorders of Pigmentation -> done
Lupus and other Connective Tissue diseases -> done
Melanoma Skin Cancer Nevi and Moles -> done
Poison Ivy Photos and other Contact Dermatitis -> done
Psoriasis pictures Lichen Planus and related diseases -> done
Seborrheic Keratoses and other Benign Tumors -> done
Systemic Disease -> done
Tinea Ringworm Candidiasis and other Fungal Infections -> done
Urticaria Hives -> done
Vascular Tumors -> done
Vasculitis Photos -> done
Warts Molluscum and other Viral Infections -> done


labels shape: (2609, 19)
images shape: (2609, 192, 192, 3)


# Chia train và test

In [37]:
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.15, random_state=42)

print(f'train images shape: {X_train.shape}\ntrain labels shape: {y_train.shape}\n\nvalidation images shape: {X_val.shape}\nvalidation labels shape: {y_val.shape}\n')

train images shape: (2217, 192, 192, 3)
train labels shape: (2217, 19)

validation images shape: (392, 192, 192, 3)
validation labels shape: (392, 19)



In [38]:
import tensorflow as tf

cnn_model = tf.keras.Sequential()

#chuẩn hóa ảnh pixel từ [0, 255] → [0, 1]
cnn_model.add(tf.keras.layers.Rescaling(scale=1. / 255, input_shape=(img_size)))

# Convolutional Dò tìm các đặc trưng của ảnh - 64 filter size 3x3 , relu - loải bỏ âm , same - cùng size input
cnn_model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

# Convolutional
cnn_model.add(tf.keras.layers.Conv2D(128, (2, 2), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.Conv2D(128, (2, 2), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

# Convolutional block 3
cnn_model.add(tf.keras.layers.Conv2D(256, (2, 2), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.Conv2D(256, (2, 2), activation='relu', padding='same'))
#Giảm số lượng tham số, tránh overfitting - giảm 2 lần
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

# Convolutional block 4
cnn_model.add(tf.keras.layers.Conv2D(512, (2, 2), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.Conv2D(512, (2, 2), activation='relu', padding='same'))
cnn_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
#Chuyển từ ma trận 2D thành vector 1D
cnn_model.add(tf.keras.layers.Flatten())

# Dense block - Thực hiện phân loại / dự đoán dựa trên đặc trưng trích xuất.
cnn_model.add(tf.keras.layers.Dense(256, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(128, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(64, activation='relu'))
cnn_model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))


cnn_model.compile(optimizer='Adamax', loss='categorical_crossentropy', metrics=['accuracy'])

cnn_model.summary()


  super().__init__(**kwargs)


In [41]:
from tensorflow.keras.callbacks import ModelCheckpoint

# tạo folder lưu model nếu chưa có
import os
os.makedirs('cnn_model', exist_ok=True)

# Tạo callback ModelCheckpoint
checkpoint_callback = ModelCheckpoint(
    filepath='cnn_model/model{epoch:02d}.keras',  # <--- thêm .keras
    save_best_only=False,  # True nếu chỉ muốn lưu model tốt nhất
    save_weights_only=False,  # True nếu chỉ lưu weights
    verbose=1
)


In [None]:
history = cnn_model.fit(images, labels, epochs=20, validation_data=(X_val, y_val), callbacks=[checkpoint_callback])

Epoch 1/20
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24s/step - accuracy: 0.2265 - loss: 2.7257 
Epoch 1: saving model to cnn_model/model01.keras
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2037s[0m 25s/step - accuracy: 0.2272 - loss: 2.7231 - val_accuracy: 0.3265 - val_loss: 2.3534
Epoch 2/20
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24s/step - accuracy: 0.3136 - loss: 2.3570 
Epoch 2: saving model to cnn_model/model02.keras
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2073s[0m 25s/step - accuracy: 0.3137 - loss: 2.3568 - val_accuracy: 0.3265 - val_loss: 2.3070
Epoch 3/20
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23s/step - accuracy: 0.3329 - loss: 2.2851 
Epoch 3: saving model to cnn_model/model03.keras
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2009s[0m 24s/step - accuracy: 0.3330 - loss: 2.2850 - val_accuracy: 0.3469 - val_loss: 2.2492
Epoch 4/20
[1m82/82[0m [32m━━━━━━━━━━

In [None]:

accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(accuracy))

plt.figure()
plt.plot(epochs, accuracy, label='Training Accuracy')
plt.plot(epochs, loss, label='Training Loss')
plt.legend()
plt.title('Training Accuracy and Loss')

plt.figure()
plt.plot(epochs, val_accuracy, label='Validation Accuracy')
plt.plot(epochs, val_loss, label='Validation Loss')
plt.legend()
plt.title('Validation Accuracy and Loss')

plt.show()
