In [None]:
import kagglehub

# Fetch the most recent version of the dataset and remember where the files
# were placed locally.
DATASET_HANDLE = "pacificrm/skindiseasedataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'skindiseasedataset' dataset.
Path to dataset files: /kaggle/input/skindiseasedataset


In [None]:
import os
# Show the top-level entries of the downloaded dataset directory.
dataset_entries = os.listdir(path)
print(dataset_entries)

['SkinDisease', 'Readme.md']


In [None]:
# Step into the "SkinDisease" folder and list what it contains.
skin_path = os.path.join(path, "SkinDisease")
skin_entries = os.listdir(skin_path)
print(skin_entries)

['SkinDisease']


In [None]:
# The dataset nests a second "SkinDisease" folder inside the first one.
skin_path1 = os.path.join(skin_path, "SkinDisease")
nested_entries = os.listdir(skin_path1)
print(nested_entries)

['test', 'train']


In [None]:
# Folder holding one sub-directory per disease class for training.
train_path = os.path.join(skin_path1, 'train')
train_class_dirs = os.listdir(train_path)
print(train_class_dirs)

['Benign_tumors', 'Unknown_Normal', 'Tinea', 'Eczema', 'Actinic_Keratosis', 'Vascular_Tumors', 'Acne', 'Infestations_Bites', 'Rosacea', 'Seborrh_Keratoses', 'Moles', 'Vitiligo', 'SkinCancer', 'Vasculitis', 'Lichen', 'Candidiasis', 'DrugEruption', 'Sun_Sunlight_Damage', 'Bullous', 'Warts', 'Psoriasis', 'Lupus']


In [None]:
#import the req lib
import cv2
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torchvision import transforms

In [None]:
#loading of the image and converting to color space ->hsv
def load_image(path):
  """Read an image from disk and return it converted to the HSV colour space.

  Args:
    path: Filesystem path of the image to read.

  Returns:
    The image converted from OpenCV's BGR channel order to HSV, or None
    when OpenCV could not read the file (missing, corrupt or unsupported).
  """
  img = cv2.imread(path)
  # cv2.imread reports failure by returning None instead of raising. Passing
  # that None on to cvtColor would fail with an opaque OpenCV assertion, so
  # stop here and let the caller decide what to do (preprocess follows the
  # same "None means unreadable" convention).
  if img is None:
    return None
  return cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

In [None]:
def resize_img(img,size=(128,128)):
  """Return `img` rescaled to `size` using OpenCV's area interpolation.

  Args:
    img: Image array accepted by cv2.resize.
    size: Target size handed to cv2.resize unchanged; defaults to (128, 128).

  Returns:
    The resized image produced by cv2.resize.
  """
  resized = cv2.resize(img, size, interpolation=cv2.INTER_AREA)
  return resized

In [None]:
# TODO: watermark removal is skipped for now; later, compare the model's results with and without it.
# TODO: also remove ... (note left unfinished -- presumably other image artifacts; confirm what was meant)

In [None]:
#remove hair function
def remove_hair(img, kernel_size=17, threshold=10, inpaint_radius=1):
    """Suppress thin dark structures (hair) in an RGB image by inpainting them.

    The image is converted to grayscale and a black-hat morphological filter
    is applied. The filter response is thresholded into a binary mask, and the
    masked pixels are filled in from their surroundings with Telea inpainting.

    Args:
        img: RGB image (the grayscale conversion uses COLOR_RGB2GRAY). In this
            notebook it is the resized 8-bit image produced inside preprocess.
        kernel_size: Side length of the square structuring element used by the
            black-hat filter. Defaults to 17, the previously hard-coded value.
        threshold: Black-hat responses strictly above this value are marked as
            hair in the mask. Defaults to 10.
        inpaint_radius: Neighbourhood radius passed to cv2.inpaint. Defaults
            to 1.

    Returns:
        The image returned by cv2.inpaint, with masked pixels filled in.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Black-hat = (morphological closing) - (image): it lights up details that
    # are darker than their neighbourhood and smaller than the kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)

    # Binarise the response: pixels above `threshold` become 255 (to be
    # inpainted), everything else becomes 0 (left untouched).
    _, hair_mask = cv2.threshold(blackhat, threshold, 255, cv2.THRESH_BINARY)

    # Fill the masked pixels from the surrounding skin.
    return cv2.inpaint(img, hair_mask, inpaint_radius, cv2.INPAINT_TELEA)


In [None]:
#very important step sharpens the lesion(skin disease affected area)
#check the params carefully
def sharpen(img, sigma=3, amount=0.5):
    """Sharpen an image with an unsharp mask.

    A Gaussian-blurred copy of the image is subtracted from a boosted copy of
    the original: ``(1 + amount) * img - amount * blur``.

    Args:
        img: Image array accepted by cv2.GaussianBlur / cv2.addWeighted.
        sigma: Standard deviation handed to cv2.GaussianBlur. The kernel size
            is passed as (0, 0) so OpenCV derives it from sigma. Defaults to
            3, the previously hard-coded value.
        amount: Sharpening strength. The default 0.5 reproduces the original
            weights of 1.5 for the image and -0.5 for the blur.

    Returns:
        The sharpened image produced by cv2.addWeighted.
    """
    blur = cv2.GaussianBlur(img, (0, 0), sigma)
    # addWeighted computes src1 * alpha + src2 * beta + gamma.
    return cv2.addWeighted(img, 1 + amount, blur, -amount, 0)

In [None]:
# Disease classes kept for this experiment; folders of any other class are
# skipped when the images are loaded.
keep_classes = ['Vitiligo', 'Acne', 'Psoriasis', 'Eczema', 'Tinea']

def preprocess(path):
  """Load one image file and run the full preprocessing pipeline on it.

  Steps, in order: read with OpenCV, convert BGR -> RGB, resize to 128x128,
  remove hair, sharpen, then cast to float32 and divide by 255.

  Args:
    path: Filesystem path of the image to read.

  Returns:
    The processed float32 image, or None when OpenCV could not read the file.
  """
  bgr = cv2.imread(path)
  if bgr is None:
    # Unreadable or corrupt file: the caller is expected to skip it.
    return None

  rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
  cleaned = sharpen(remove_hair(resize_img(rgb, size=(128,128))))

  # Normalise: with 8-bit input this maps pixel values into [0, 1].
  return cleaned.astype(np.float32) / 255

In [None]:
#here the folder is going to be train_path and later test_path
def load_img_from_folder(folder):
  """Load and preprocess every image of the kept classes found under `folder`.

  `folder` is expected to contain one sub-directory per class. Only
  sub-directories whose name appears in `keep_classes` are read; every file
  inside them is passed through `preprocess`, and files it cannot read
  (it returns None) are skipped.

  Labels are indices into the alphabetically sorted `keep_classes`, not into
  the classes that happen to be present in `folder`. This keeps the label of
  a class identical across folders (e.g. train and test) even if one of them
  is missing a class. When every kept class is present, the labels are the
  same as with the previous per-folder numbering.

  Args:
    folder: Directory containing one sub-directory per class.

  Returns:
    A tuple (images, labels) of two equally long lists: the preprocessed
    images and their integer class labels.
  """
  images = []
  labels = []

  # Stable class -> label mapping shared by every folder that is loaded.
  class_to_label = {name: idx for idx, name in enumerate(sorted(set(keep_classes)))}

  for class_name in sorted(os.listdir(folder)):
    if class_name not in class_to_label:
      continue

    class_path = os.path.join(folder,class_name)
    if not os.path.isdir(class_path):
      # A stray file named like a class is not a class folder; ignore it.
      continue
    print(f"Loading class: {class_name}") #debug!

    label = class_to_label[class_name]
    # os.listdir order is unspecified; sorting makes the sample order, and
    # therefore any seeded split done afterwards, reproducible.
    for filename in sorted(os.listdir(class_path)):
      img = preprocess(os.path.join(class_path,filename))
      if img is None:
        continue
      images.append(img)
      labels.append(label)
  return images,labels


In [None]:
# Read the training folder: X holds the preprocessed images, y their integer labels.
X, y = load_img_from_folder(train_path)

Loading class: Acne
Loading class: Eczema
Loading class: Psoriasis
Loading class: Tinea
Loading class: Vitiligo


In [None]:
import tensorflow as tf
from tensorflow.keras import layers , models
from sklearn.model_selection import train_test_split

In [None]:
# Turn the Python lists of images and labels into NumPy arrays.
X, y = np.array(X), np.array(y)

In [None]:
# Hold out 20% of the training data for validation. This is a train/validation
# split, not a train/test split: the dataset ships a separate folder for testing.
# Stratifying on y keeps the class distribution the same in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Count the distinct labels that were actually loaded.
label_set = set(y)
num_classes = len(label_set)
print(num_classes)
print(label_set)

# History: an earlier iteration started with 10 classes and ended with 8, because some
# classes contained many unwanted images or corrupted files. The current keep_classes
# list selects 5 classes.

5
{np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4)}


In [None]:
# One-hot encode the integer labels. Left as plain numbers, the class ids would
# look like ordered quantities (1 < 2 < 3 ...) even though the classes have no
# such ranking.
# The ndim guard makes this cell safe to re-run: encoding labels that are
# already one-hot would silently produce arrays of shape (n, classes, classes).
if y_train.ndim == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
if y_val.ndim == 1:
    y_val = tf.keras.utils.to_categorical(y_val, num_classes)


In [None]:
def build_model(num_classes=None, input_shape=(128, 128, 3)):
    """Build and compile the CNN classifier.

    Architecture: three blocks of (Conv2D, Conv2D, MaxPooling2D) with 32, 64
    and 128 filters, followed by Flatten, Dense(256, relu), Dropout(0.5) and a
    softmax output layer. The model is compiled with the Adam optimizer,
    categorical cross-entropy loss and the accuracy metric.

    Args:
        num_classes: Number of output units. When None (the default) it is
            derived from the notebook-level label array `y`, as before.
        input_shape: Shape of one input image. Defaults to (128, 128, 3).

    Returns:
        The compiled Sequential model.
    """
    if num_classes is None:
        # Backward-compatible fallback: count the distinct labels in the
        # notebook-level `y`.
        num_classes = len(set(y))

    # Declare the input with an explicit Input layer rather than passing
    # input_shape= to the first Conv2D; Keras warns about the latter form for
    # Sequential models.
    stack = [layers.Input(shape=input_shape)]

    # Blocks 1-3: two same-padded 3x3 convolutions, then 2x2 max pooling.
    for filters in (32, 64, 128):
        stack += [
            layers.Conv2D(filters, (3,3), activation="relu", padding="same"),
            layers.Conv2D(filters, (3,3), activation="relu", padding="same"),
            layers.MaxPooling2D(2,2),
        ]

    # Classification head.
    stack += [
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]

    model = models.Sequential(stack)

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"]
    )

    return model

# Create the CNN and print its layer-by-layer summary.
model = build_model()
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Sanity check: report the shape and dtype of every array fed to training.
for _label, _array in (
    ("X_train:", X_train),
    ("y_train:", y_train),
    ("X_val:", X_val),
    ("y_val:", y_val),
):
    print(_label, _array.shape, _array.dtype)


X_train: (3248, 128, 128, 3) float32
y_train: (3248, 5) float64
X_val: (812, 128, 128, 3) float32
y_val: (812, 5) float64


In [None]:
# Train the model, evaluating on the validation split after every epoch.
EPOCHS = 15
BATCH_SIZE = 16  # safe batch size for Colab

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
    verbose=1,
)

Epoch 1/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 2s/step - accuracy: 0.2566 - loss: 1.5801 - val_accuracy: 0.3559 - val_loss: 1.4505
Epoch 2/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 2s/step - accuracy: 0.3511 - loss: 1.4554 - val_accuracy: 0.3399 - val_loss: 1.4856
Epoch 3/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m446s[0m 2s/step - accuracy: 0.3603 - loss: 1.4291 - val_accuracy: 0.4002 - val_loss: 1.3947
Epoch 4/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m417s[0m 2s/step - accuracy: 0.4306 - loss: 1.3403 - val_accuracy: 0.4126 - val_loss: 1.3671
Epoch 5/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m459s[0m 2s/step - accuracy: 0.4595 - loss: 1.2529 - val_accuracy: 0.4557 - val_loss: 1.2665
Epoch 6/15
[1m203/203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 2s/step - accuracy: 0.5217 - loss: 1.1443 - val_accuracy: 0.4507 - val_loss: 1.2715
Epoch 7/15
[1m203/203

In [None]:

# Report the accuracies recorded for the last training epoch.
metrics_log = history.history
train_acc = metrics_log["accuracy"][-1]
val_acc = metrics_log["val_accuracy"][-1]

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")

Training Accuracy: 0.8747
Validation Accuracy: 0.5517
