# Skin Disease Model

## Import all the Dependencies



In [None]:
import numpy as np
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K

In [None]:
import zipfile

# Unpack the uploaded dataset archive into the /content working directory.
zip_file_path = '/content/archive (16).zip'
with zipfile.ZipFile(zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path='/content')

## Set all the Constants


In [None]:
# Training settings. NOTE: INIT_LR and EPOCHS are not referenced by the
# active cells of this notebook (compile uses 'adam', fit uses epochs=20).
INIT_LR = 1e-6
BATCH_SIZE = 32
EPOCHS = 50

# Image geometry: square inputs with CHANNELS colour channels.
IMAGE_SIZE = 224
CHANNELS = 3
default_image_size = (IMAGE_SIZE, IMAGE_SIZE)
image_size = 0

# Dataset location and the tf.data auto-tuning sentinel used for prefetching.
data_dir = "/content/skin-disease-datasaet/train_set"
AUTOTUNE = tf.data.AUTOTUNE

# Initializing, Exploring & Partitioning the Dataset

## Function to Split Dataset


Function to Split Dataset
The dataset should be divided into 3 subsets, namely:

1. Training: the data the model is trained on
2. Validation: the data the model is evaluated against during training
3. Test: the data the model is evaluated against after training has finished

In [None]:
import tensorflow as tf

IMAGE_SIZE = 224
BATCH_SIZE = 32
SEED = 123
TEST_HOLDOUT = 20      # number of test_set images reserved for the final test
SHUFFLE_BUFFER = 1000  # larger than the test_set folder, so the shuffle is global

# Load the full training dataset from the train folder.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/skin-disease-datasaet/train_set",
    seed=SEED,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

# Load the test folder WITHOUT the loader's own shuffling. With shuffle=True
# the loader reshuffles on every iteration; take() and skip() below iterate
# the source independently, so they could see different orders and the test
# and validation subsets could overlap.
full_test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/skin-disease-datasaet/test_set",
    seed=SEED,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=1,   # one image per element so an exact count can be taken
    shuffle=False,
)

# Flatten to individual images, then shuffle ONCE in a fixed order:
# reshuffle_each_iteration=False makes every pass yield the same sequence,
# which keeps the take/skip split disjoint.
full_test_ds = full_test_ds.unbatch().shuffle(
    SHUFFLE_BUFFER, seed=SEED, reshuffle_each_iteration=False
)

# First TEST_HOLDOUT images form the test set ...
test_ds = full_test_ds.take(TEST_HOLDOUT).batch(BATCH_SIZE)

# ... and everything after them is used for validation.
val_ds = full_test_ds.skip(TEST_HOLDOUT).batch(BATCH_SIZE)

# Prefetch for performance.
train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=tf.data.AUTOTUNE)


Found 924 files belonging to 8 classes.
Found 233 files belonging to 8 classes.


## Reading and Partitioning the Dataset

We create a TensorFlow Dataset object by reading the images directly from the directory with `image_dataset_from_directory`, and then split it using the partitioning step described above.

In [None]:
# Load the training directory as a single dataset object. Later cells read
# `dataset.class_names` from it.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    seed=123,
    image_size=default_image_size,
    batch_size=BATCH_SIZE,
)

# NOTE: this cell used to call get_dataset_partitions_tf(dataset), but that
# helper is not defined anywhere in this notebook, so the call raised
# NameError. train_ds / val_ds / test_ds are already built by the
# dataset-loading cell (from train_set and test_set), so no re-split is done.

Found 924 files belonging to 8 classes.


## Checking the Available Classes


In [None]:

# Read the class labels exposed by the loaded dataset and report how many there are.
class_names = dataset.class_names
n_classes = len(class_names)
print(f"{n_classes} {class_names}")

8 ['BA- cellulitis', 'BA-impetigo', 'FU-athlete-foot', 'FU-nail-fungus', 'FU-ringworm', 'PA-cutaneous-larva-migrans', 'VI-chickenpox', 'VI-shingles']


## Displaying Some Sample Images


In [None]:

# Preview the first nine images of one training batch in a 3x3 grid,
# each titled with its class name.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(class_names[labels[i]])
        ax.axis("off")

## Checking Batch Size


In [None]:

# Print the tensor shapes of a single training batch (images first, then labels).
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape, labels_batch.shape, sep="\n")

## Cache, Shuffle, and Prefetch the Dataset


In [None]:
def _cache_shuffle_prefetch(ds):
    """Cache *ds*, shuffle it with a 1000-element buffer, then prefetch."""
    return ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)


train_ds = _cache_shuffle_prefetch(train_ds)
val_ds = _cache_shuffle_prefetch(val_ds)
test_ds = _cache_shuffle_prefetch(test_ds)

In [None]:
# Bare expression: as the last line of the cell, the notebook displays its repr.
test_ds

# Building the Model


## Creating a Layer for Resizing and Normalization


Before we feed our images to the network, we should resize them to the desired size. Moreover, to improve model performance, we should normalize the pixel values (scaling them into the range 0 to 1 by dividing by 255). This should happen during training as well as inference, hence we add it as a layer in our Sequential model.

In [None]:
# Preprocessing stage: resize to IMAGE_SIZE x IMAGE_SIZE, then multiply
# every pixel value by 1/255.
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(layers.Resizing(height=IMAGE_SIZE, width=IMAGE_SIZE))
resize_and_rescale.add(layers.Rescaling(scale=1. / 255))

## Data Augmentation


Data augmentation is useful when we have little data: it can boost the accuracy of our model by artificially enlarging the training set with randomly transformed copies of the images.

In [None]:
# Random augmentation pipeline. In the model this stage runs AFTER
# `resize_and_rescale`, so the images it receives are already in [0, 1].
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomFlip("vertical"),  # top-bottom flip
    layers.RandomRotation(0.2),
    # value_range must describe the actual pixel range. The layer's default
    # of (0, 255) would shift [0, 1] pixels by up to +/-0.2 * 255, wiping out
    # the image content.
    layers.RandomBrightness(0.2, value_range=(0.0, 1.0)),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.1, 0.1),  # up to 10% vertical & horizontal shift
    layers.RandomContrast(0.1),
])

## Checking what is the expected dimension order for channel


In [None]:

# Choose the tensor layout according to the backend's image data format.
if K.image_data_format() == "channels_first":
    input_shape = (CHANNELS, IMAGE_SIZE, IMAGE_SIZE)
    chanDim = 1
else:
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
    chanDim = -1

# Same layout with the batch dimension prepended.
batch_input_shape = (BATCH_SIZE, *input_shape)

## Model Architecture


We use a CNN coupled with a Softmax activation in the output layer. We also add the initial layers for resizing, normalization and Data Augmentation.



In [None]:
# Filter counts of the five Conv2D -> MaxPooling2D stages, in order.
conv_filters = (32, 64, 64, 64, 128)

feature_layers = []
for stage, n_filters in enumerate(conv_filters):
    # Only the first convolution is given `input_shape`.
    conv_kwargs = {"input_shape": input_shape} if stage == 0 else {}
    feature_layers.append(
        layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu', **conv_kwargs)
    )
    feature_layers.append(layers.MaxPooling2D((2, 2)))

# Full network: preprocessing, augmentation, convolutional feature extractor,
# then a dense head ending in a softmax over the n_classes labels.
model = models.Sequential([
    resize_and_rescale,
    data_augmentation,
    *feature_layers,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

model.build(input_shape=batch_input_shape)

## Model Summary


In [None]:
# model.summary()


## Compiling the Model

We use the Adam optimizer, SparseCategoricalCrossentropy as the loss, and accuracy as the metric.

In [None]:
# The network's last layer is a softmax, so the loss is configured to consume
# probabilities rather than raw logits.
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer='adam', loss=sparse_ce, metrics=['accuracy'])

## Training the Network


In [None]:

# history = model.fit(
#     train_ds,
#     batch_size=BATCH_SIZE,
#     validation_data=val_ds,
#     verbose=1,
#     epochs=20,
# )

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Callback that keeps only the best model seen so far, judged by validation
# accuracy.
checkpoint = ModelCheckpoint(
    "best_model.h5",                # file name for saving best model
    monitor="val_accuracy",         # metric to monitor
    mode="max",                     # because higher accuracy is better
    save_best_only=True,            # only save the best one
    verbose=1
)

# No `steps_per_epoch`: train_ds is a finite, non-repeating dataset, so Keras
# should run one full pass over it per epoch. The previous hard-coded value of
# 47 exceeded the number of batches available, which makes Keras report
# "Your input ran out of data" and interrupt training.
history = model.fit(
    train_ds,
    epochs=20,
    validation_data=val_ds,
    callbacks=[checkpoint],
)


## Testing the Model


In [None]:
# Evaluate on the held-out test set. `scores` is [loss, accuracy], matching
# the loss and the single metric passed to model.compile.
print("[INFO] Calculating model accuracy")
scores = model.evaluate(test_ds)
# Format with a fixed number of decimals: round(x, 4) * 100 can print float
# artefacts such as 55.00000000000001%.
print(f"Test Accuracy: {scores[1] * 100:.2f}%")

## Plotting the Accuracy and Loss Curves


In [None]:
# Per-epoch curves recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(history.history['accuracy']))

# One panel per quantity:
# (train series, val series, train label, val label, legend position, title).
panels = (
    (acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss, val_loss, 'Training Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)

plt.figure(figsize=(8, 8))
for position, (train_series, val_series, train_label, val_label,
               legend_loc, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_series, label=train_label)
    plt.plot(epochs_range, val_series, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
plt.show()

## Saving the Model


We append the model to the list of models as a new version



In [None]:

# # import os
# # model_version = max([int(i) for i in (os.listdir("../models")+[0])]) + 1
# model.save(f"/content/modle.h5")

## Function for Inference


In [None]:
# model.save(f"/content/modle.h5")
# def predict(model, img):
#     img_array = tf.keras.preprocessing.image.img_to_array(images[i].numpy())
#     img_array = tf.expand_dims(img_array, 0) # Create a batch

#     predictions = model.predict(img_array)

#     predicted_class = class_names[np.argmax(predictions[0])]
#     confidence = round(100 * (np.max(predictions[0])), 2)
#     return predicted_class, confidence

## Plotting the Inference Data

In [None]:
# plt.figure(figsize=(15, 15))
# for images, labels in test_ds.take(1):
#     for i in range(9):
#         ax = plt.subplot(3, 3, i + 1)
#         plt.imshow(images[i].numpy().astype("uint8"))
#         predicted_class, confidence = predict(model, images[i].numpy())

#         actual_class = class_names[labels[i]]
#         plt.title(f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")
#         plt.axis("off")

In [None]:
# import tensorflow as tf
# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
# from tensorflow.keras.models import Model


# # Pretrained ResNet50 base
# # inputs = tf.keras.Input(shape=(224,224,3))
# # x = data_augmentation(inputs)
# base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))

# # Freeze base layers
# base_model.trainable = False

# # Add custom classifier
# x = GlobalAveragePooling2D()(base_model.output)
# x = Dense(512, activation='relu')(x)
# x = Dropout(0.4)(x)
# output = Dense(n_classes, activation='softmax')(x) # Use n_classes instead of hardcoded 8

# model = Model(inputs=base_model.input, outputs=output)

# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), # Change loss function
#               metrics=['accuracy'])

In [None]:
# history = model.fit(
#     train_ds,
#     batch_size=BATCH_SIZE,
#     validation_data=val_ds,
#     verbose=1,
#     epochs=8,
# )

In [None]:
# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']

# loss = history.history['loss']
# val_loss = history.history['val_loss']

# epochs_range = range(len(history.history['accuracy']))

# plt.figure(figsize=(8, 8))
# plt.subplot(1, 2, 1)
# plt.plot(epochs_range, acc, label='Training Accuracy')
# plt.plot(epochs_range, val_acc, label='Validation Accuracy')
# plt.legend(loc='lower right')
# plt.title('Training and Validation Accuracy')

# plt.subplot(1, 2, 2)
# plt.plot(epochs_range, loss, label='Training Loss')
# plt.plot(epochs_range, val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.title('Training and Validation Loss')
# plt.show()

In [None]:
# def predict(model, img):
#     img_array = tf.keras.preprocessing.image.img_to_array(images[i].numpy())
#     img_array = tf.expand_dims(img_array, 0) # Create a batch

#     predictions = model.predict(img_array)

#     predicted_class = class_names[np.argmax(predictions[0])]
#     confidence = round(100 * (np.max(predictions[0])), 2)
#     return predicted_class, confidence

In [None]:
# print("[INFO] Calculating model accuracy")
# scores = model.evaluate(test_ds)
# print(f"Test Accuracy: {round(scores[1],4)*100}%")

In [None]:
# import matplotlib.pyplot as plt

# # Assuming test_ds is created from test_set folder like:
# # test_ds = image_dataset_from_directory("dataset/test_set", ...)

# # You can hardcode the folder name here since you're looping over test_ds
# origin_folder = "test_set"

# plt.figure(figsize=(15, 15))
# for images, labels in test_ds.take(1):
#     for i in range(9):
#         ax = plt.subplot(3, 3, i + 1)
#         plt.imshow(images[i].numpy().astype("uint8"))

#         # Assuming predict returns: (predicted_class: str, confidence: float)
#         predicted_class, _ = predict(model, images[i].numpy())

#         actual_class = class_names[labels[i]]
#         plt.title(f"Folder: {origin_folder}\nActual: {actual_class}\nPredicted: {predicted_class}")
#         plt.axis("off")

# plt.tight_layout()
# plt.show()


In [None]:
# model.save(f"/content/modle.h5")
