In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import KFold

2024-07-04 06:47:08.367382: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-04 06:47:08.367490: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-04 06:47:08.552526: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split

def split_dataset(source_dir, target_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    # Create target directories
    for split in ['train', 'val', 'test']:
        split_dir = os.path.join(target_dir, split)
        if not os.path.exists(split_dir):
            os.makedirs(split_dir)

    # Get all class folders
    class_dirs = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]

    for class_dir in class_dirs:
        # Create class directories in train, val, and test
        for split in ['train', 'val', 'test']:
            class_split_dir = os.path.join(target_dir, split, class_dir)
            if not os.path.exists(class_split_dir):
                os.makedirs(class_split_dir)

        # Get all images in the class
        class_path = os.path.join(source_dir, class_dir)
        images = [img for img in os.listdir(class_path) if img.endswith(('.png', '.jpg', '.jpeg'))]

        # Split the data
        train_val, test = train_test_split(images, test_size=test_ratio, random_state=42)
        train, val = train_test_split(train_val, test_size=val_ratio/(train_ratio+val_ratio), random_state=42)

        # Copy images to respective directories
        for img in train:
            shutil.copy(os.path.join(class_path, img), os.path.join(target_dir, 'train', class_dir, img))
        for img in val:
            shutil.copy(os.path.join(class_path, img), os.path.join(target_dir, 'val', class_dir, img))
        for img in test:
            shutil.copy(os.path.join(class_path, img), os.path.join(target_dir, 'test', class_dir, img))

    print("Dataset split completed.")

# Usage
source_directory = '/kaggle/input/skin-diseases-image-dataset/IMG_CLASSES'
target_directory = '/kaggle/working/'

split_dataset(source_directory, target_directory)

Dataset split completed.


In [4]:

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Check for available GPUs and configure TensorFlow to use them
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    print(f"Found {len(physical_devices)} GPU(s)")
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
    print("GPU is enabled")
else:
    print("No GPU found. Running on CPU")


Found 2 GPU(s)
GPU is enabled


In [5]:


# Set paths
base_dir = '/kaggle/working/dataset'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')


In [6]:

# ImageDataGenerator for data augmentation and rescaling
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='reflect',
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input
)

valid_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)
test_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=tf.keras.applications.efficientnet.preprocess_input)


In [9]:


# Load datasets
batch_size = 32
img_size = (224, 224)
train_generator = train_datagen.flow_from_directory('/kaggle/working/train', target_size=img_size,
                                                    batch_size=batch_size, class_mode='categorical')
valid_generator = valid_datagen.flow_from_directory('/kaggle/working/val', target_size=img_size,
                                                    batch_size=batch_size, class_mode='categorical')
test_generator = test_datagen.flow_from_directory('/kaggle/working/test', target_size=img_size,
                                                  batch_size=batch_size, class_mode='categorical', shuffle=False)


Found 18997 images belonging to 10 classes.
Found 4078 images belonging to 10 classes.
Found 4078 images belonging to 10 classes.


In [10]:

# Function to create the model
def create_model():
    base_model = EfficientNetB0(input_shape=(*img_size, 3), include_top=False, weights='imagenet')
    base_model.trainable = False

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dropout(0.5),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.3),
        Dense(10, activation='softmax')
    ])

    return model


In [11]:

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

# Training parameters
epochs = 50
n_splits = 5

In [12]:


# K-fold Cross-validation
kfold = KFold(n_splits=n_splits, shuffle=True)
cv_scores = []

for fold, (train_index, val_index) in enumerate(kfold.split(train_generator.filenames)):
    print(f"Fold {fold+1}/{n_splits}")
    
    model = create_model()
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    

Fold 1/5
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


IndentationError: unexpected indent (2010335002.py, line 2)

In [13]:

    # Train the model
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_index) // batch_size,
        epochs=epochs,
        validation_data=valid_generator,
        validation_steps=len(val_index) // batch_size,
        callbacks=[early_stopping, reduce_lr]
    )
    

Epoch 1/50


  self._warn_if_super_not_called()


[1m  2/474[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25s[0m 54ms/step - accuracy: 0.1016 - loss: 8.1917   

I0000 00:00:1720076374.001502     183 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1720076374.059685     183 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m288/474[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:46[0m 571ms/step - accuracy: 0.2831 - loss: 3.9898

W0000 00:00:1720076537.845674     183 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 533ms/step - accuracy: 0.2861 - loss: 3.4786

W0000 00:00:1720076632.973195     184 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 589ms/step - accuracy: 0.2861 - loss: 3.4767 - val_accuracy: 0.2934 - val_loss: 2.1488 - learning_rate: 0.0010
Epoch 2/50
[1m120/474[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m2:36[0m 442ms/step - accuracy: 0.2735 - loss: 2.1641

  self.gen.throw(typ, value, traceback)


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 130ms/step - accuracy: 0.2875 - loss: 2.1496 - val_accuracy: 0.3179 - val_loss: 2.1158 - learning_rate: 0.0010
Epoch 3/50


W0000 00:00:1720076713.750552     184 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 508ms/step - accuracy: 0.2916 - loss: 2.1408 - val_accuracy: 0.2990 - val_loss: 2.1220 - learning_rate: 0.0010
Epoch 4/50
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 114ms/step - accuracy: 0.2887 - loss: 2.1342 - val_accuracy: 0.2417 - val_loss: 2.1972 - learning_rate: 0.0010
Epoch 5/50
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 502ms/step - accuracy: 0.2898 - loss: 2.1382 - val_accuracy: 0.2953 - val_loss: 2.1274 - learning_rate: 0.0010
Epoch 6/50
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 111ms/step - accuracy: 0.2932 - loss: 2.1357 - val_accuracy: 0.2781 - val_loss: 2.1416 - learning_rate: 0.0010
Epoch 7/50
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 517ms/step - accuracy: 0.2953 - loss: 2.1273 - val_accuracy: 0.2945 - val_loss: 2.1272 - learning_rate: 0.0010
Epoch 8/50
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [14]:

    # Evaluate the model
    scores = model.evaluate(test_generator)
    cv_scores.append(scores[1])
    print(f"Fold {fold+1} - Test accuracy: {scores[1]*100:.2f}%")

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 171ms/step - accuracy: 0.1707 - loss: 2.2658
Fold 5 - Test accuracy: 29.33%


In [None]:

    # Fine-tuning
    base_model = model.layers[0]
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False
    
    model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    
    history_fine = model.fit(
        train_generator,
        steps_per_epoch=len(train_index) // batch_size,
        epochs=epochs // 2,  # Train for fewer epochs during fine-tuning
        validation_data=valid_generator,
        validation_steps=len(val_index) // batch_size,
        callbacks=[early_stopping, reduce_lr]
    )
    
   

Epoch 1/25
[1m  2/474[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 63ms/step - accuracy: 0.2734 - loss: 2.1740   

W0000 00:00:1720078796.582608     186 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470ms/step - accuracy: 0.2892 - loss: 2.1501

W0000 00:00:1720079024.644946     184 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m294s[0m 522ms/step - accuracy: 0.2892 - loss: 2.1501 - val_accuracy: 0.2921 - val_loss: 2.1381 - learning_rate: 1.0000e-05
Epoch 2/25
[1m 45/474[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m7:31[0m 1s/step - accuracy: 0.3029 - loss: 2.1261 

W0000 00:00:1720079089.767981     184 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.2914 - loss: 2.1461 - val_accuracy: 0.2517 - val_loss: 2.1894 - learning_rate: 1.0000e-05
Epoch 3/25


W0000 00:00:1720079125.698972     183 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 504ms/step - accuracy: 0.2857 - loss: 2.1452 - val_accuracy: 0.2924 - val_loss: 2.1381 - learning_rate: 1.0000e-05
Epoch 4/25
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 112ms/step - accuracy: 0.2909 - loss: 2.1420 - val_accuracy: 0.2947 - val_loss: 2.1099 - learning_rate: 1.0000e-05
Epoch 5/25
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 504ms/step - accuracy: 0.2901 - loss: 2.1461 - val_accuracy: 0.2934 - val_loss: 2.1369 - learning_rate: 1.0000e-05
Epoch 6/25
[1m 12/474[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:22[0m 439ms/step - accuracy: 0.2711 - loss: 2.1595

In [None]:

    
    # Plot training & validation accuracy and loss
    plt.figure(figsize=(14, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title(f'Fold {fold+1} - Training and Validation Accuracy')
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title(f'Fold {fold+1} - Training and Validation Loss')
    plt.show()

   
    
    # Evaluate after fine-tuning
    scores_fine = model.evaluate(test_generator)
    print(f"Fold {fold+1} (after fine-tuning) - Test accuracy: {scores_fine[1]*100:.2f}%")
    
    # Plot fine-tuning results
    plt.figure(figsize=(14, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history_fine.history['accuracy'], label='Training Accuracy')
    plt.plot(history_fine.history['val_accuracy'], label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title(f'Fold {fold+1} (Fine-tuning) - Training and Validation Accuracy')
    
    plt.subplot(1, 2, 2)
    plt.plot(history_fine.history['loss'], label='Training Loss')
    plt.plot(history_fine.history['val_loss'], label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title(f'Fold {fold+1} (Fine-tuning) - Training and Validation Loss')
    plt.show()

print(f"Average CV accuracy: {np.mean(cv_scores)*100:.2f}% (+/- {np.std(cv_scores)*100:.2f}%)")