In [7]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import random

In [8]:
random.seed(42)

## Preprocessing

In [9]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the path to your dataset
dataset_path =r"D:\MS-AI\2ndSem\AIPM\Lab\AIPM---Skin-Disease-Detection\dataset"

# Define image size
image_size = (64, 64)

# Create an ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values to [0, 1]
    rotation_range=30,  # Random rotation between -30 and 30 degrees
    horizontal_flip=True,  # Random horizontal flip
)

# This will store image data and labels
metadata = []

def preprocess_images_from_folder(folder_path, class_name):
    """
    Process all images from a folder and store the image data and labels into the metadata list.
    :param folder_path: The path to the folder containing images
    :param class_name: The class name (folder name) of the images
    """
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith(('png', 'jpg', 'jpeg')):
                img_path = os.path.join(root, file)
                
                # Read the image using OpenCV
                img = cv2.imread(img_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR (OpenCV) to RGB

                # Resize the image to the target size
                img_resized = cv2.resize(img, image_size)

                # Normalize the image
                img_resized_norm = img_resized / 255.0

                # Add the image data (as a 3D array) and label to metadata
                metadata.append({
                    'image_data': img_resized_norm,
                    'class': class_name
                })

                # Apply augmentation if necessary (not saving augmented images for CSV)
                img_resized_norm = np.expand_dims(img_resized_norm, axis=0)  # Add batch dimension
                augmented_gen = datagen.flow(img_resized_norm, batch_size=1)

                for _ in range(1):  # Generate augmented images
                    augmented_img = next(augmented_gen)[0]

                    # Add augmented image data (as a 3D array) and label to metadata
                    metadata.append({
                        'image_data': augmented_img,
                        'class': class_name
                    })

def process_dataset(dataset_path):
    """
    Loop through the 'train' and 'test' directories and preprocess all images
    :param dataset_path: The root path of the dataset
    """
    # Process the 'train' folder
    train_dir = os.path.join(dataset_path, 'train')
    for class_name in os.listdir(train_dir):
        class_path = os.path.join(train_dir, class_name)
        preprocess_images_from_folder(class_path, class_name)

    # Process the 'test' folder
    test_dir = os.path.join(dataset_path, 'test')
    for class_name in os.listdir(test_dir):
        class_path = os.path.join(test_dir, class_name)
        preprocess_images_from_folder(class_path, class_name)

# Collect metadata and save to CSV
def save_metadata_to_csv(metadata):
    metadata_df = pd.DataFrame(metadata)
    metadata_df.to_csv('image_metadata.csv', index=False)
    print("Metadata saved to 'image_metadata.csv'")

# Save image data as a numpy file (.npz) containing arrays
def save_metadata_to_npz(metadata, filename='image_data.npz'):
    # Convert metadata to numpy arrays
    images = np.array([entry['image_data'] for entry in metadata])
    labels = np.array([entry['class'] for entry in metadata])

    # Save both images and labels in a compressed .npz file
    np.savez_compressed(filename, images=images, labels=labels)
    print(f"Metadata saved to {filename}")


# Run the preprocessing
process_dataset(dataset_path)

# Usage
save_metadata_to_npz(metadata)


Metadata saved to image_data.npz


In [10]:
# Loading data from the .npz file
data = np.load('image_data.npz')
X = data['images']  # This will have shape (num_samples, 64, 64, 3)
y = data['labels']

In [11]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Convert text labels to numeric labels
y = label_encoder.fit_transform(y)
y_categorical = to_categorical(y, num_classes=23)
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)
X_train = np.array(X_train, dtype=np.float32)


## Grid Search

## Model creation

In [12]:
# Initialize the CNN model
model = Sequential()

# Add convolutional and pooling layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

# Add fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # Dropout for regularization
model.add(Dense(23, activation='softmax')) 

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Split data manually (instead of using validation_split)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Train the model with explicit validation data
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))


# Save the trained model
model.save('skin_disease_model.h5')


Epoch 1/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 35ms/step - accuracy: 0.0977 - loss: 2.9986 - val_accuracy: 0.1355 - val_loss: 2.9142
Epoch 2/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.1253 - loss: 2.9067 - val_accuracy: 0.1352 - val_loss: 2.8901
Epoch 3/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.1338 - loss: 2.8737 - val_accuracy: 0.1379 - val_loss: 2.8662
Epoch 4/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.1457 - loss: 2.8262 - val_accuracy: 0.1502 - val_loss: 2.8336
Epoch 5/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.1671 - loss: 2.7647 - val_accuracy: 0.1529 - val_loss: 2.8201
Epoch 6/10
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - accuracy: 0.1713 - loss: 2.7218 - val_accuracy: 0.1542 - val_loss: 2.8189
Epoch 7/10
[1m7



In [14]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')


[1m245/245[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.1765 - loss: 2.8355
Test Accuracy: 17.59%


In [None]:
# Train the model with explicit validation data
historymodel2 = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))


# Save the trained model
model.save('skin_disease_model2.h5')
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy for model 2: {accuracy * 100:.2f}%')