# In [None]:
# dermai_skin_disease_classifier.py

# 1. Import Required Libraries
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 2. Load Dataset
# Locations of the image folder and of the metadata CSV that lists the images.
IMAGE_DIR = 'HAM10000_images_part_1'  # Path to images folder
CSV_PATH = 'HAM10000_metadata.csv'    # Path to CSV file

# Read the metadata table, then report how many rows carry each 'dx' label.
metadata = pd.read_csv(filepath_or_buffer=CSV_PATH)
dx_counts = metadata['dx'].value_counts()
print("[INFO] Classes:\n", dx_counts)

# 3. Preprocess Images
# Read every image listed in the metadata, resize it to 224x224 and keep it
# together with its 'dx' label. Rows whose image cannot be read are skipped,
# but they are counted and reported instead of disappearing silently.
images = []
labels = []
skipped = 0

# Iterate over just the two columns that are needed; iterrows() would build a
# full Series object for every row.
for image_id, dx in zip(metadata['image_id'], metadata['dx']):
    img_path = os.path.join(IMAGE_DIR, image_id + '.jpg')
    # cv2.imread returns None (no exception) when the file is missing or
    # unreadable. NOTE: OpenCV delivers channels in BGR order; any inference
    # code feeding the trained model must use the same channel order.
    img = cv2.imread(img_path)
    if img is None:
        skipped += 1
        continue
    images.append(cv2.resize(img, (224, 224)))
    labels.append(dx)

if not images:
    # Fail here with a clear message rather than deep inside encoding/training.
    raise FileNotFoundError(
        f"No readable images found in '{IMAGE_DIR}' for the rows of '{CSV_PATH}'"
    )
if skipped:
    print(f"[WARN] Skipped {skipped} of {len(metadata)} metadata rows: "
          f"image missing or unreadable in '{IMAGE_DIR}'")

# Normalize pixel values to [0, 1]. Building the array as float32 halves the
# memory of the default float64 result of `uint8_array / 255.0`.
images = np.asarray(images, dtype=np.float32) / 255.0
labels = np.array(labels)

# 4. Encode Labels
# First learn the label -> integer id mapping, then apply it, and finally
# expand the integer ids into one-hot rows for the categorical loss.
le = LabelEncoder().fit(labels)
labels_encoded = le.transform(labels)
labels_categorical = to_categorical(labels_encoded)

# 5. Train-Test Split
# Hold out 20% of the samples for testing. The split is stratified on the
# integer class ids so that each class keeps the same proportion in the train
# and test sets; without it a rare class can end up under-represented (or
# absent) in the test set, which distorts the per-class metrics below.
# NOTE: stratification requires at least two samples of every class.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels_categorical,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded,
)

# 6. Build CNN Model
# Three Conv/MaxPool stages followed by a dense classifier head.
# The input shape and the number of output units are taken from the prepared
# data instead of being hard-coded, so the softmax layer always matches the
# width of the one-hot targets.
input_shape = images.shape[1:]            # (height, width, channels) of one image
num_classes = labels_categorical.shape[1]  # one column per encoded class

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),  # one probability per class
])

# 7. Compile Model
# Adam optimizer with categorical cross-entropy (targets are one-hot rows);
# accuracy is tracked so it can be plotted afterwards.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# 8. Train Model
# 20 epochs, mini-batches of 32, with 20% of the training data set aside by
# Keras for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
)

# 9. Evaluate Model
# Predict class probabilities for the test set and reduce both predictions and
# one-hot targets to integer class ids.
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

class_names = le.classes_
# Pass the full list of class ids explicitly: without `labels`, the report only
# covers ids that occur in y_true/y_pred and raises when that count differs
# from len(target_names) (e.g. a rare class missing from the test set and
# never predicted).
class_ids = np.arange(len(class_names))
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# 10. Confusion Matrix
# Fix the row/column order to the encoder's class ids so the matrix always has
# one row and one column per entry of class_names; otherwise a class absent
# from both y_true and y_pred would shrink the matrix and shift the tick labels.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# 11. ROC AUC Score (One-vs-Rest)
# Scores the predicted probabilities against the one-hot test targets.
# NOTE(review): because y_test is a one-hot indicator matrix, scikit-learn is
# expected to score each class column as its own binary problem and average
# them — confirm against the installed scikit-learn version.
auc_score = roc_auc_score(
    y_true=y_test,
    y_score=y_pred_probs,
    multi_class='ovr',
)
print(f"Multiclass ROC AUC Score: {auc_score:.4f}")

# 12. Accuracy Plot
# Draw the per-epoch training and validation accuracy recorded in `history`.
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()

# 13. Save Model
# Keep the output file name in one place so the save call and the log message
# can never drift apart.
MODEL_PATH = 'dermai_cnn_model.h5'
model.save(MODEL_PATH)
print(f"[INFO] Model saved as '{MODEL_PATH}'")
