In [None]:
# Install Kaggle API (if not installed)
!pip install -q kaggle


In [None]:
from google.colab import files

# Upload kaggle.json (downloaded from Kaggle settings)
files.upload()


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"rakeshreddybokuri","key":"dacaeb5963376ca1dc3589d32d23fefb"}'}

In [None]:
# Create a hidden directory for Kaggle API key
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/

# Set proper permissions
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Create a Kaggle dataset directory if not exists
!mkdir -p ~/.kaggle

# Ensure correct permissions
!chmod 600 ~/.kaggle/kaggle.json

# Download the HAM10000 dataset from Kaggle
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

# Unzip the dataset
!unzip -q skin-cancer-mnist-ham10000.zip -d data

# Remove the zip file to save space
!rm skin-cancer-mnist-ham10000.zip

# List the extracted files
!ls data


Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
Downloading skin-cancer-mnist-ham10000.zip to /content
100% 5.19G/5.20G [00:41<00:00, 212MB/s]
100% 5.20G/5.20G [00:41<00:00, 134MB/s]
ham10000_images_part_1	HAM10000_images_part_2	hmnist_28_28_RGB.csv
HAM10000_images_part_1	HAM10000_metadata.csv	hmnist_8_8_L.csv
ham10000_images_part_2	hmnist_28_28_L.csv	hmnist_8_8_RGB.csv


In [None]:
import pandas as pd

# Define the metadata file path
metadata_path = "data/HAM10000_metadata.csv"

# Read the metadata file
metadata = pd.read_csv(metadata_path, encoding="utf-8")

# Display basic information about the dataset
print(metadata.info())
print(metadata.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10015 entries, 0 to 10014
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   lesion_id     10015 non-null  object 
 1   image_id      10015 non-null  object 
 2   dx            10015 non-null  object 
 3   dx_type       10015 non-null  object 
 4   age           9958 non-null   float64
 5   sex           10015 non-null  object 
 6   localization  10015 non-null  object 
dtypes: float64(1), object(6)
memory usage: 547.8+ KB
None
     lesion_id      image_id   dx dx_type   age   sex localization
0  HAM_0000118  ISIC_0027419  bkl   histo  80.0  male        scalp
1  HAM_0000118  ISIC_0025030  bkl   histo  80.0  male        scalp
2  HAM_0002730  ISIC_0026769  bkl   histo  80.0  male        scalp
3  HAM_0002730  ISIC_0025661  bkl   histo  80.0  male        scalp
4  HAM_0001466  ISIC_0031633  bkl   histo  75.0  male          ear


In [None]:
# Check for missing values
print(metadata.isnull().sum())

# Fill missing values if necessary
metadata.fillna("Unknown", inplace=True)


lesion_id        0
image_id         0
dx               0
dx_type          0
age             57
sex              0
localization     0
dtype: int64


  metadata.fillna("Unknown", inplace=True)


In [None]:
# Check unique diagnosis classes
print(metadata["dx"].value_counts())


dx
nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: count, dtype: int64


In [None]:
import os
import shutil

# Define paths
image_folders = ["data/HAM10000_images_part_1", "data/HAM10000_images_part_2"]
organized_path = "data/HAM10000_organized"

# Create main directory if not exists
os.makedirs(organized_path, exist_ok=True)

# Create a directory for each class (based on 'dx' column)
for class_name in metadata["dx"].unique():
    os.makedirs(os.path.join(organized_path, class_name), exist_ok=True)

# Move images to respective class folders
for _, row in metadata.iterrows():
    image_id = row["image_id"] + ".jpg"  # Add .jpg extension
    class_name = row["dx"]  # Get diagnosis class

    # Find the image in both folders
    src = None
    for folder in image_folders:
        src_path = os.path.join(folder, image_id)
        if os.path.exists(src_path):
            src = src_path
            break  # Stop searching once found

    # If image exists, move it to its class folder
    if src:
        dst = os.path.join(organized_path, class_name, image_id)
        shutil.move(src, dst)

print("✅ Images successfully organized into class folders!")


✅ Images successfully organized into class folders!


here


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image size and batch size
img_size = (224, 224)  # EfficientNetB3 requires 224x224 images
batch_size = 32

# Set up ImageDataGenerator for preprocessing & augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,   # Normalize pixel values (0-1)
    rotation_range=20,   # Random rotation
    width_shift_range=0.2,  # Random horizontal shift
    height_shift_range=0.2,  # Random vertical shift
    shear_range=0.2,   # Shearing transformations
    zoom_range=0.2,   # Random zoom
    horizontal_flip=True,  # Randomly flip images
    validation_split=0.2   # 20% for validation
)

# Load training data (80%)
train_generator = train_datagen.flow_from_directory(
    "data/HAM10000_organized",
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

# Load validation data (20%)
val_generator = train_datagen.flow_from_directory(
    "data/HAM10000_organized",
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Get class labels
class_labels = list(train_generator.class_indices.keys())
print("Classes:", class_labels)


Found 8015 images belonging to 7 classes.
Found 2000 images belonging to 7 classes.
Classes: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model

# Load EfficientNetB3 without the top classification layer
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze EfficientNetB3 layers (so we only train our classifier at first)
base_model.trainable = False

# Add custom classification layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)  # Dropout to prevent overfitting
x = Dense(len(class_labels), activation='softmax')(x)  # Output layer with number of classes

# Create the final model
model = Model(inputs=base_model.input, outputs=x)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Train the model for a few epochs (EfficientNetB3 is still frozen)
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5  # Start with 5 epochs, adjust if needed
)


  self._warn_if_super_not_called()


Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 4s/step - accuracy: 0.6568 - loss: 1.2203 - val_accuracy: 0.6874 - val_loss: 1.1499
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m446s[0m 4s/step - accuracy: 0.6582 - loss: 1.1873 - val_accuracy: 0.6874 - val_loss: 1.1129
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 4s/step - accuracy: 0.6851 - loss: 1.1312 - val_accuracy: 0.6874 - val_loss: 1.0981
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m444s[0m 4s/step - accuracy: 0.6837 - loss: 1.1444 - val_accuracy: 0.6874 - val_loss: 1.1267
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m462s[0m 4s/step - accuracy: 0.6884 - loss: 1.1246 - val_accuracy: 0.6874 - val_loss: 1.1030


In [None]:
# Unfreeze EfficientNetB3 layers for fine-tuning
base_model.trainable = True

# Compile again with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train again (fine-tuning EfficientNetB3)
history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5  # Train for more epochs if needed
)


Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1382s[0m 10s/step - accuracy: 0.1227 - loss: 2.7701 - val_accuracy: 0.6874 - val_loss: 1.4287
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1272s[0m 10s/step - accuracy: 0.6529 - loss: 1.2195 - val_accuracy: 0.6874 - val_loss: 1.2454
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1286s[0m 10s/step - accuracy: 0.7069 - loss: 0.9378 - val_accuracy: 0.6874 - val_loss: 1.2850
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1283s[0m 10s/step - accuracy: 0.7182 - loss: 0.8413 - val_accuracy: 0.6874 - val_loss: 1.1426
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1299s[0m 10s/step - accuracy: 0.7249 - loss: 0.8022 - val_accuracy: 0.6934 - val_loss: 0.8941


In [None]:
# Save the trained model
model.save("efficientnet_skin_model.h5")
print("Model saved successfully!")




Model saved successfully!


In [None]:
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model("efficientnet_skin_model.h5")
print("Model loaded successfully!")




Model loaded successfully!


In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Define function to make predictions
def predict_skin_disease(image_path):
    img = image.load_img(image_path, target_size=(224, 224))  # Resize to match model input
    img_array = image.img_to_array(img) / 255.0  # Normalize pixel values
    img_array = np.expand_dims(img_array, axis=0)  # Expand dimensions for batch

    # Make prediction
    prediction = model.predict(img_array)
    predicted_class = class_labels[np.argmax(prediction)]  # Get class with highest probability
    confidence = round(np.max(prediction) * 100, 2)  # Confidence percentage

    return predicted_class, confidence


In [None]:
# Provide a sample image path from the dataset
test_image_path = "/content/data/HAM10000_organized/nv/ISIC_0024308.jpg"  # Replace with an actual image path

# Make a prediction
predicted_class, confidence = predict_skin_disease(test_image_path)

# Print the result
print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
Predicted Class: nv
Confidence: 99.15%


In [None]:
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model("/content/efficientnet_skin_model.h5")

print("Model loaded successfully!")




Model loaded successfully!


In [None]:
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Recompile the model with the correct optimizer & loss function
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("Model recompiled successfully!")


Model recompiled successfully!


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image size and batch size
img_size = (224, 224)  # EfficientNetB3 input size
batch_size = 32

# Set up ImageDataGenerator for preprocessing & augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Load training data (80%)
train_generator = train_datagen.flow_from_directory(
    "data/HAM10000_organized",
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

# Load validation data (20%)
val_generator = train_datagen.flow_from_directory(
    "data/HAM10000_organized",
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Get class labels
class_labels = list(train_generator.class_indices.keys())
print("Classes:", class_labels)


Found 4002 images belonging to 7 classes.
Found 998 images belonging to 7 classes.
Classes: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Get class counts from the training generator
class_counts = train_generator.classes

# Compute class weights to balance the dataset
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(class_counts), y=class_counts)

# Convert class weights to dictionary format
class_weight_dict = {i: class_weights[i] for i in range(len(class_labels))}

print("Class Weights:", class_weight_dict)


Class Weights: {0: 3.889212827988338, 1: 2.6841046277665996, 2: 1.2648546144121366, 3: 12.704761904761904, 4: 1.6428571428571428, 5: 0.20827478532396565, 6: 10.994505494505495}


In [None]:
history_weighted = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,  # Adjust if needed
    class_weight=class_weight_dict  # Apply computed class weights
)


  self._warn_if_super_not_called()


Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1528s[0m 12s/step - accuracy: 0.7207 - loss: 1.9670 - val_accuracy: 0.7285 - val_loss: 0.7768
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1448s[0m 11s/step - accuracy: 0.7094 - loss: 1.5737 - val_accuracy: 0.7535 - val_loss: 0.7675
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1445s[0m 11s/step - accuracy: 0.6787 - loss: 1.4374 - val_accuracy: 0.7064 - val_loss: 0.8549
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1450s[0m 12s/step - accuracy: 0.6639 - loss: 1.3928 - val_accuracy: 0.7134 - val_loss: 0.8772
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1476s[0m 12s/step - accuracy: 0.6547 - loss: 1.2020 - val_accuracy: 0.6974 - val_loss: 0.9113


In [None]:
# Save the model in Keras format
model.save("/content/efficientnet_skin_model_balanced.keras")

print("Model saved successfully in Keras format!")


Model saved successfully in Keras format!


In [None]:
from tensorflow.keras.models import load_model

# Load the trained model from .keras format
model = load_model("/content/efficientnet_skin_model_balanced.keras")

print("Model loaded successfully from .keras format!")


Model loaded successfully from .keras format!


In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Define function to make predictions
def predict_skin_disease(image_path):
    img = image.load_img(image_path, target_size=(224, 224))  # Resize to match model input
    img_array = image.img_to_array(img) / 255.0  # Normalize pixel values
    img_array = np.expand_dims(img_array, axis=0)  # Expand dimensions for batch

    # Make prediction
    prediction = model.predict(img_array)
    predicted_class = class_labels[np.argmax(prediction)]  # Get class with highest probability
    confidence = round(np.max(prediction) * 100, 2)  # Confidence percentage

    return predicted_class, confidence


In [None]:
# Provide a sample image path from the dataset
test_image_path = "/content/data/HAM10000_organized/bkl/ISIC_0024382.jpg"  # Replace with an actual image path

# Make a prediction
predicted_class, confidence = predict_skin_disease(test_image_path)

# Print the result
print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
Predicted Class: bcc
Confidence: 21.15%


In [None]:
# Unfreeze more layers of EfficientNetB3
for layer in model.layers[-50:]:  # Unfreeze last 50 layers
    layer.trainable = True

# Recompile the model with a very low learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-6),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train again with fine-tuning
history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,  # Train for a few more epochs
    class_weight=class_weight_dict
)


Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1513s[0m 12s/step - accuracy: 0.6673 - loss: 1.0837 - val_accuracy: 0.7265 - val_loss: 0.8237
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1452s[0m 12s/step - accuracy: 0.6764 - loss: 1.1114 - val_accuracy: 0.6984 - val_loss: 0.8569
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1448s[0m 11s/step - accuracy: 0.6801 - loss: 1.1349 - val_accuracy: 0.7124 - val_loss: 0.8646
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1444s[0m 11s/step - accuracy: 0.6605 - loss: 1.0873 - val_accuracy: 0.6934 - val_loss: 0.8831
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1446s[0m 11s/step - accuracy: 0.6531 - loss: 1.0974 - val_accuracy: 0.7144 - val_loss: 0.8511


In [None]:
model.save("/content/efficientnet_skin_model_final.keras")
print("Model saved successfully in Keras format!")


Model saved successfully in Keras format!


In [None]:
# Provide a sample image path from the dataset
test_image_path = "/content/data/HAM10000_organized/vasc/ISIC_0025197.jpg"  # Replace with an actual image path

# Make a prediction
predicted_class, confidence = predict_skin_disease(test_image_path)

# Print the result
print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
Predicted Class: vasc
Confidence: 85.85%


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#  Unfreeze more layers of EfficientNetB3
for layer in model.layers[-100:]:
    layer.trainable = True

#  Apply stronger augmentations
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=40,
    width_shift_range=0.4,
    height_shift_range=0.4,
    shear_range=0.4,
    zoom_range=0.4,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3],  # Stronger brightness shifts
    validation_split=0.2
)

print("Fine-tuning enabled & augmentation enhanced!")


Fine-tuning enabled & augmentation enhanced!


In [None]:
#  Use label smoothing to reduce overconfidence
loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

#  Compile with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-7),
              loss=loss_fn,
              metrics=['accuracy'])

#  Train for additional fine-tuning
history_finetune_extra = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,  # More epochs for better accuracy
    class_weight=class_weight_dict
)

print("Fine-tuning completed!")


Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1553s[0m 12s/step - accuracy: 0.6706 - loss: 1.2432 - val_accuracy: 0.7004 - val_loss: 1.0946
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1469s[0m 12s/step - accuracy: 0.6674 - loss: 1.2743 - val_accuracy: 0.7234 - val_loss: 1.0891
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1463s[0m 12s/step - accuracy: 0.6628 - loss: 1.2735 - val_accuracy: 0.7074 - val_loss: 1.1012
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1466s[0m 12s/step - accuracy: 0.6524 - loss: 1.2981 - val_accuracy: 0.7084 - val_loss: 1.0875
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1466s[0m 12s/step - accuracy: 0.6774 - loss: 1.2159 - val_accuracy: 0.7234 - val_loss: 1.0796
Fine-tuning completed!


In [None]:
#  Save the final improved model in Keras format
model.save("/content/efficientnet_skin_model_final.keras")

print("Final model saved successfully!")


Final model saved successfully!
