In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

2025-04-23 03:44:34.786870: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745379875.040707      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745379875.117822      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Load the CSV file
data_path = '/kaggle/input/ham10000-preprocessed-dataset/balanced and standardized images meta for custom.csv'  # Change this to your CSV file path
image_folder = '/kaggle/input/ham10000-preprocessed-dataset/balanced_and_standardized_images/balanced_and_standardized_images'  # Change this to your image folder path
df = pd.read_csv(data_path)

# Define parameters
IMG_SIZE = 96
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
DROPOUT_RATE = 0.5
EPOCHS = 20

# Get unique classes and create label mapping
classes = df['dx'].unique()
class_to_idx = {c: i for i, c in enumerate(classes)}
df['label'] = df['dx'].map(class_to_idx)

# Extract labels and encode them
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["dx"])  # Encode the 'dx' column

In [3]:
# Train-test split
train_df, test_df = train_test_split(df, test_size=0.4, stratify=df['label'], random_state=42)

In [4]:
# Image Data Generators with Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 20% of train set will be used as validation
)

train_generator = train_datagen.flow_from_dataframe(
    train_df, directory=image_folder, x_col='image_id', y_col='dx',
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode='categorical', subset='training'
)

val_generator = train_datagen.flow_from_dataframe(
    train_df, directory=image_folder, x_col='image_id', y_col='dx',
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode='categorical', subset='validation'
)

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    test_df, directory=image_folder, x_col='image_id', y_col='dx',
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode='categorical', shuffle=False
)

Found 22529 validated image filenames belonging to 7 classes.
Found 5632 validated image filenames belonging to 7 classes.
Found 18774 validated image filenames belonging to 7 classes.


In [5]:
# Load ResNet50 model
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False  # Freeze the base model

# Add custom top layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(DROPOUT_RATE)(x)
x = Dense(128, activation="relu")(x)
x = Dropout(DROPOUT_RATE)(x)
output = Dense(len(label_encoder.classes_), activation="softmax")(x)

# Compile the model
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss="categorical_crossentropy", metrics=["accuracy"])

# Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

2025-04-23 03:48:03.975360: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [6]:
# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=[early_stop]
)

Epoch 1/20


  self._warn_if_super_not_called()


[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m580s[0m 800ms/step - accuracy: 0.1527 - loss: 2.1947 - val_accuracy: 0.1589 - val_loss: 1.9431
Epoch 2/20
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m507s[0m 717ms/step - accuracy: 0.1493 - loss: 1.9937 - val_accuracy: 0.2125 - val_loss: 1.9411
Epoch 3/20
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 645ms/step - accuracy: 0.1533 - loss: 1.9569 - val_accuracy: 0.1625 - val_loss: 1.9424
Epoch 4/20
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 720ms/step - accuracy: 0.1576 - loss: 1.9477 - val_accuracy: 0.1967 - val_loss: 1.9432
Epoch 5/20
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m461s[0m 652ms/step - accuracy: 0.1644 - loss: 1.9428 - val_accuracy: 0.1951 - val_loss: 1.9413
Epoch 6/20
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m457s[0m 645ms/step - accuracy: 0.1723 - loss: 1.9381 - val_accuracy: 0.2362 - val_loss: 1.9354
Epoch 7/20
[1m

In [7]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns

def plot_model_training_curve(history):
    fig = make_subplots(rows=1, cols=2, subplot_titles=['Model Accuracy', 'Model Loss'])
    fig.add_trace(
        go.Scatter(
            y=history.history['accuracy'], 
            name='train_acc'), 
        row=1, col=1)
    fig.add_trace(
        go.Scatter(
            y=history.history['val_accuracy'], 
            name='val_acc'), 
        row=1, col=1)
    fig.add_trace(
        go.Scatter(
            y=history.history['loss'], 
            name='train_loss'), 
        row=1, col=2)
    fig.add_trace(
        go.Scatter(
            y=history.history['val_loss'], 
            name='val_loss'), 
        row=1, col=2)
    fig.show()

In [8]:
plot_model_training_curve(history)

In [9]:
# Test function
def evaluate_model():
    test_loss, test_acc = model.evaluate(test_generator)
    print(f'\nTest Accuracy: {test_acc * 100:.2f}%')

# Run evaluation
evaluate_model()


Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



[1m587/587[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 670ms/step - accuracy: 0.2522 - loss: 1.8526

Test Accuracy: 24.69%


In [10]:
model.save('/kaggle/working/resnet 60-40.keras') 

In [11]:
model.save_weights('/kaggle/working/resnet 60-40.weights.h5') 