Imports

In [None]:
import numpy as np
import pandas as pd
import os
from kaggle_datasets import KaggleDatasets
from tensorflow import keras
import tensorflow.python.keras.layers
from keras.preprocessing.image import ImageDataGenerator

#Model Imports
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam

#Adding class weights
from sklearn.utils.class_weight import compute_class_weight

#Adding Oversampler
from imblearn.over_sampling import RandomOverSampler

#Plotting Imports
import matplotlib.pyplot as plt

#Confusion matrix import
from sklearn.metrics import confusion_matrix

#Train test split import
from sklearn.model_selection import train_test_split

#Import to calculate AUC
from sklearn.metrics import roc_auc_score

Import Data Sets


In [None]:
#Import the training dataset: read the competition's train.csv into a dataframe.
#Later cells rely on its 'image_name' and 'target' columns.
dataPath = "/kaggle/input/c/siim-isic-melanoma-classification/train.csv"
data_df = pd.read_csv(dataPath)

In [None]:
#Output the head of the data dataframe
data_df.head()

Drop duplicates: Kaggle users reported duplicate image names in this dataset.


In [None]:
#Drop duplicates based on the "image_name" column
data_df = data_df.drop_duplicates(subset=['image_name'])

#Renumber the rows 0..n-1 after dropping. reindex() without arguments returns
#the frame unchanged, so reset_index is what actually rebuilds the index.
data_df = data_df.reset_index(drop=True)

Split the data: hold out a random 20% of the rows as the testing dataframe.

In [None]:
#Set the test data to be 20% of the total data.
test_size = len(data_df) * 0.2

#Sample the test rows with a fixed seed so the train/test split is identical
#on every Restart & Run All. copy() makes test_df an independent frame so the
#column assignments in later cells do not raise SettingWithCopyWarning.
test_df = data_df.sample(n=int(test_size), random_state=42).copy()

#Output the size of the test data
print("The length of the testing data is: " + str(len(test_df)))

In [None]:
#Show the testing data frame
test_df.head()

Create the training dataframe.


In [None]:
#Create the training dataframe from every row that is not in the test set.
#drop() returns a new frame, so data_df stays intact and this cell can be
#re-run safely (aliasing data_df and dropping inplace mutated data_df itself
#and raised a KeyError on the second run).
train_df = data_df.drop(test_df.index)

#Output the size of the training data
print("The length of the training data is: " + str(len(train_df)))

In [None]:
#Show the first rows of the training dataframe
train_df.head()

Change the type so that it can be used in the ImageDataGenerator.


In [None]:
#Cast the target column of both splits to str so the labels can be consumed
#by flow_from_dataframe with class_mode='binary'.
for split_df in (train_df, test_df):
    split_df['target'] = split_df['target'].astype(str)

Create the ImagePath column, so that the images can be found within the ImageDataGenerator.


In [None]:
#Build the full path of every image so the generators can locate the files
image_dir = '/kaggle/input/c/siim-isic-melanoma-classification/jpeg/train/'
train_df['image_path'] = image_dir + train_df['image_name'] + '.jpg'
test_df['image_path'] = image_dir + test_df['image_name'] + '.jpg'

#Only the last expression of the cell (the image_path column) is displayed
train_df.head()
train_df['image_path']

Create the ImageDataGenerators for training, testing and validation.

In [None]:
# Initialize the ImageDataGenerator object.
# rescale multiplies every pixel value by 1/255; validation_split reserves 30%
# of the rows for the generators created with subset='validation'.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)

#Defining batch size (shared by every generator below)
batch_size = 32

In [None]:
# Create a training generator.
# Reads image paths from 'image_path' and string labels from 'target', resizes
# every image to 224x224 and yields batches with one binary label per image.
# subset='training' selects the rows not reserved by datagen's validation_split.
training_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)


In [None]:
# Create a validation generator.
# Same settings as the training generator, but subset='validation' selects the
# fraction of train_df reserved by datagen's validation_split.
validation_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)

In [None]:
#Create the testing generator.
#A dedicated generator without validation_split is used so that every row of
#test_df is evaluated (subset='validation' on datagen kept only 30% of them).
#shuffle=False keeps the batches in dataframe order, so model.predict() output
#lines up with testing_generator.classes / .labels.
test_datagen = ImageDataGenerator(rescale=1./255)
testing_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

Create the base model and load the pretrained weights from the local weights file.

In [None]:
#Define the image shape (height, width, channels) expected by the network
image_shape = (224, 224, 3)

#Weights pathway (pretrained VGG16 weights without the classification top)
vgg_weights = '/kaggle/input/vgg16-weights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'

#Define the base model.
#The `classes` argument only applies when include_top=True, so it is not passed
#here; the classification head is added on top of this base in a later cell.
base_model = VGG16(weights=vgg_weights, include_top=False, input_shape=image_shape)

Creating the first Model.

In [None]:
#Stack a small classification head on top of the VGG16 base:
#flatten the feature maps, one hidden dense layer with dropout, then a single
#sigmoid unit that outputs the probability of the positive class.
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [None]:
model.summary()

Calculating the training and validation steps.


In [None]:
#Calculate training steps.
#len() of a Keras generator is already the number of batches per epoch, so it
#must not be divided by batch_size again (that used only ~1/32 of the data).
train_steps = len(training_generator)

#Calculate the validation steps
val_steps = len(validation_generator)

#Define number of epochs
num_epochs = 10

Train the first model.

In [None]:
#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#track accuracy and AUC, then train on the training generator.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
#Save the model
# save the model architecture to a JSON file
model_json = model.to_json()
with open('my_model.json', 'w') as json_file:
    json_file.write(model_json)

# save the model weights to an HDF5 file
model.save_weights('my_model_weights.h5')
# also save the whole model to a single HDF5 file
model.save('my_model.h5')

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)


In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
# (model.evaluate returns the loss first, followed by the compiled metrics in order)
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

Create confusion matrix

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes


In [None]:
#Calculate confusion matrix
#(scikit-learn convention: rows are the true classes, columns the predicted classes)
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

The model predicted the negative class (benign) for every test image: all 1961 benign cases were classified correctly (true negatives), while all 26 malignant cases were misclassified as benign (false negatives). There were no false positives (predicted positive but actually negative) and no true positives (predicted positive and actually positive). The model therefore has perfect specificity (ability to correctly identify the negative class) but zero sensitivity (ability to correctly identify the positive class).

In [None]:
#Plotting validation and training accuracy
plt.plot(history.history['accuracy'], label='Training accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
#Creating a loss curve
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Plotting validation and training AUC
plt.plot(history.history['auc'], label='Training AUC')
plt.plot(history.history['val_auc'], label='Validation AUC')
plt.title('Model AUC')
plt.xlabel('Epoch')
plt.ylabel('AUC')
plt.legend()
plt.show()

In [None]:
#Calculate class weights
#('balanced' weights each class inversely to its frequency in train_df['target'])
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['target']), y=train_df['target'])

# convert to dictionary: position in the sorted unique labels -> weight,
# which is the {class_index: weight} form passed to model.fit(class_weight=...)
class_weight_dict = dict(enumerate(class_weights))

In [None]:
#Visualise the class weights.
class_labels = np.unique(train_df['target'])
plt.bar(class_labels, class_weights)
plt.title('Class weights')
plt.xlabel('Class label')
plt.ylabel('Weight')
plt.show()

Create the second model using the class weights.

In [None]:
#Re-compile with Adam (learning_rate replaces the deprecated lr argument) and
#train with class weights so errors on the rarer class cost more.
#NOTE(review): `model` is the object trained in the previous section, so this
#continues training it rather than starting from a fresh model.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps, class_weight=class_weight_dict)

In [None]:
#Save the model
# save the model architecture to a JSON file
model_json = model.to_json()
with open('my_model2.json', 'w') as json_file:
    json_file.write(model_json)

# save the model weights to an HDF5 file
model.save_weights('my_model2_weights.h5')
model.save('my_model2.h5')

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes


In [None]:
#Calculate confusion matrix
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

In [None]:
#Plotting validation and training accuracy
plt.plot(history.history['accuracy'], label='Training accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
#Creating a loss curve
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

Using different image processing techniques.


In [None]:
# Define data training datagen parameters.
# validation_split must match the split of `datagen` (0.3), which builds
# validation_generator: without it, subset='training' returns every row of
# train_df, so the augmented training generator would also contain the
# validation images.
training_datagen = ImageDataGenerator(
    rescale=1./255,  # Rescale pixel values
    rotation_range=20,  # Randomly rotate image
    height_shift_range=0.2,  # Randomly shift images vertically
    width_shift_range=0.2,  # Randomly shift images horizontally
    zoom_range=0.2,  # Random zoom
    shear_range=0.2,  # Randomly shear
    horizontal_flip=True,  # Flip horizontal
    vertical_flip=True,   # Flip vertical
    validation_split=0.3  # Keep the validation rows out of the training subset
)

In [None]:
# Create the augmented training generator from training_datagen.
# This rebinds training_generator, so every later fit() call uses augmented images.
# subset='training' keeps the rows after training_datagen's validation_split
# fraction (all rows when no validation_split is configured).
training_generator = training_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)

In [None]:
#Re-compile with Adam (learning_rate replaces the deprecated lr argument) and
#train on the augmented training generator.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history3 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes

In [None]:
#Calculate confusion matrix
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

In [None]:
true_labels = testing_generator.labels 

# Calculate AUC
auc = roc_auc_score(true_labels, model_predictions)

print("AUC:", auc)

In [None]:
# save the model weights to an HDF5 file
model.save_weights('my_model3_weights.h5')
model.save('my_model3.h5')

Create a new model using new parameters.


In [None]:
#Changing the parameters of the model
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(224,224,3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = 'same'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(units= 1, activation='sigmoid')
])



In [None]:
#Compile the small CNN with Adam (learning_rate replaces the deprecated lr
#argument) and train it on the current training generator.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history4 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes

In [None]:
#Calculate confusion matrix
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

In [None]:
true_labels = testing_generator.labels 

# Calculate AUC
auc = roc_auc_score(true_labels, model_predictions)

print("AUC:", auc)

In [None]:
# save the model weights to an HDF5 file
model.save_weights('my_model4_weights.h5')
model.save('my_model4.h5')

In [None]:
#Calculate class weights
class_weights = compute_class_weight('balanced',classes=np.unique(train_df['target']), y=train_df['target'])

#Convert to dictionary
class_weight_dict = dict(enumerate(class_weights))

In [None]:
#Re-compile with Adam (learning_rate replaces the deprecated lr argument) and
#continue training the CNN, this time with the balanced class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history5 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps, class_weight = class_weight_dict)

In [None]:
# save the model weights to an HDF5 file
model.save_weights('my_model5_weights.h5')
model.save('my_model5.h5')

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes

In [None]:
#Calculate confusion matrix
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

In [None]:
true_labels = testing_generator.labels 

# Calculate AUC
auc = roc_auc_score(true_labels, model_predictions)

print("AUC:", auc)

In [None]:
class_labels = np.unique(train_df['target'])
plt.bar(class_labels, class_weights)
plt.title('Class weights')
plt.xlabel('Class label')
plt.ylabel('Weight')
plt.show()

In [None]:
print(class_weights)

In [None]:
# Calculate class frequencies.
# 'target' holds string labels after the earlier astype(str), and np.bincount
# only accepts non-negative integers, so cast back to int for counting.
class_frequencies = np.bincount(train_df['target'].astype(int))
# Calculate total number of samples
total_samples = len(train_df)
# Calculate class weights: n_samples / (n_classes * class_count), i.e. the same
# formula as the 'balanced' mode of compute_class_weight
class_weights = total_samples / (len(class_frequencies) * class_frequencies)

Create heavier class weights, as the first set didn't improve the model.

In [None]:
#Get the class weights
classweight1 = class_weights[0]
classweight2 = class_weights[1]

#Halve the weight of class 0 and double the weight of class 1, so class 1
#(the minority class) is weighted four times more heavily relative to class 0.
classweight1 = classweight1 / 2
classweight2 = classweight2 * 2

#NOTE: class_weights is updated in place, so re-running this cell compounds the scaling.
class_weights[0] = classweight1
class_weights[1] = classweight2
print(class_weights)

In [None]:
class_labels = np.unique(train_df['target'])
plt.bar(class_labels, class_weights)
plt.title('Class weights')
plt.xlabel('Class label')
plt.ylabel('Weight')
plt.show()

In [None]:
# convert to dictionary
class_weight_dict = dict(enumerate(class_weights))

In [None]:
#Re-compile with Adam (learning_rate replaces the deprecated lr argument) and
#continue training with the heavier class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history5 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps, class_weight = class_weight_dict)

In [None]:
# Define data training datagen parameters
training_datagen = ImageDataGenerator(
    rescale=1./255,  # Rescale pixel values
    rotation_range=20,  # Randomly rotate image
    height_shift_range=0.2,  # Randomly shift images vertically
    width_shift_range=0.2,  # Randomly shift images horizontally 
    zoom_range=0.2,  # Random zoom
    shear_range=0.2,  # Randomly shear 
    horizontal_flip=True,  # Flip horizonal
    vertical_flip=True   # Flip vertical
)

Create the oversampler and the oversampled dataframe.

In [None]:
# Oversample only rows that are not used for validation.
# flow_from_dataframe(subset='validation') takes the first `validation_split`
# fraction of train_df, so those rows are excluded here; otherwise validation
# images (and duplicates of them) would end up in the oversampled training set.
val_fraction = 0.3  # NOTE(review): must match the validation_split behind validation_generator
train_part_df = train_df.iloc[int(len(train_df) * val_fraction):]

# Create a RandomOverSampler object (seeded so the resampled set is reproducible)
oversampler = RandomOverSampler(random_state=42)

# Extract the feature data (X) and target data (y) from the training rows
X = train_part_df['image_path']
y = train_part_df['target']

# Reshape X to a 2D array, as required by fit_resample
X = X.values.reshape(-1, 1)

# Apply the oversampling to X and y
X_oversampled, y_oversampled = oversampler.fit_resample(X, y)

# Convert X_oversampled back to a dataframe
X_oversampled_df = pd.DataFrame(X_oversampled, columns=['image_path'])

# Concatenate X_oversampled_df with y_oversampled to get the oversampled train_df
train_df_oversampled = pd.concat([X_oversampled_df, y_oversampled], axis=1)

In [None]:
train_df_oversampled

In [None]:
# Create a custom generator that takes the oversampled data as input
oversampled_train_generator = training_datagen.flow_from_dataframe(
    dataframe=train_df_oversampled,
    x_col='image_path', 
    y_col='target', 
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
    
)

In [None]:
len(train_df)

In [None]:
len(train_df_oversampled)

Calculate the new training steps needed for the oversampled training dataframe.

In [None]:
#Calculate training steps.
#len() of a Keras generator is already the number of batches per epoch, so it
#must not be divided by batch_size again.
train_steps = len(oversampled_train_generator)

#Calculate the validation steps
val_steps = len(validation_generator)

#Define number of epochs
num_epochs = 10

In [None]:
print("Train steps: ", train_steps)
print("Validation steps: ", val_steps)

Compile a new model with the oversampled dataframe.

In [None]:
#Re-compile with Adam (learning_rate replaces the deprecated lr argument) and
#train on the oversampled training generator.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

history6 = model.fit(oversampled_train_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
model.summary()

In [None]:
# Get the weights of the model
model_weights = model.get_weights()

# Loop through the list of weights to access individual weight arrays
for i, weight_array in enumerate(model_weights):
    print("Layer {} - Weight Shape: {}".format(i, weight_array.shape))
    print("Layer {} - Weights: {}".format(i, weight_array))

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)

#Convert predicted probabilities to class labels.
#The model has a single sigmoid output, so predictions have shape (n, 1) and
#np.argmax(..., axis=1) always returns 0; threshold the probability instead.
predicted_labels = (model_predictions.ravel() > 0.5).astype(int)

#Extract true class labels from testing data.
#NOTE: these only line up with the predictions when testing_generator does not shuffle.
true_labels = testing_generator.classes

In [None]:
#print(true_labels)
print(predicted_labels)

In [None]:
#Calculate confusion matrix
model_confusion_matrix = confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(model_confusion_matrix)

In [None]:
true_labels = testing_generator.labels 

# Calculate AUC
auc = roc_auc_score(true_labels, model_predictions)

print("AUC:", auc)

In [None]:
# save the model weights to an HDF5 file
model.save_weights('my_model6_weights.h5')
model.save('my_model6.h5')

In [None]:
training_datagen = ImageDataGenerator(
    rescale=1./255
)

In [None]:
# Create a custom generator that takes the oversampled data as input
oversampled_train_generator = training_datagen.flow_from_dataframe(
    dataframe=train_df_oversampled,
    x_col='image_path', 
    y_col='target', 
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
    
)

In [None]:
# Calculate class frequencies.
# 'target' holds string labels, and np.bincount only accepts non-negative
# integers, so cast to int for counting.
class_frequencies = np.bincount(train_df_oversampled['target'].astype(int))
# Calculate total number of samples
total_samples = len(train_df_oversampled)
# Calculate class weights: n_samples / (n_classes * class_count)
class_weights = total_samples / (len(class_frequencies) * class_frequencies)

In [None]:
#Show the class weights of the oversampled dataframe they should be equal.
class_labels = np.unique(train_df_oversampled['target'])
plt.bar(class_labels, class_weights)
plt.title('Class weights')
plt.xlabel('Class label')
plt.ylabel('Weight')
plt.show()

In [None]:
#Changing the parameters of the model: same two-block CNN as before, with a
#dropout layer after the first pooling step.
#(A missing comma after Dropout(0.5) made this cell a SyntaxError.)
model2 = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(224,224,3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Dropout(0.5),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = 'same'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(units= 1, activation='sigmoid')
])

In [None]:
#Calculate training steps.
#len() of a Keras generator is already the number of batches per epoch, so it
#must not be divided by batch_size again.
train_steps = len(oversampled_train_generator)

#Calculate the validation steps
val_steps = len(validation_generator)

#Define number of epochs
num_epochs = 10

In [None]:
#Compile model2 with Adam (learning_rate replaces the deprecated lr argument)
#and train it on the oversampled training generator.
model2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

history6 = model2.fit(oversampled_train_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
# save the model weights to an HDF5 file
model2.save_weights('my_model7_weights.h5')
model2.save('my_model7.h5')

In [None]:
#Evaluate the model
model_result = model2.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions2 = model2.predict(testing_generator)

In [None]:
y_true = testing_generator.labels


auc = roc_auc_score(y_true, model_predictions2)

In [None]:
print(auc)

In [None]:
threshold = 0.5
# Convert continuous predictions into binary labels
y_pred_binary = np.where(model_predictions2 > threshold, 1, 0)
print(confusion_matrix(y_true, y_pred_binary))

In [None]:
#Changing the parameters of the model
model2 = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(224,224,3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Dropout(0.5),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding = 'same'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(units= 1, activation='sigmoid')
])

In [None]:
#Compile the freshly built model2 with Adam (learning_rate replaces the
#deprecated lr argument) and train it on the oversampled training generator.
model2.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

history6 = model2.fit(oversampled_train_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
#Evaluate the model
model_result = model2.evaluate(testing_generator, verbose=1)

In [None]:
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
#Plotting validation and training accuracy
plt.plot(history6.history['accuracy'], label='Training accuracy')
plt.plot(history6.history['val_accuracy'], label='Validation accuracy')
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
#Creating a loss curve
plt.plot(history6.history['loss'], label='Training loss')
plt.plot(history6.history['val_loss'], label='Validation loss')
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Define model parameters
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

In [None]:
#Compile the VGG16-based model with Adam (learning_rate replaces the deprecated
#lr argument) and train it on the oversampled training generator.
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

history7 = model.fit(oversampled_train_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

Create a new model.

In [None]:
model = Sequential()
model.add(Conv2D(16, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [None]:
#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#track accuracy and AUC.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

#Keras expects class_weight as a {class_index: weight} dict; class_weights is a
#numpy array at this point, so convert it before passing it to fit().
history8 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,
                    class_weight=dict(enumerate(class_weights)))

Recreate the data generators with limited image pre-processing: only rescaling, resizing to 224x224 and flipping.

In [None]:
# Initialize the ImageDataGenerator object used for training and validation:
# rescale pixel values, reserve 20% of train_df for validation, random flips.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2,
                            horizontal_flip=True, vertical_flip=True)

#Defining batch size
batch_size = 32

# Create an iterator for the training images
training_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)

# Create an iterator for the validation images
validation_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)

#Create the testing generator.
#A dedicated generator is used so that every row of test_df is evaluated
#(subset='validation' on datagen kept only 20% of them) and no random flips are
#applied to test images. shuffle=False keeps the batches in dataframe order, so
#model.predict() output lines up with testing_generator.classes / .labels.
test_datagen = ImageDataGenerator(rescale=1./255)
testing_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

In [None]:
#Calculate training steps.
#len() of a Keras generator is already the number of batches per epoch, so it
#must not be divided by batch_size again.
train_steps = len(training_generator)

#Calculate the validation steps
val_steps = len(validation_generator)

#Define number of epochs
num_epochs = 100

In [None]:
#Define model parameters
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [None]:
#Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['target']), y=train_df['target'])

# convert to dictionary
class_weight_dict = dict(enumerate(class_weights))

In [None]:
print(class_weight_dict)

In [None]:
#Compile with Adam (learning_rate replaces the deprecated lr argument), track
#accuracy and AUC, and train with the balanced class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,class_weight=class_weight_dict)

In [None]:
#Evaluate the model
model_result = model.evaluate(testing_generator, verbose=1)
#Output the accuracy and loss
# Extract the loss and accuracy values from the returned values
loss = model_result[0]
accuracy = model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
model_predictions = model.predict(testing_generator)
y_true = testing_generator.labels




In [None]:
auc = roc_auc_score(y_true, model_predictions)

print("AUC: ", auc)
threshold = 0.5
# Convert continuous predictions into binary labels
y_pred_binary = np.where(model_predictions > threshold, 1, 0)
print(confusion_matrix(y_true, y_pred_binary))

Create custom loss function to prioritise the AUC.

In [None]:
import tensorflow as tf
from keras import backend as K

def custom_loss(y_true, y_pred):
    """Binary cross-entropy blended with a differentiable AUC surrogate.

    The earlier version used the mean cross-entropy as its "AUC" term and added
    ``weight * (1 - auc)``, which rewarded *higher* cross-entropy. AUC itself is
    not differentiable, so a pairwise ranking penalty is used instead: for every
    (positive, negative) pair in the batch, ``sigmoid(neg_score - pos_score)``
    is small when the positive sample is scored above the negative one.

    Args:
        y_true: tensor of 0/1 labels with as many elements as ``y_pred``.
        y_pred: tensor of predicted probabilities.

    Returns:
        Element-wise loss tensor shaped like ``y_pred``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))

    # Calculate binary cross-entropy loss (element-wise)
    bce = K.binary_crossentropy(y_true, y_pred)

    # Split the batch scores into positives and negatives
    scores = tf.reshape(y_pred, [-1])
    labels = tf.reshape(y_true, [-1])
    pos_scores = tf.boolean_mask(scores, labels > 0.5)
    neg_scores = tf.boolean_mask(scores, labels <= 0.5)

    # Pairwise ranking penalty over all (positive, negative) pairs
    pair_penalty = tf.sigmoid(neg_scores[tf.newaxis, :] - pos_scores[:, tf.newaxis])
    num_pairs = tf.cast(tf.size(pair_penalty), y_pred.dtype)
    # divide_no_nan yields 0 when a batch holds only one class (no pairs)
    auc_penalty = tf.math.divide_no_nan(tf.reduce_sum(pair_penalty), num_pairs)

    weight = 0.8
    loss = (1 - weight) * bce + weight * auc_penalty

    return loss

In [None]:
#Compile with the custom loss and Adam (learning_rate replaces the deprecated
#lr argument), then train with the balanced class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss=custom_loss, metrics=['accuracy', keras.metrics.AUC(name="auc")])

history8 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,class_weight=class_weight_dict)

In [None]:
#Switch back to binary cross-entropy; compile with Adam (learning_rate replaces
#the deprecated lr argument) and train with the balanced class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,class_weight=class_weight_dict)

In [None]:
#Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['target']), y=train_df['target'])

# convert to dictionary
class_weight_dict = dict(enumerate(class_weights))


In [None]:
print(class_weight_dict)

In [None]:
#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#train with the recomputed class weights.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,class_weight=class_weight_dict)

In [None]:
#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#train again with the class weights, keeping the result in history11.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history11 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps,class_weight=class_weight_dict)

Model 5: pretrained VGG16 base with its first 15 layers frozen.

In [None]:
    def load_pretrained_model():
        """Build a VGG16 base (no classification top) with its first 15 layers frozen.

        Loads the weights from the local 'notop' weights file and reads the
        input shape from the notebook-level ``image_shape`` variable.

        Returns:
            The VGG16 base model; layers from index 15 onward remain trainable.
        """
        
        vgg_weights = '/kaggle/input/vgg16-weights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
        
        #Create the base model
        # NOTE(review): `classes` is documented as only applying when include_top=True - confirm it can be dropped
        base_model = VGG16(weights=vgg_weights, include_top=False, input_shape=image_shape, classes =2)
    
    
        # freeze the first 15 layers of the base model. All other layers are trainable.
        for layer in base_model.layers[0:15]:
            layer.trainable = False

        return base_model
    
    # Create a new sequential model and add the pretrained model defined above,
    # followed by a dense head (128 -> 64 -> 32 units, dropout after the first
    # two dense layers) and a single sigmoid output unit.
    model = Sequential()
    model.add(load_pretrained_model())  
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

In [None]:
#Train the partially frozen model for 20 epochs
num_epochs = 20

#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#track accuracy and AUC.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history11 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
#Train for another 20 epochs, this time with the class weights
num_epochs = 20

#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#track accuracy and AUC.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history11 = model.fit(training_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps, class_weight = class_weight_dict)

Model5 trained on the oversampled training data.

In [None]:
# Create a generator over the oversampled training dataframe
oversampled_train_generator = datagen.flow_from_dataframe(
    dataframe=train_df_oversampled,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training'
)

#Calculate training steps.
#len() of a Keras generator is already the number of batches per epoch, so it
#must not be divided by batch_size again.
train_steps = len(oversampled_train_generator)

#Calculate the validation steps
val_steps = len(validation_generator)

#Define number of epochs
num_epochs = 10

#Compile with Adam (learning_rate replaces the deprecated lr argument) and
#track accuracy and AUC.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy', keras.metrics.AUC(name="auc")])

history11 = model.fit(oversampled_train_generator, steps_per_epoch=train_steps, epochs=num_epochs,
                    validation_data=validation_generator, validation_steps=val_steps)

In [None]:
# Score the model on the testing generator.
model_result = model.evaluate(testing_generator, verbose=1)

# Only the first two entries of the result (loss, then accuracy) are reported;
# any further metric values are left in `model_result`.
loss, accuracy = model_result[0], model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
# Ground-truth labels as stored on the generator.
y_true = testing_generator.labels
# Predicted probabilities for the same generator.
# NOTE(review): comparing these against `y_true` assumes the generator yields
# samples in label order (i.e. was created with shuffle=False) — confirm.
model_predictions = model.predict(testing_generator)



In [None]:
# Cut-off used to turn predicted probabilities into hard 0/1 labels
threshold = 0.5

# Threshold-free ranking quality of the raw predictions
auc = roc_auc_score(y_true, model_predictions)
print("AUC: ", auc)

# Predictions strictly above the threshold become 1, everything else 0
y_pred_binary = (model_predictions > threshold).astype(int)
print(confusion_matrix(y_true, y_pred_binary))

In [None]:
# Training vs. validation accuracy per epoch (from history11)
for metric_key, curve_label in (
    ('accuracy', 'Training accuracy'),
    ('val_accuracy', 'Validation accuracy'),
):
    plt.plot(history11.history[metric_key], label=curve_label)

plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Creating an AUC curve: training vs. validation AUC per epoch (from history11).
# The legend labels name AUC, since that is the metric being plotted.
plt.plot(history11.history['auc'], label='Training AUC')
plt.plot(history11.history['val_auc'], label='Validation AUC')
plt.title('Model AUC')
plt.xlabel('Epoch')
plt.ylabel('AUC')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch (from history11)
for metric_key, curve_label in (
    ('loss', 'Training loss'),
    ('val_loss', 'Validation loss'),
):
    plt.plot(history11.history[metric_key], label=curve_label)

plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Create a generator that reads images listed in the oversampled dataframe
oversampled_train_generator = datagen.flow_from_dataframe(
    dataframe=train_df_oversampled,
    x_col='image_path',
    y_col='target',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    subset='training',
)

# len() of a Keras data iterator is already the number of batches per epoch,
# so it must not be divided by batch_size again (that would make every epoch
# cover only a fraction of the data).
train_steps = len(oversampled_train_generator)
val_steps = len(validation_generator)

# Define number of epochs
num_epochs = 30

# NOTE(review): the existing `model` is re-compiled and trained further here;
# its weights are not reset, so these epochs add to any earlier training.
# Confirm this is intended.

# `learning_rate` is the supported keyword for Adam; `lr` is deprecated.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name="auc")],
)

history12 = model.fit(
    oversampled_train_generator,
    steps_per_epoch=train_steps,
    epochs=num_epochs,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

In [None]:
# Score the model on the testing generator.
model_result = model.evaluate(testing_generator, verbose=1)

# Only the first two entries of the result (loss, then accuracy) are reported;
# any further metric values are left in `model_result`.
loss, accuracy = model_result[0], model_result[1]

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
# Ground-truth labels as stored on the generator.
y_true = testing_generator.labels
# Predicted probabilities for the same generator.
# NOTE(review): comparing these against `y_true` assumes the generator yields
# samples in label order (i.e. was created with shuffle=False) — confirm.
model_predictions = model.predict(testing_generator)


In [None]:
# Cut-off used to turn predicted probabilities into hard 0/1 labels
threshold = 0.5

# Threshold-free ranking quality of the raw predictions
auc = roc_auc_score(y_true, model_predictions)
print("AUC: ", auc)

# Predictions strictly above the threshold become 1, everything else 0
y_pred_binary = (model_predictions > threshold).astype(int)
print(confusion_matrix(y_true, y_pred_binary))

In [None]:
# Training vs. validation accuracy per epoch (from history12)
for metric_key, curve_label in (
    ('accuracy', 'Training accuracy'),
    ('val_accuracy', 'Validation accuracy'),
):
    plt.plot(history12.history[metric_key], label=curve_label)

plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Creating an AUC curve: training vs. validation AUC per epoch (from history12).
# Each curve gets its own legend label so the two lines can be told apart.
plt.plot(history12.history['auc'], label='Training AUC')
plt.plot(history12.history['val_auc'], label='Validation AUC')
plt.title('Model AUC')
plt.xlabel('Epoch')
plt.ylabel('AUC')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch (from history12)
for metric_key, curve_label in (
    ('loss', 'Training loss'),
    ('val_loss', 'Validation loss'),
):
    plt.plot(history12.history[metric_key], label=curve_label)

plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()