# CNN Model VGG16

## Imports and initial setup

In [112]:
# Imports
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import sys
import os

sys.path.append("..")
from helperfunctions import modelhelper as mh

# --- Run configuration -------------------------------------------------------
SEED = 42
NUM_EPOCHS = 30
BATCH_SIZE = 32

# Seed Python's `random`, NumPy and TensorFlow in one call so that a
# "Restart Kernel -> Run All" reproduces the same weight initialisation and
# sampling. Without this call SEED was defined but never took effect.
tf.keras.utils.set_random_seed(SEED)

# --- File path variables -----------------------------------------------------
# Adjust these if your data lives somewhere else. All paths are relative to the
# notebook's working directory.
FILEPATH_JPGS = './../data/jpgs/'            # folder containing the image files
FILEPATH_PROCESSED = "./../data/processed/"  # folder containing the train/validation/test metadata CSVs
FILEPATH_OUTPUT = './../data/jpgs/'          # output folder (currently the same as FILEPATH_JPGS)

# --- Data / model settings ---------------------------------------------------
TARGET_LABEL = "dx"        # metadata column used as the class label
BALANCE_LABEL = "dx"       # metadata column used for class balancing
IMAGE_SIZE = (224, 224)    # target size every image is resized to

### Loading (augmented) metadata as test, train, validation from files

In [98]:
# Read the pre-split metadata files (train / validation / test).
train_df = pd.read_csv(os.path.join(FILEPATH_PROCESSED, "train_from_Metadata_processed.csv"))
validation_df = pd.read_csv(os.path.join(FILEPATH_PROCESSED, "validation_from_Metadata_processed.csv"))
test_df = pd.read_csv(os.path.join(FILEPATH_PROCESSED, "test_from_Metadata_processed.csv"))

# Show a random preview of the training metadata. `random_state` pins the
# sample so the displayed rows are identical on every run.
train_df.sample(15, random_state=SEED)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,dx_binary,image_path
1286,HAM_0000994,ISIC_0027609.jpg,bcc,histo,80.0,male,scalp,vidir_modern,skin_cancer,./../data/jpgs/ISIC_0027609.jpg
6,HAM_0001821,ISIC_0025308.jpg,nv,follow_up,40.0,male,trunk,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0025308.jpg
6290,HAM_0000024,aug_EdShCQISIC_0027141.jpg,df,consensus,45.0,female,lower extremity,vidir_molemax,not_skin_cancer,./../data/jpgs/aug_EdShCQISIC_0027141.jpg
1257,HAM_0004235,ISIC_0030452.jpg,bcc,histo,80.0,male,upper extremity,rosendahl,skin_cancer,./../data/jpgs/ISIC_0030452.jpg
3817,HAM_0006887,aug_5kb6hXISIC_0031993.jpg,akiec,histo,45.0,male,face,rosendahl,skin_cancer,./../data/jpgs/aug_5kb6hXISIC_0031993.jpg
283,HAM_0003794,ISIC_0029429.jpg,nv,follow_up,50.0,male,lower extremity,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0029429.jpg
176,HAM_0001826,ISIC_0029509.jpg,nv,follow_up,65.0,male,back,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0029509.jpg
6654,HAM_0004036,aug_GkrtUJISIC_0027648.jpg,df,histo,55.0,male,upper extremity,rosendahl,not_skin_cancer,./../data/jpgs/aug_GkrtUJISIC_0027648.jpg
5623,HAM_0005632,ISIC_0029232.jpg,bkl,confocal,85.0,female,face,vidir_modern,not_skin_cancer,./../data/jpgs/ISIC_0029232.jpg
20,HAM_0001875,ISIC_0031231.jpg,nv,follow_up,40.0,female,lower extremity,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0031231.jpg


## Setting up the image data generator for training and validation

Note: each Keras Application expects a specific kind of input preprocessing. For VGG16, call tf.keras.applications.vgg16.preprocess_input on your inputs before passing them to the model. vgg16.preprocess_input will convert the input images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling

In [99]:
# Image data generators for the train and validation sets.
#
# Only the VGG16-specific `preprocess_input` is applied (RGB -> BGR and
# zero-centering with the ImageNet channel means, no scaling). It must not be
# combined with `rescale=1/255`: ImageDataGenerator multiplies by `rescale`
# AFTER the preprocessing function has run, which would shrink the zero-centred
# values by a factor of 255 and no longer match the input distribution the
# ImageNet weights were trained on.
#
# No on-the-fly augmentation is configured here.
# NOTE(review): augmented images appear to be pre-generated files (the `aug_*`
# entries in the metadata) - confirm.
datagen_train = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # VGG16 specific preprocessing
)

datagen_validation = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # VGG16 specific preprocessing
)

validation_generator = datagen_validation.flow_from_dataframe(
    dataframe=validation_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep a fixed order so predictions line up with validation_generator.classes
)

train_data_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,  # reproducible batch order
)

Found 2003 validated image filenames belonging to 7 classes.
Found 7006 validated image filenames belonging to 7 classes.


# Adding a neural network model to test the data

## VGG16 Model
VGG16 is a pre-trained Convolutional Neural Network (CNN) model proposed by K. Simonyan and A. Zisserman from the University of Oxford's Visual Geometry Group Lab. The model was proposed in their 2014 paper "Very Deep Convolutional Networks for Large-Scale Image Recognition" and won the 1st and 2nd places in the ImageNet Large Scale Visual Recognition Challenge (ILSVRC) in 2014 (source: geeksforgeeks.org). It is considered one of the best vision model architectures to date due to its simplicity and high performance (source: builtin.com).

[Source:Tensorflow Applications](https://www.tensorflow.org/api_docs/python/tf/keras/applications/vgg16/VGG16)

Architecture:

*Input:*

Conv 1-1,
Conv 1-2,
Pooling

Conv 2-1,
Conv 2-2,
Pooling

Conv 3-1,
Conv 3-2,
Conv 3-3,
Pooling

Conv 4-1,
Conv 4-2,
Conv 4-3,
Pooling

Conv 5-1,
Conv 5-2,
Conv 5-3,
Pooling

Dense,
Dense,
Dense

*Output*


In [100]:
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense
from tensorflow.keras.models import Model

# Number of output units = number of classes found by the training generator,
# so the head always matches the labels instead of relying on a hard-coded 7.
num_classes = len(train_data_generator.class_indices)

# Loading the VGG16 model (convolutional base only).
vgg16_model = VGG16(
    include_top=False,   # do not include the top (dense) layers, we add our own
    weights="imagenet",  # use the weights the model was pre-trained with
    input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3),  # (height, width, channels)
    pooling=None,        # keep the 4D feature map; the Conv2D layer below needs it
)

# Freeze the VGG16 base so only the custom head is trained.
vgg16_model.trainable = False

# Add custom layers on top of VGG16.
x = Conv2D(64, (3, 3), activation='relu')(vgg16_model.output)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dense(num_classes, activation='softmax')(x)  # one probability per class

# Create the combined model.
model = Model(inputs=vgg16_model.input, outputs=x)

In [101]:

from tensorflow.keras.optimizers import SGD

# Stochastic gradient descent with momentum, using the legacy Keras
# optimizer implementation.
opt = tf.keras.optimizers.legacy.SGD(
    learning_rate=0.001,
    momentum=0.9,
)

# Multi-class setup: categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

## Model Training

In [102]:
# Train the model.
#
# The batch size and the shuffling are defined by the data generators, so they
# are not passed here: `batch_size` must not be specified for generator /
# Sequence inputs and `shuffle` is ignored by `fit` for them. All other
# arguments are left at their Keras defaults.
history = model.fit(
    train_data_generator,
    epochs=NUM_EPOCHS,
    validation_data=validation_generator,  # evaluated at the end of every epoch
    verbose=1,                             # progress bar per epoch
    max_queue_size=10,                     # max size of the generator queue
    workers=1,                             # Keras default; Keras has no "-1 = all CPUs" convention
    use_multiprocessing=False,             # keep data loading in this process
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Saving the model

In [113]:
from datetime import datetime
import re

MODELS_DIR = "../models"
os.makedirs(MODELS_DIR, exist_ok=True)  # make sure the target folder exists

# Filesystem-safe timestamp: no spaces or colons (invalid in Windows file
# names) and no microseconds.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Find the highest run number (the N in "..._VGG16_R<N>.h5") among the models
# that were already saved, independent of their timestamp, and use the next one.
run_number_pattern = re.compile(r"_VGG16_R(\d+)\.h5$")
existing_runs = [
    int(match.group(1))
    for match in map(run_number_pattern.search, os.listdir(MODELS_DIR))
    if match
]
counter = max(existing_runs, default=0) + 1

# `timestamp` and `model_name` are reused when saving the training history.
model_name = f"VGG16_R{counter}"
model_path = f"{MODELS_DIR}/model_{timestamp}_{model_name}.h5"
model.save(model_path)

  saving_api.save_model(


In [115]:
# Persist the per-epoch training metrics under the same timestamp and model
# name as the saved model file. `history.history` is a dict, so NumPy stores it
# as a pickled object array; read it back with
# `np.load(history_path, allow_pickle=True).item()`.
history_path = f"../models/model_{timestamp}_{model_name}_history.npy"
np.save(file=history_path, arr=history.history)