In [54]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow.keras as kb
from tensorflow.keras import backend
import tensorflow as tf
from zipfile import ZipFile

In [55]:
#Using zipfile to unzip the downloaded file from kaggle
file_name = '/content/archive 1.zip'

with ZipFile(file_name, 'r') as zip:
  zip.extractall()
  print('Done')

Done


In [56]:
#Essentially a lot of work to add all the images to a dataframe with labels
#I think this could have been done more simply but I have no idea how
import os


base_dir = '/content/Skin_Data'

data = []

# Function to add images from a directory to the list with a label
def add_images_from_directory(directory, label):
    for filename in os.listdir(directory):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            file_path = os.path.join(directory, filename)
            data.append((file_path, label))

# Add cancer images
add_images_from_directory(os.path.join(base_dir, 'Cancer/Training'), 'Cancer')
add_images_from_directory(os.path.join(base_dir, 'Cancer/Testing'), 'Cancer')

# Add non-cancer images
add_images_from_directory(os.path.join(base_dir, 'Non_Cancer/Training'), 'Non_Cancer')
add_images_from_directory(os.path.join(base_dir, 'Non_Cancer/Testing'), 'Non_Cancer')

# Create a DataFrame
df = pd.DataFrame(data, columns=['Filepath', 'Label'])

# Shuffle the DataFrame
df = df.sample(frac=1).reset_index(drop=True)

df.head()

Unnamed: 0,Filepath,Label
0,/content/Skin_Data/Cancer/Testing/2535-3q1.JPG,Cancer
1,/content/Skin_Data/Non_Cancer/Testing/1839-02.JPG,Non_Cancer
2,/content/Skin_Data/Non_Cancer/Testing/761-01.JPG,Non_Cancer
3,/content/Skin_Data/Non_Cancer/Testing/1359-2.jpg,Non_Cancer
4,/content/Skin_Data/Cancer/Testing/1969-2.JPG,Cancer


In [57]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# TTS with 80/20 split
train_df, test_df = train_test_split(df, test_size=0.2)

img_height=150
img_width=150
batch_size=32

# Set up image generators to read images from dataframe
#and to automatically perform some augmentations as well
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)


test_datagen = ImageDataGenerator(rescale=1./255)

# Create generators to read images from the dataframe for both train and test
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary'
)

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')]
)

Cnn = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=50,
    steps_per_epoch=len(train_generator),
    validation_steps=len(test_generator),verbose=0
)

Found 230 validated image filenames belonging to 2 classes.
Found 58 validated image filenames belonging to 2 classes.


In [67]:
#Printing out loss and accuracy for test
eval_result = model.evaluate(test_generator, steps=test_generator.samples // batch_size)

print('Test loss:', eval_result[0])
print('Test accuracy:', eval_result[1])

Test loss: 0.2456779032945633
Test accuracy: 0.90625


In [66]:
#Printing out loss and accuracy for test

eval_result = model.evaluate(train_generator, steps=train_generator.samples // batch_size)

print('Train loss:', eval_result[0])
print('Train accuracy:', eval_result[1])

Train loss: 0.20332637429237366
Train accuracy: 0.9151785969734192


In [60]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model


# Load the MobileNetV2 model without the head
base_model = MobileNetV2(weights='imagenet', include_top=False,
                         input_shape=(img_height, img_width, 3))

# Freeze the layers of the model
for layer in base_model.layers:
    layer.trainable = False

# Create the custom head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

# Combine the base and the head I made together
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')])

# Train the model
tLearning = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
    validation_steps=len(test_generator),verbose=0
)



In [65]:
#Printing out loss and accuracy for test
eval_result = model.evaluate(test_generator, steps=test_generator.samples // batch_size)

print('Test loss:', eval_result[0])
print('Test accuracy:', eval_result[1])

Test loss: 0.20563030242919922
Test accuracy: 0.90625


In [62]:
#Printing out loss and accuracy for train
eval_result = model.evaluate(train_generator, steps=train_generator.samples // batch_size)

print('Train loss:', eval_result[0])
print('Train accuracy:', eval_result[1])

Train loss: 0.16589124500751495
Train accuracy: 0.9330357313156128
