In [None]:
!wget https://cloud.rahulvk.com/s/dEZNYfrS5kkXM3D/download/train.zip


In [None]:
!wget https://cloud.rahulvk.com/s/bNW7Hpp2dcoQgCA/download/valid.zip


In [None]:
!wget https://cloud.rahulvk.com/s/S4ZMK85YGYNRjRW/download/test.zip

In [None]:
!unzip train.zip
!unzip test.zip
!unzip valid.zip

In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications import MobileNetV2
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Seed NumPy and TensorFlow with the same value
np.random.seed(7)
tf.random.set_seed(7)

# The list index is the integer label: index 0 -> benign, index 1 -> malignant
class_names = ["benign", "malignant"]

# Define the function to generate CSV files
def generate_csv(folder, label2int):
    """Write ``<folder name>.csv`` listing the files found under ``folder``.

    ``folder`` is searched one level deep: for every key of ``label2int`` the
    regular files directly inside ``folder/<key>`` each become one CSV row.

    Args:
        folder: Dataset directory (string or path-like), e.g. ``"train"``.
        label2int: Mapping of sub-directory name to integer class label.

    Side effects:
        Writes ``<folder name>.csv`` with the columns ``filepath`` and
        ``label`` into the current working directory and prints its name.
    """
    folder_name = Path(folder).name
    rows = []

    for label, label_int in label2int.items():
        # glob() order depends on the filesystem, so sort for a reproducible
        # row order; skip sub-directories, which are not loadable images.
        filepaths = sorted(p for p in Path(folder, label).glob("*") if p.is_file())
        rows.extend({"filepath": str(p), "label": str(label_int)} for p in filepaths)

    # Naming the columns explicitly keeps the header in the CSV even when no
    # file was found, so the file can still be read back with pd.read_csv.
    df = pd.DataFrame(rows, columns=["filepath", "label"])
    output_file = f"{folder_name}.csv"
    df.to_csv(output_file, index=False)
    print(f"Saved {output_file}")

# Write train.csv, valid.csv and test.csv; nevus and seborrheic keratosis map
# to label 0, melanoma maps to label 1
for split_folder in ("train", "valid", "test"):
    generate_csv(split_folder, {"nevus": 0, "seborrheic_keratosis": 0, "melanoma": 1})

# Read the generated metadata CSVs back and report how many rows each holds
train_metadata_filename, valid_metadata_filename = "train.csv", "valid.csv"
df_train = pd.read_csv(train_metadata_filename)
df_valid = pd.read_csv(valid_metadata_filename)
n_training_samples, n_validation_samples = len(df_train), len(df_valid)
print(f"Number of training samples: {n_training_samples}")
print(f"Number of validation samples: {n_validation_samples}")

# Random augmentations applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: multiply pixel values by 1/255, then augment
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation_options)

# Validation generator: same 1/255 scaling, no augmentation
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)

batch_size = 32

# Cast the label column of both frames to strings before building the generators
for metadata_frame in (df_train, df_valid):
    metadata_frame['label'] = metadata_frame['label'].astype(str)

# Options shared by the training and validation generators
generator_options = dict(
    x_col="filepath",
    y_col="label",
    target_size=(224, 224),  # images are resized to 224x224 before being batched
    batch_size=batch_size,
    class_mode="binary",
)

# Training batches are shuffled with a fixed seed
train_generator = train_datagen.flow_from_dataframe(
    df_train, shuffle=True, seed=7, **generator_options
)

# Validation batches keep the DataFrame order
valid_generator = valid_datagen.flow_from_dataframe(
    df_valid, shuffle=False, **generator_options
)


In [None]:

# Create the model: MobileNetV2 with ImageNet weights and without its
# classification head, followed by global average pooling and a single sigmoid
# unit that outputs the probability of label 1.
base_model = MobileNetV2(weights='imagenet', include_top=False)
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(1, activation='sigmoid'))

# Compile the model. `learning_rate` is the supported argument name; the old
# `lr` alias is deprecated and rejected by current Keras releases.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model on the augmented training generator, validating each epoch
history = model.fit(train_generator, validation_data=valid_generator, epochs=10)

# Save the model
save_model_path = 'LatestModel.h5'  # Replace with the desired path to save the model
model.save(save_model_path)

In [None]:
def predict_image_class(img_path, model, threshold=0.5):
  """Classify one image file as benign or malignant, print the result and show it.

  The image is prepared the same way as the generator batches this notebook
  trains on: resized to 224x224 and multiplied by 1/255.

  Args:
    img_path: Path of the image file to classify.
    model: Model whose ``predict`` returns one sigmoid score per image, where
      a higher score means "malignant" (label 1).
    threshold: Scores greater than or equal to this value are reported as
      malignant.
  """
  img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
  # Scale to [0, 1] to match the `rescale=1.0 / 255` used by the data generators
  img = tf.keras.preprocessing.image.img_to_array(img) / 255.0
  batch = tf.expand_dims(img, 0) # Create a batch of one image
  predictions = model.predict(batch)
  # Collapse the (1, 1) prediction to a plain Python float for formatting
  score = float(np.squeeze(predictions))
  if score >= threshold:
    print(f"This image is {100 * score:.2f}% malignant.")
  else:
    print(f"This image is {100 * (1 - score):.2f}% benign.")
  # `img` is already in [0, 1], the range imshow expects for float data
  plt.imshow(img)
  plt.axis('off')
  plt.show()

In [None]:
# Re-read the metadata CSVs from disk and wrap them in tf.data datasets
train_metadata_filename, valid_metadata_filename = "train.csv", "valid.csv"
# load CSV files as DataFrames
df_train = pd.read_csv(train_metadata_filename)
df_valid = pd.read_csv(valid_metadata_filename)
n_training_samples, n_validation_samples = len(df_train), len(df_valid)
print(f"Number of training samples: {n_training_samples}")
print(f"Number of validation samples: {n_validation_samples}")
# Each dataset element is a (filepath, label) pair
train_pairs = (df_train["filepath"], df_train["label"])
valid_pairs = (df_valid["filepath"], df_valid["label"])
train_ds = tf.data.Dataset.from_tensor_slices(train_pairs)
valid_ds = tf.data.Dataset.from_tensor_slices(valid_pairs)

In [None]:
# preprocess data
def decode_img(img):
  """Decode JPEG bytes into a float32 RGB image resized to 299x299."""
  # compressed string -> 3-channel tensor
  decoded = tf.image.decode_jpeg(img, channels=3)
  # `convert_image_dtype` turns the decoded values into floats in the [0,1] range
  as_float = tf.image.convert_image_dtype(decoded, tf.float32)
  # final step: bring every image to the same spatial size
  target_size = [299, 299]
  return tf.image.resize(as_float, target_size)


def process_path(filepath, label):
  """Read the file at ``filepath``, decode it with ``decode_img`` and return ``(image, label)``."""
  # read_file yields the raw file contents; decode_img turns them into an image
  return decode_img(tf.io.read_file(filepath)), label


# Replace each (filepath, label) element with (decoded image, label)
train_ds = train_ds.map(process_path)
valid_ds = valid_ds.map(process_path)

# Sanity check: show the shape and label of the first training element
for sample_image, sample_label in train_ds.take(1):
    print("Image shape:", sample_image.shape)
    print("Label:", sample_label.numpy())

In [None]:
# training parameters used by the tf.data pipeline set up below
optimizer = "rmsprop"
batch_size = 64

def prepare_for_training(ds, cache=True, batch_size=64, shuffle_buffer_size=1000):
  """Cache (optionally), shuffle, repeat, batch and prefetch ``ds``.

  Args:
    ds: Dataset to prepare.
    cache: Falsy to skip caching; a non-empty string to cache to that file
      name; any other truthy value to call ``cache()`` without arguments.
    batch_size: Number of elements per batch.
    shuffle_buffer_size: Buffer size passed to ``shuffle``.

  Returns:
    The prepared dataset.
  """
  if cache:
    # Only a string is forwarded to cache() as its file name argument
    cache_args = (cache,) if isinstance(cache, str) else ()
    ds = ds.cache(*cache_args)
  return (
      ds.shuffle(buffer_size=shuffle_buffer_size)  # shuffle the elements
      .repeat()                                    # repeat forever
      .batch(batch_size)                           # split into batches
      # prefetch so batches are fetched in the background during training
      .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  )

# Cache each split under its own file name, then shuffle/repeat/batch/prefetch it
train_ds = prepare_for_training(train_ds, cache="train-cached-data", batch_size=batch_size)
valid_ds = prepare_for_training(valid_ds, cache="valid-cached-data", batch_size=batch_size)

In [None]:
# evaluation
# read the testing-set metadata and wrap it in a tf.data dataset
test_metadata_filename = "test.csv"
df_test = pd.read_csv(test_metadata_filename)
n_testing_samples = len(df_test)
print(f"Number of testing samples: {n_testing_samples}")
# each dataset element is a (filepath, label) pair
test_pairs = (df_test["filepath"], df_test["label"])
test_ds = tf.data.Dataset.from_tensor_slices(test_pairs)

def prepare_for_testing(ds, cache=True, shuffle_buffer_size=1000):
  """Cache (optionally) and shuffle ``ds``.

  Args:
    ds: Dataset to prepare.
    cache: Falsy to skip caching; a non-empty string to cache to that file
      name; any other truthy value to call ``cache()`` without arguments.
    shuffle_buffer_size: Buffer size passed to ``shuffle``.

  Returns:
    The prepared dataset.
  """
  if cache:
    # Only a string is forwarded to cache() as its file name argument
    cache_args = (cache,) if isinstance(cache, str) else ()
    ds = ds.cache(*cache_args)
  return ds.shuffle(buffer_size=shuffle_buffer_size)

# Decode the test images, then cache them to a file and shuffle
decoded_test_ds = test_ds.map(process_path)
test_ds = prepare_for_testing(decoded_test_ds, cache="test-cached-data")

In [None]:
# convert testing set to numpy array to fit in memory (don't do that when testing
# set is too large)
# The model in this notebook is trained on 224x224 images, while the tf.data
# pipeline decodes to 299x299, so every image is resized here to the training
# resolution before evaluation.
eval_image_size = (224, 224)
y_test = np.zeros((n_testing_samples,))
# float32 matches the dtype of the decoded images and halves the memory that
# NumPy's default float64 would need
X_test = np.zeros((n_testing_samples, *eval_image_size, 3), dtype=np.float32)
for i, (img, label) in enumerate(test_ds.take(n_testing_samples)):
  X_test[i] = tf.image.resize(img, eval_image_size).numpy()
  y_test[i] = label.numpy()

print("y_test.shape:", y_test.shape)

In [None]:
# Score the in-memory model on the test arrays (loss first, then accuracy)
print("Evaluating the model...")
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = test_metrics
print("Loss:", loss, "  Accuracy:", accuracy)

In [None]:
import numpy as np
from keras.models import load_model
from PIL import Image
from keras.preprocessing import image

# Load the saved model for prediction
loaded_model = load_model('LatestModel.h5')

# Load an image for prediction (replace 'sc2.jpeg' with the actual image path)
#img_path = 'ISIC_2019_Training_Input/ISIC_0000002.jpg'
img_path = 'sc2.jpeg'
# Force three RGB channels: grayscale, RGBA or palette files would otherwise
# produce an array with the wrong number of channels for the model.
img = Image.open(img_path).convert("RGB")
img = img.resize((224, 224))  # Resize the image to match the model's input size

# Convert the image to a float batch of one image and apply the same 1/255
# scaling the training generator used; raw 0-255 pixel values would be far
# outside the range the model was trained on.
img_array = np.asarray(img, dtype=np.float32) / 255.0
img_array = np.expand_dims(img_array, axis=0)

# Make a prediction
prediction = loaded_model.predict(img_array)
print(prediction)
# Convert the prediction to a class label (scores of at least 0.4 count as malignant)
if prediction[0][0] >= 0.4:
    print("Malignant")
else:
    print("Benign")
