In [1]:

import os
import pandas as pd
import numpy as np
import shutil
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Public ISIC 2020 challenge assets: the training JPEG archive and its ground-truth CSV.
image_data_url = "https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_JPEG.zip"
metadata_url = "https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_GroundTruth.csv"

data_dir = './data'

# The later cells read the extracted images from `<data_dir>/train`, so that folder
# (not one named after the zip) is what tells us the download can be skipped.
# The zip-named folder is still honoured for backward compatibility.
image_dir = os.path.join(data_dir, 'train')
legacy_image_dir = os.path.join(data_dir, 'ISIC_2020_Training_JPEG')
archive_path = os.path.join(data_dir, 'ISIC_2020_Training_JPEG.zip')

images_present = os.path.exists(legacy_image_dir) or (
    os.path.isdir(image_dir)
    and any(name.lower().endswith('.jpg') for name in os.listdir(image_dir))
)

if not images_present:
    os.makedirs(data_dir, exist_ok=True)
    # `-c` resumes a partial download and is a no-op when the archive is already complete.
    if os.system(f"wget -c {image_data_url} -P {data_dir}") != 0:
        raise RuntimeError(f"Download failed: {image_data_url}")
    # `-o` overwrites without prompting (a prompt would hang a notebook); `-q` keeps the log short.
    if os.system(f"unzip -q -o {archive_path} -d {data_dir}") != 0:
        raise RuntimeError(f"Could not extract archive: {archive_path}")

# One row per image; the following cells rely on the 'image_name' and 'diagnosis' columns.
metadata = pd.read_csv(metadata_url)



In [2]:

# String flag ('True'/'False') derived from the diagnosis column as it stands when this
# line runs, i.e. before rare classes are folded into 'unknown' just below.
metadata['known'] = (metadata['diagnosis'] != 'unknown').astype(str)

# Identify classes with very few samples and group them as 'unknown'
# (a stratified split needs at least two samples per class).
class_counts = metadata['diagnosis'].value_counts()
rare_classes = class_counts[class_counts < 2].index
metadata['diagnosis'] = metadata['diagnosis'].mask(metadata['diagnosis'].isin(rare_classes), 'unknown')

# Shuffled copy restricted to classes with >= 2 samples; used by the plotting cells.
class_counts = metadata['diagnosis'].value_counts()
valid_classes = class_counts[class_counts >= 2].index
metadata_filtered = metadata[metadata['diagnosis'].isin(valid_classes)]
metadata_filtered = shuffle(metadata_filtered, random_state=42)

# Binary target: 1 = any concrete diagnosis ("known"), 0 = 'unknown'.
# Stratifying on the full diagnosis keeps every diagnosis represented in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    metadata['image_name'], (metadata['diagnosis'] != 'unknown').astype(int),
    test_size=0.2, random_state=42, stratify=metadata['diagnosis']
)

train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')

print("X_train (image names):")
print(X_train.head())

print("\ny_train (labels based on 'diagnosis'):")
print(y_train.head())


def copy_images_into_class_dirs(image_names, labels, source_dir, split_dir):
    """Copy images into ``split_dir/known`` or ``split_dir/unknown``.

    Args:
        image_names: iterable of image identifiers (file name without ``.jpg``).
        labels: iterable aligned with ``image_names``; 1 means 'known', anything
            else means 'unknown'.
        source_dir: directory holding the original ``<image_name>.jpg`` files.
        split_dir: root directory of the split; the two class folders are created
            inside it if they do not exist yet.

    Images missing from ``source_dir`` are reported on stdout and skipped.
    """
    # Create the class folders once up front instead of once per copied file.
    for class_name in ['known', 'unknown']:
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    for image_name, label in zip(image_names, labels):
        class_name = 'known' if label == 1 else 'unknown'
        file_name = f'{image_name}.jpg'
        source_path = os.path.join(source_dir, file_name)

        # Ensure the source image exists before copying
        if os.path.exists(source_path):
            shutil.copy(source_path, os.path.join(split_dir, class_name, file_name))
        else:
            print(f"Image not found: {source_path}")


# The original images sit directly in `<data_dir>/train`; the class folders for the
# training split are created inside that same directory.
source_image_dir = os.path.join(data_dir, 'train')
copy_images_into_class_dirs(X_train, y_train, source_image_dir, train_dir)
copy_images_into_class_dirs(X_val, y_val, source_image_dir, val_dir)

class_distribution_train = y_train.value_counts()
print("Class distribution in the training set:\n", class_distribution_train)

# Display the class distribution in the validation set
class_distribution_val = y_val.value_counts()
print("\nClass distribution in the validation set:\n", class_distribution_val)



X_train (image names):
23336    ISIC_7095039
30562    ISIC_9233009
2621     ISIC_0885962
979      ISIC_0401116
10465    ISIC_3237557
Name: image_name, dtype: object

y_train (labels based on 'diagnosis'):
23336    0
30562    0
2621     0
979      0
10465    0
Name: diagnosis, dtype: int64
Class distribution in the training set:
 0    21700
1     4800
Name: diagnosis, dtype: int64

Class distribution in the validation set:
 0    5426
1    1200
Name: diagnosis, dtype: int64


In [3]:
# Image dimensions and batch size
img_width, img_height = 224, 224
# Keras orders image shapes as (height, width, channels) and `target_size` as
# (height, width); keeping that order avoids a silent mismatch if the two ever differ.
input_shape = (img_height, img_width, 3)
batch_size = 32
# The list order sets the label indices used by the generators: 'known' -> 0, 'unknown' -> 1.
class_names = ['known', 'unknown']

# Data augmentation and generators
# Training images are rescaled to [0, 1] and randomly sheared / zoomed / flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    classes=class_names
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    classes=class_names
)



Found 26500 images belonging to 2 classes.
Found 6626 images belonging to 2 classes.


In [4]:
# Model: three conv/max-pool stages followed by a dense head with a single
# sigmoid unit for the binary known/unknown decision.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [5]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked for reporting.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [6]:
# Train the model
# Whole batches per pass; integer division leaves out a trailing partial batch.
train_steps = train_generator.samples // batch_size
val_steps = validation_generator.samples // batch_size
early_stopping = EarlyStopping(patience=3)

history = model.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
    callbacks=[early_stopping],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [7]:
# Evaluate the model on the validation generator.
# The result is indexed as [loss, accuracy], following the compile() configuration.
evaluation = model.evaluate(validation_generator)
val_loss, val_accuracy = evaluation[0], evaluation[1]
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_accuracy)

Validation Loss: 0.1841851770877838
Validation Accuracy: 0.9414427876472473


In [11]:
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input

# Sample image from the extracted dataset, resolved relative to data_dir rather than
# through a Colab-only absolute path.
image_path = os.path.join(data_dir, 'train', 'ISIC_0074268.jpg')

# Load and preprocess the image the same way the training/validation generators do:
# resize, then scale pixel values to [0, 1]. ResNet50's preprocess_input applies a
# different normalisation that this model never saw during training.
img = image.load_img(image_path, target_size=(img_height, img_width))
img = image.img_to_array(img) / 255.0
img = np.expand_dims(img, axis=0)  # Add batch dimension

# Predict the class
prediction = model.predict(img)
# Single image, single sigmoid unit: pull out the scalar instead of comparing an array.
unknown_probability = float(prediction[0][0])

# The generators were built with classes=['known', 'unknown'], so the positive
# class (output close to 1) is 'unknown'.
if unknown_probability > 0.5:
    print("unknown")
else:
    print("known")

unknown


In [None]:
import matplotlib.pyplot as plt  # Import Matplotlib for plotting



# Bar chart of the binary label counts (0 = unknown, 1 = known) in each split
class_distribution_train = y_train.value_counts()
class_distribution_val = y_val.value_counts()

# Both splits are drawn on the same x positions with transparency so they overlay.
fig, ax = plt.subplots(figsize=(10, 6))
for split_counts, split_label in (
    (class_distribution_train, 'Train Set'),
    (class_distribution_val, 'Validation Set'),
):
    ax.bar(split_counts.index.astype(str), split_counts.values, alpha=0.6, label=split_label)
ax.set_xlabel('Classes')
ax.set_ylabel('Count')
ax.set_title('Class Distribution of Known and Unknown Classes')
ax.legend()
plt.show()




![Bar chart of known vs. unknown class counts in the train and validation sets](Untitled.png)

In [None]:
import seaborn as sns

# Scatterplot of every sample: row index on x, diagnosis on y, coloured by diagnosis
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=metadata_filtered,
    x=metadata_filtered.index,
    y="diagnosis",
    hue="diagnosis",
    palette="Set1",
    ax=ax,
)
ax.set_xlabel('Data Points')
ax.set_ylabel('Classes')
ax.set_title('Scatterplot of All Classes')
ax.legend(loc='upper right', title='Classes')
plt.show()


![Scatterplot of all samples by diagnosis class](Untitled-1.png)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Occurrences of each diagnosis, ordered from most to least frequent
class_distribution = metadata['diagnosis'].value_counts().sort_values(ascending=False)

# One bar per diagnosis
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(class_distribution.index, class_distribution.values, color='skyblue')
ax.set_xlabel('Class')
ax.set_ylabel('Count')
ax.set_title('Class Distribution')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for readability
fig.tight_layout()  # Ensure labels fit within the figure

plt.show()

![Bar chart of sample counts per diagnosis class](Untitled-2.png)