In [None]:
# Install the Kaggle CLI, register the uploaded API token, then download and
# unpack the ISIC 2019 archive into ./data.
!pip install kaggle

from google.colab import files
# Opens Colab's upload dialog. The `cp` below expects the uploaded file to be
# named kaggle.json and to sit in the current working directory.
files.upload()

import os

# Make the Kaggle directory (no error if it already exists)
os.makedirs(os.path.expanduser("~/.kaggle"), exist_ok=True)

# Copy (not move) the kaggle.json file into the created directory; the uploaded
# original stays in the working directory.
!cp kaggle.json ~/.kaggle/

# Restrict the token file to owner read/write
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset archive; the unzip step below looks for it as
# isic-2019.zip in the working directory.
!kaggle datasets download -d andrewmvd/isic-2019

import zipfile

# Ensure the file exists before trying to unzip.
# A missing archive is only reported with a message: nothing is raised, so
# later cells that read from data/ would be the ones to fail.
if os.path.exists('isic-2019.zip'):
    with zipfile.ZipFile('isic-2019.zip', 'r') as zip_ref:
        zip_ref.extractall('data')
else:
    print("Zip file not found!")



Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/andrewmvd/isic-2019
License(s): Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)
Downloading isic-2019.zip to /content
100% 9.10G/9.10G [08:35<00:00, 19.8MB/s]
100% 9.10G/9.10G [08:35<00:00, 18.9MB/s]


In [None]:
# Install the third-party packages imported below. No versions are pinned, so
# the resolved versions depend on when this cell is run.
!pip install efficientnet-pytorch
!pip install torchattacks

import os
import random

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchattacks
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from efficientnet_pytorch import EfficientNet
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torchvision.utils import save_image

Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->efficientnet-pytorch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->efficientnet-pytorch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.

In [None]:
# Read the ground-truth label table and the per-image metadata table
ground_truth = pd.read_csv('data/ISIC_2019_Training_GroundTruth.csv')
metadata = pd.read_csv('data/ISIC_2019_Training_Metadata.csv')

# Join both tables on the shared 'image' id; 'lesion_id' is dropped from the result
merge_df = ground_truth.merge(metadata, on='image').drop(columns='lesion_id')

# One indicator column per diagnosis; a column's position in this list is its numeric target
diagnosis_cols = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK']

# Column name -> numeric target (MEL=0 ... UNK=8)
target_map = {code: index for index, code in enumerate(diagnosis_cols)}

# Numeric target -> human-readable diagnosis, listed in the same order as diagnosis_cols
diagnosis_map = dict(enumerate([
    'Melanoma',
    'Melanocytic nevus',
    'Basal cell carcinoma',
    'Actinic keratosis',
    'Benign keratosis',
    'Dermatofibroma',
    'Vascular lesion',
    'Squamous cell carcinoma',
    'Unknown',
]))

# The diagnosis column holding the row-wise maximum decides the numeric target
merge_df['target'] = merge_df[diagnosis_cols].idxmax(axis='columns').map(target_map)

# Readable diagnosis next to the numeric target
merge_df['diagnosis'] = merge_df['target'].map(diagnosis_map)

# Working frame: id, label columns, image file path, and a coarser body-site
# column in which the three torso variants are collapsed into 'torso'
df = merge_df.loc[:, ['image', 'target', 'diagnosis']].assign(
    images=lambda frame: 'data/ISIC_2019_Training_Input/ISIC_2019_Training_Input/' + frame['image'] + '.jpg',
    anatom_site_general=merge_df['anatom_site_general'].replace(
        ['lateral torso', 'anterior torso', 'posterior torso'], 'torso'
    ),
)

# Display the first few rows
print(df.head())


          image  target          diagnosis  \
0  ISIC_0000000       1  Melanocytic nevus   
1  ISIC_0000001       1  Melanocytic nevus   
2  ISIC_0000002       0           Melanoma   
3  ISIC_0000003       1  Melanocytic nevus   
4  ISIC_0000004       0           Melanoma   

                                              images anatom_site_general  
0  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  
1  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  
2  data/ISIC_2019_Training_Input/ISIC_2019_Traini...     upper extremity  
3  data/ISIC_2019_Training_Input/ISIC_2019_Traini...     upper extremity  
4  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  


In [None]:
# Custom Dataset Class
class MelanomaDataset(Dataset):
    """Map-style dataset that loads images listed in a DataFrame.

    Args:
        df: Table with an 'images' column (path of each image file) and a
            'target' column (class label). Rows are addressed by position,
            so the frame's index values do not matter.
        transform: Optional callable applied to the loaded image before it is
            returned.

    Each item is an ``(image, label)`` pair, where ``label`` is the row's
    'target' value.

    NOTE(review): `Image` is presumably PIL's ``Image`` module; its import is
    not visible in this notebook — confirm it is in scope.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Fetch the row once instead of once per column
        row = self.df.iloc[idx]
        # Image.open keeps the file open until the pixel data is read. The
        # context manager closes it deterministically, and convert() returns
        # an independent, fully loaded 3-channel image, so every sample has
        # the same channel count whatever mode the file was stored in.
        with Image.open(row['images']) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, row['target']

# Augmentation + normalization pipelines keyed by split.
# 'train' applies a random crop, flips and colour jitter; 'val' applies a
# deterministic resize and centre crop. Both end with ToTensor and the same
# Normalize mean/std (the values commonly used with ImageNet-pretrained models).
# NOTE(review): `data_transforms` is defined here but the datasets below are
# built with `transform`, so in this cell neither the augmentation nor the
# normalization is actually applied — confirm this is intended.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Plain preprocessing used for both splits below: fixed-size resize, then
# conversion to a tensor (no augmentation, no normalization).
transform = transforms.Compose([
    transforms.Resize((224,224)),  # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to PyTorch tensors
])


# Splitting the data into training and validation sets (80/20, fixed seed).
# NOTE(review): `train_test_split` is not imported in any visible cell —
# presumably sklearn.model_selection.train_test_split; confirm.
# NOTE(review): no `stratify` argument is passed, so class proportions in the
# two splits are left to chance.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Creating datasets and dataloaders; both splits share the same `transform`
train_dataset = MelanomaDataset(train_df, transform=transform)
val_dataset = MelanomaDataset(val_df, transform=transform)

# Only the training loader shuffles; both use 12 worker processes.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=12)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=12)




In [None]:
class MelanomaModel(nn.Module):
    """Pretrained EfficientNet-B0 backbone followed by a three-layer MLP head.

    Args:
        out_size: Number of output logits produced by the last linear layer.
        dropout_prob: Drop probability of the dropout applied after each of
            the two hidden layers.
    """

    def __init__(self, out_size, dropout_prob=0.5):
        super().__init__()
        # Swap the backbone's own classifier for a pass-through so it emits features
        backbone = EfficientNet.from_pretrained('efficientnet-b0')
        backbone._fc = nn.Identity()
        self.efficient_net = backbone
        # Head: 1280 (EfficientNet-b0 output features) -> 512 -> 256 -> out_size
        self.fc1 = nn.Linear(1280, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, out_size)
        self.dropout = nn.Dropout(dropout_prob)
        # Stored on the module; forward() does not use it
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the raw logits for a batch of inputs."""
        features = self.efficient_net(x)
        hidden = features.view(features.size(0), -1)  # flatten everything but the batch axis
        # Both hidden layers share the same pattern: linear -> ReLU -> dropout
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)

# Number of classes (no model is instantiated in this cell)
num_classes = 9


# Preview of the working frame
print(df.head())

# Images per numeric target, ordered by target id
class_counts = df['target'].value_counts().sort_index()

# One line per class: id, readable name, image count
for label, count in zip(class_counts.index, class_counts.tolist()):
    print(f"Class {label} ({diagnosis_map[label]}): {count} images")



          image  target          diagnosis  \
0  ISIC_0000000       1  Melanocytic nevus   
1  ISIC_0000001       1  Melanocytic nevus   
2  ISIC_0000002       0           Melanoma   
3  ISIC_0000003       1  Melanocytic nevus   
4  ISIC_0000004       0           Melanoma   

                                              images anatom_site_general  
0  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  
1  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  
2  data/ISIC_2019_Training_Input/ISIC_2019_Traini...     upper extremity  
3  data/ISIC_2019_Training_Input/ISIC_2019_Traini...     upper extremity  
4  data/ISIC_2019_Training_Input/ISIC_2019_Traini...               torso  
Class 0 (Melanoma): 4522 images
Class 1 (Melanocytic nevus): 12875 images
Class 2 (Basal cell carcinoma): 3323 images
Class 3 (Actinic keratosis): 867 images
Class 4 (Benign keratosis): 2624 images
Class 5 (Dermatofibroma): 239 images
Class 6 (Vascular lesion): 253 images


In [None]:
import os
import shutil
import numpy as np
from google.colab import drive

# Build a 10,000-image test set and export it to Google Drive, one folder per
# class: class 5 is taken in full, the other classes are sampled in
# proportion to their size.

# Mount Google Drive
drive.mount('/content/drive')

# Total number of images wanted in the test set
total_images_needed = 10000

# Per-class image counts, read from the data rather than hard-coded so they
# cannot drift from what `df` actually contains
label_counts = df['target'].value_counts().sort_index()

# Class 5 is included completely; the rest of the budget is shared by the others
class_5_images = int(label_counts.get(5, 0))
remaining_images_needed = total_images_needed - class_5_images

# Number of images in each class (excluding class 5)
class_counts = {
    int(label): int(count)
    for label, count in label_counts.items()
    if label != 5
}

total_other_class_images = sum(class_counts.values())

# Fail early with a clear message instead of part-way through sampling
if not 0 <= remaining_images_needed <= total_other_class_images:
    raise ValueError(
        f"Cannot draw {remaining_images_needed} images from "
        f"{total_other_class_images} available outside class 5"
    )

# Share of the remaining budget that each class receives
class_proportions = {key: val / total_other_class_images for key, val in class_counts.items()}

# int() truncates, so the per-class quotas can add up to slightly less than the budget
class_samples = {key: int(proportion * remaining_images_needed) for key, proportion in class_proportions.items()}

# Give the truncation remainder to class 0 so the quotas sum to remaining_images_needed
class_samples[0] += remaining_images_needed - sum(class_samples.values())

# Include class 5 samples
class_samples[5] = class_5_images

# Draw the quota for every class with a fixed seed.
# NOTE(review): rows are sampled from the full `df`, so this test set is not
# disjoint from the `train_df` / `val_df` split made earlier — confirm this is intended.
test_df_list = [
    df[df['target'] == label].sample(count, random_state=42)
    for label, count in class_samples.items()
]

test_df = pd.concat(test_df_list).reset_index(drop=True)

# Ensure the total number of images is 10,000
print(f"Total images in test dataset: {len(test_df)}")

# Save the test dataset to Google Drive
test_dataset_path = '/content/drive/My Drive/testdataset'

# Create the main testdataset directory if it doesn't exist
os.makedirs(test_dataset_path, exist_ok=True)

# Create class-specific directories and copy each image into its class folder
for label in test_df['target'].unique():
    class_dir = os.path.join(test_dataset_path, str(label))
    os.makedirs(class_dir, exist_ok=True)
    for src in test_df.loc[test_df['target'] == label, 'images']:
        shutil.copy(src, os.path.join(class_dir, os.path.basename(src)))

# Report what is on disk per class and in total. Each folder is listed once,
# and files left over from an earlier export are counted too.
total_test_images = 0
for label in test_df['target'].unique():
    class_dir = os.path.join(test_dataset_path, str(label))
    files_in_class = len(os.listdir(class_dir))
    total_test_images += files_in_class
    print(f"Class {label} ({diagnosis_map[label]}): {files_in_class} images")

# Print the total number of images in the test dataset
print(f"Total images in the test dataset: {total_test_images}")

Mounted at /content/drive
Total images in test dataset: 10000
Class 0 (Melanoma): 1762 images
Class 1 (Melanocytic nevus): 5008 images
Class 2 (Basal cell carcinoma): 1292 images
Class 3 (Actinic keratosis): 337 images
Class 4 (Benign keratosis): 1020 images
Class 6 (Vascular lesion): 98 images
Class 7 (Squamous cell carcinoma): 244 images
Class 5 (Dermatofibroma): 239 images
Total images in the test dataset: 10000


In [None]:
import os
# Folder whose image files are counted
test_dir = '/content/drive/My Drive/testdataset/7'

# Collect regular files whose name ends with one of the accepted image extensions
image_paths = []
for filename in os.listdir(test_dir):
    candidate = os.path.join(test_dir, filename)
    if os.path.isfile(candidate) and filename.endswith(('.jpg', '.jpeg', '.png')):
        image_paths.append(candidate)

# Report how many were found
print(f"Found {len(image_paths)} images in {test_dir}")


Found 244 images in /content/drive/My Drive/testdataset/7


In [None]:
class MelanomaModel(nn.Module):
    """Pretrained EfficientNet-B0 backbone followed by a three-layer MLP head.

    Args:
        out_size: Number of output logits produced by the last linear layer.
        dropout_prob: Drop probability of the dropout applied after each of
            the two hidden layers.
    """

    def __init__(self, out_size, dropout_prob=0.5):
        super().__init__()
        # Swap the backbone's own classifier for a pass-through so it emits features
        backbone = EfficientNet.from_pretrained('efficientnet-b0')
        backbone._fc = nn.Identity()
        self.efficient_net = backbone
        # Head: 1280 (EfficientNet-b0 output features) -> 512 -> 256 -> out_size
        self.fc1 = nn.Linear(1280, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, out_size)
        self.dropout = nn.Dropout(dropout_prob)
        # Stored on the module; forward() does not use it
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the raw logits for a batch of inputs."""
        features = self.efficient_net(x)
        hidden = features.view(features.size(0), -1)  # flatten everything but the batch axis
        # Both hidden layers share the same pattern: linear -> ReLU -> dropout
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)

# Number of classes. NOTE: no model is instantiated here and `num_classes` is
# not used in this cell; the model object comes out of the checkpoint below.
num_classes = 9 # Number of classes

from google.colab import drive
drive.mount('/content/drive')

# Load the saved model and weights from Google Drive.
# The checkpoint is read as a dict with two entries: 'architecture' (used as
# the model object itself) and 'model_state_dict' (loaded into that object).
# NOTE(review): 'architecture' is presumably a pickled MelanomaModel instance,
# which would be why the class is defined before this point — confirm.
# SECURITY: torch.load unpickles the file, and unpickling can execute
# arbitrary code; only load checkpoints from a trusted source.
file_path = '/content/drive/MyDrive/multi_weight2.pth'
checkpoint = torch.load(file_path, map_location=torch.device('cpu'))
model = checkpoint['architecture']
model.load_state_dict(checkpoint['model_state_dict'])

# Move the model to the appropriate device (first CUDA GPU if available, else
# CPU) and switch it to evaluation mode
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

print("Model and weights have been loaded from Google Drive:", file_path)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model and weights have been loaded from Google Drive: /content/drive/MyDrive/multi_weight2.pth


In [None]:
# Preprocessing for the exported test images: fixed-size resize, then
# conversion to a tensor (no normalization step). This rebinds the name
# `transform` used earlier in the notebook.
# NOTE(review): the size here is 244x244, whereas the earlier train/val
# `transform` resizes to 224x224 — confirm which size the checkpoint expects.
transform = transforms.Compose([
    transforms.Resize((244,244)),
    transforms.ToTensor()
])

# ImageFolder treats every sub-directory of the root as one class.
# NOTE(review): the root is spelled 'MyDrive' here while the export cell wrote
# to 'My Drive' — presumably the same folder in Colab; confirm.
test_dataset = datasets.ImageFolder('/content/drive/MyDrive/testdataset', transform=transform)
# shuffle=False matters: later cells map batch positions back to
# test_dataset.samples by index.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=12)
# Class names are the sub-directory names; later cells use the class index
# directly as the output folder name.
class_names = test_dataset.classes
print(class_names)


['0', '1', '2', '3', '4', '5', '6', '7']




In [None]:
from tqdm.notebook import tqdm

# Define the PGD attack using torchattacks.
# NOTE(review): eps/alpha are presumably in the same scale as the input
# tensors (the test transform ends with ToTensor and no normalization) — confirm.
# NOTE(review): torchattacks.PGD starts from a random point by default
# (`random_start=True`), so outputs differ between runs unless torch is
# seeded — verify against the installed version.
epsilon = 0.04  # Maximum perturbation
alpha = epsilon / 10  # Step size
steps = 80  # Number of steps
attack_PGD = torchattacks.PGD(model, eps=epsilon, alpha=alpha, steps=steps)

# Directory to save adversarial examples (created if missing)
adv_dataset_path = '/content/drive/MyDrive/adv_testdataset_PGD'
os.makedirs(adv_dataset_path, exist_ok=True)

# Helper that maps an original image path to its location in the output tree
def get_save_path(original_path, base_dir, label):
    """Return ``<base_dir>/<label>/<file name of original_path>``.

    The ``<base_dir>/<label>`` directory is created if it does not exist yet;
    the returned file itself is not created.
    """
    target_dir = os.path.join(base_dir, str(label))
    file_name = os.path.basename(original_path)
    os.makedirs(target_dir, exist_ok=True)
    return os.path.join(target_dir, file_name)

# Attack every batch with PGD and write each adversarial image under its class folder
for batch_idx, (inputs, labels) in enumerate(tqdm(test_loader)):
    inputs, labels = inputs.to(device), labels.to(device)
    adv_inputs = attack_PGD(inputs, labels)

    # test_loader does not shuffle, so the i-th item of this batch is the
    # (batch_start + i)-th entry of test_dataset.samples
    batch_start = batch_idx * test_loader.batch_size
    for i, adv_tensor in enumerate(adv_inputs):
        adv_img = adv_tensor.cpu().detach()
        original_img_path, original_label = test_dataset.samples[batch_start + i]
        adv_img_path = get_save_path(original_img_path, adv_dataset_path, original_label)
        # NOTE(review): the output keeps the source file name, so a '.jpg'
        # source is presumably re-encoded as JPEG, and lossy compression can
        # alter the perturbation — confirm this is acceptable.
        save_image(adv_img, adv_img_path)

# Print message when completed
print("Adversarial examples saved successfully.")

  0%|          | 0/313 [00:00<?, ?it/s]

Adversarial examples saved successfully.


In [None]:
import os
# Folder whose image files are counted
test_dir = '/content/drive/My Drive/adv_testdataset_PGD/1'

# Collect regular files whose name ends with one of the accepted image extensions
image_paths = []
for filename in os.listdir(test_dir):
    candidate = os.path.join(test_dir, filename)
    if os.path.isfile(candidate) and filename.endswith(('.jpg', '.jpeg', '.png')):
        image_paths.append(candidate)

# Report how many were found
print(f"Found {len(image_paths)} images in {test_dir}")


Found 5008 images in /content/drive/My Drive/adv_testdataset_PGD/1


In [None]:
from tqdm.notebook import tqdm

# Configure the BIM attack from torchattacks (nothing is imported here; the
# `torchattacks` module must already be in scope).
# These assignments rebind `epsilon`, `alpha` and `steps` from the PGD cell.
# NOTE(review): eps/alpha are presumably in the same scale as the input
# tensors (the test transform ends with ToTensor and no normalization) — confirm.
epsilon = 0.04  # Maximum perturbation
alpha = epsilon / 10  # Step size
steps = 10  # Number of steps (iterations)
attack_BIM = torchattacks.BIM(model, eps=epsilon, alpha=alpha, steps=steps)

# Directory to save adversarial examples (created if missing); rebinds
# `adv_dataset_path` so the loop below writes to the BIM folder
adv_dataset_path = '/content/drive/MyDrive/adv_testdataset_BIM'
os.makedirs(adv_dataset_path, exist_ok=True)

# Helper that maps an original image path to its location in the output tree
# (same definition as the one in the PGD cell)
def get_save_path(original_path, base_dir, label):
    """Return ``<base_dir>/<label>/<file name of original_path>``.

    The ``<base_dir>/<label>`` directory is created if it does not exist yet;
    the returned file itself is not created.
    """
    target_dir = os.path.join(base_dir, str(label))
    file_name = os.path.basename(original_path)
    os.makedirs(target_dir, exist_ok=True)
    return os.path.join(target_dir, file_name)

# Attack every batch with BIM and write each adversarial image under its class folder
for batch_idx, (inputs, labels) in enumerate(tqdm(test_loader)):
    inputs, labels = inputs.to(device), labels.to(device)
    adv_inputs = attack_BIM(inputs, labels)

    # test_loader does not shuffle, so the i-th item of this batch is the
    # (batch_start + i)-th entry of test_dataset.samples
    batch_start = batch_idx * test_loader.batch_size
    for i, adv_tensor in enumerate(adv_inputs):
        adv_img = adv_tensor.cpu().detach()
        original_img_path, original_label = test_dataset.samples[batch_start + i]
        adv_img_path = get_save_path(original_img_path, adv_dataset_path, original_label)
        # NOTE(review): the output keeps the source file name, so a '.jpg'
        # source is presumably re-encoded as JPEG, and lossy compression can
        # alter the perturbation — confirm this is acceptable.
        save_image(adv_img, adv_img_path)

# Print message when completed
print("Adversarial examples saved successfully.")


  0%|          | 0/313 [00:00<?, ?it/s]

Adversarial examples saved successfully.


In [None]:
import os
# Folder whose image files are counted
test_dir = '/content/drive/My Drive/adv_testdataset_BIM/6'

# Collect regular files whose name ends with one of the accepted image extensions
image_paths = []
for filename in os.listdir(test_dir):
    candidate = os.path.join(test_dir, filename)
    if os.path.isfile(candidate) and filename.endswith(('.jpg', '.jpeg', '.png')):
        image_paths.append(candidate)

# Report how many were found
print(f"Found {len(image_paths)} images in {test_dir}")


Found 98 images in /content/drive/My Drive/adv_testdataset_BIM/6
