In [4]:
import PIL
import torch
from torchvision import transforms as T
import pandas as pd
import numpy as np
# Seed every RNG this notebook draws from so that Restart & Run All reproduces
# the same outputs: NumPy drives the class sub-sampling, while torch drives the
# random crops applied by the torchvision transforms.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

We first load the dataset and analyse whether there are any images with two or more labels.

In [2]:
# Raw ground truth: the first column holds the image name, the remaining
# columns hold one indicator per diagnosis.
ground_truth_csv = "datasets/isic_2019/ISIC_2019_Training_GroundTruth.csv"
isic_labels = pd.read_csv(ground_truth_csv)

In [3]:
# Sum the per-class indicator columns of every row; a row total above 1 means
# that image carries more than one label.
isic_label_values = isic_labels.iloc[:, 1:].to_numpy()
labels_per_image = isic_label_values.sum(axis=1)
any_double_labels = (labels_per_image > 1).any()
print(f"Are there any images with two labels? {any_double_labels}")

Are there any images with two labels? False


So all images have unique labels. Now let's format the labels correctly for each image and take a look at the distribution of labels.

In [4]:
# Integer id assigned to each diagnosis name.
label_name_to_id = {"MEL": 0, "NV": 1, "BCC": 2, "AK": 3, "BKL": 4, "DF": 5, "VASC": 6, "SCC": 7, "UNK": 8}

image_names = isic_labels.iloc[:, 0]
# The label name of an image is the column holding the row maximum.
image_labels = isic_labels.iloc[:, 1:].idxmax(axis=1)

isic_label_info = (
    pd.concat([image_names, image_labels], axis=1)
    .rename(columns={0: "label_name"})
    .assign(label=lambda frame: frame["label_name"].map(label_name_to_id))
)
isic_label_info

Unnamed: 0,image,label_name,label
0,ISIC_0000000,NV,1
1,ISIC_0000001,NV,1
2,ISIC_0000002,MEL,0
3,ISIC_0000003,NV,1
4,ISIC_0000004,MEL,0
...,...,...,...
25326,ISIC_0073247,BCC,2
25327,ISIC_0073248,BKL,4
25328,ISIC_0073249,MEL,0
25329,ISIC_0073251,NV,1


In [5]:
# Number of images per integer label.
counts = isic_label_info.groupby("label")[["label_name"]].count()
counts

Unnamed: 0_level_0,label_name
label,Unnamed: 1_level_1
0,4522
1,12875
2,3323
3,867
4,2624
5,239
6,253
7,628


Pretty bad distribution. We can solve this by first limiting labels 0 and 1 to 3000 images each and then sampling randomly from each class with a weighted distribution.

In [6]:
# Randomly keep 3000 distinct examples of label 1 (NV). The positions are drawn
# WITHOUT replacement: drawing with replacement (e.g. np.random.randint) would
# select some images several times and put duplicated rows in the table.
is_label_1 = isic_label_info[isic_label_info["label"] == 1]
is_not_label_1 = isic_label_info[isic_label_info["label"] != 1]
random_label_1_indices = np.random.choice(len(is_label_1), size=3000, replace=False)
isic_label_info_tmp = pd.concat([is_not_label_1, is_label_1.iloc[random_label_1_indices, :]])

# Randomly keep 3000 distinct examples of label 0 (MEL), again without replacement.
is_label_0 = isic_label_info_tmp[isic_label_info_tmp["label"] == 0]
is_not_label_0 = isic_label_info_tmp[isic_label_info_tmp["label"] != 0]
random_label_0_indices = np.random.choice(len(is_label_0), size=3000, replace=False)
isic_label_info_balanced = pd.concat([is_not_label_0, is_label_0.iloc[random_label_0_indices, :]])

# Every source row may appear at most once in the balanced table.
assert not isic_label_info_balanced.index.duplicated().any()

In [7]:
# Class sizes after capping labels 0 and 1.
isic_label_info_balanced.groupby("label")[["label_name"]].count()

Unnamed: 0_level_0,label_name
label,Unnamed: 1_level_1
0,3000
1,3000
2,3323
3,867
4,2624
5,239
6,253
7,628


So far so good. Now let's create a vector of probabilities for each individual image. This can be used with the WeightedRandomSampler in PyTorch.

In [8]:
# Images per class, ordered by label id (groupby sorts its keys).
balanced_label_counts = isic_label_info_balanced.groupby("label")["label_name"].count().to_numpy()

# Weight each class by (largest class size / own class size), then normalise
# the weights so that they sum to one across classes.
highest_label_count = np.full(len(balanced_label_counts), balanced_label_counts.max())
sample_ratios = highest_label_count / balanced_label_counts
sample_probabilities = sample_ratios / sample_ratios.sum()
sample_probabilities

array([0.02725287, 0.02725287, 0.02460386, 0.0943006 , 0.03115801,
       0.34208628, 0.32315661, 0.13018889])

Now we can combine this with the info table to create the final metadata table.

In [9]:
# Look up the class probability of every image through its integer label and
# attach it as a new column on a copy of the balanced table.
per_image_probability = sample_probabilities[isic_label_info_balanced["label"].to_numpy()]
isic_meta_table = isic_label_info_balanced.assign(sample_prob=per_image_probability)
isic_meta_table

Unnamed: 0,image,label_name,label,sample_prob
1008,ISIC_0010491,BKL,4,0.031158
1234,ISIC_0012086_downsampled,BKL,4,0.031158
1236,ISIC_0012090_downsampled,BKL,4,0.031158
1243,ISIC_0012103_downsampled,BKL,4,0.031158
1249,ISIC_0012117_downsampled,BKL,4,0.031158
...,...,...,...,...
13713,ISIC_0054747,MEL,0,0.027253
15303,ISIC_0057307,MEL,0,0.027253
21846,ISIC_0067747,MEL,0,0.027253
15755,ISIC_0058059,MEL,0,0.027253


Finally, let's create a function that will take the original metadata table and create our preferred one.

In [10]:
def create_isic_ground_truth_table(filepath, save_result=True, save_path=None,
                                   max_per_class=3000, capped_labels=(1, 0)):
    """Build a class-balanced ground-truth table from the ISIC 2019 label CSV.

    The CSV is expected to hold the image name in its first column and one
    indicator column per diagnosis in the remaining columns. The label of an
    image is the column holding the row maximum.

    Args:
        filepath: Path of the original ground-truth CSV.
        save_result: Whether to write the resulting table to ``save_path``.
        save_path: Destination CSV. Nothing is written when this is ``None``,
            even if ``save_result`` is true.
        max_per_class: Maximum number of images kept for each capped label.
        capped_labels: Integer labels to cap, processed in the given order.
            The default caps NV (1) and then MEL (0).

    Returns:
        A DataFrame with the image-name column of the CSV plus ``label_name``,
        ``label`` (int32) and ``sample_prob``. ``sample_prob`` is a per-class
        weight proportional to ``largest class size / class size``, normalised
        to sum to one across classes.

    Raises:
        ValueError: If the CSV contains a label column that has no integer id.

    Note:
        Sub-sampling uses NumPy's global random state; call ``np.random.seed``
        beforehand for reproducible results.
    """
    label_ids = {"MEL": 0, "NV": 1, "BCC": 2, "AK": 3, "BKL": 4, "DF": 5, "VASC": 6, "SCC": 7, "UNK": 8}

    isic_labels = pd.read_csv(filepath)

    # Format the data into a table having the image name, the label name and the label
    image_names = isic_labels.iloc[:, 0]
    label_names = isic_labels.iloc[:, 1:].idxmax(axis=1).rename("label_name")
    label_info = pd.concat([image_names, label_names], axis=1)

    # Fail with a readable message instead of an opaque NaN-to-int cast error.
    is_known = label_info["label_name"].isin(list(label_ids))
    if not is_known.all():
        unknown = label_info.loc[~is_known, "label_name"].unique().tolist()
        raise ValueError(f"Unknown label names in {filepath!r}: {unknown}")
    label_info["label"] = label_info["label_name"].map(label_ids).astype(np.int32)

    # Cap the requested classes. Rows are drawn WITHOUT replacement so that no
    # image is duplicated; classes already within the cap are kept untouched.
    balanced = label_info
    for label in capped_labels:
        is_capped = balanced["label"] == label
        capped_rows = balanced[is_capped]
        if len(capped_rows) > max_per_class:
            keep = np.random.choice(len(capped_rows), size=max_per_class, replace=False)
            capped_rows = capped_rows.iloc[keep, :]
        balanced = pd.concat([balanced[~is_capped], capped_rows])

    # Create sample probabilities. They are looked up by label value rather than
    # by array position, so non-contiguous label ids are handled correctly.
    label_counts = balanced["label"].value_counts()
    sample_ratios = label_counts.max() / label_counts
    class_probabilities = sample_ratios / sample_ratios.sum()

    isic_gt_table = balanced.copy()
    isic_gt_table["sample_prob"] = isic_gt_table["label"].map(class_probabilities)

    if save_result and save_path is not None:
        isic_gt_table.to_csv(save_path, index=False)

    return isic_gt_table

In [11]:
# Build the balanced table from the original CSV and store it next to it.
isic_gt = create_isic_ground_truth_table(
    filepath="datasets/isic_2019/ISIC_2019_Training_GroundTruth.csv",
    save_path="datasets/isic_2019/isic_2019_ground_truth.csv",
)
len(isic_gt)

13934

### Analysis of ISIC images

In [12]:
# Open one training image and force three colour channels before inspecting it.
example_image_path = "datasets/isic_2019/images/ISIC_2019_Training_Input/ISIC_0000000.jpg"
example_image = PIL.Image.open(example_image_path).convert("RGB")
print(f"size of example image: {example_image.size}")

size of example image: (1022, 767)


Thus we see remarkably larger images than we did with CIFAR. Let's now discuss how to appropriately handle these images. We suggest resizing such that the shortest side is 256 pixels, and afterwards taking a random crop as suggested in BiT. This will allow us to use a pretrained model with a 224x224 input size. We will also use the same normalization as before.

We resize such that the shortest side is 256 pixels and then take a random crop of size 224x224 because we want to keep the aspect ratio of the image.

The discussion of this should mainly focus on center crop vs random crop. We assume that all images are adequately centered, which makes a center crop a good option. However, one could also argue that a random crop introduces more varied data and lets our models learn more robust features. We will use a random crop for now, but may change to center crop later. If we change, it will be noted below.

In terms of output sizes, since both models can handle 224x224 images and the images in the dataset are of high resolution, we will scale down to 224x224 for both models.

In [13]:
# Prototype of the ISIC preprocessing pipeline:
#   1. PIL image -> uint8 tensor,
#   2. resize so that the shorter side is 256 pixels (aspect ratio is kept),
#   3. take a random 224x224 crop,
#   4. convert to float32, which rescales the uint8 values to [0, 1],
#   5. normalise each channel with the given mean / std.
isic_transform = T.Compose([
    T.PILToTensor(),
    T.Resize(256, T.InterpolationMode.BILINEAR, antialias=False),
    T.RandomCrop((224, 224)),
    T.ConvertImageDtype(torch.float32),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [14]:
# Sanity check: the pipeline should yield a 3-channel 224x224 tensor.
example_tensor = isic_transform(example_image)
example_tensor.shape

torch.Size([3, 224, 224])

Thus we can define a feature extractor for the ISIC 2019 dataset as the following:

In [15]:
def get_isic_2019_feature_extractor(image_size=224, resize_size=None):
    """Return the preprocessing pipeline for ISIC 2019 images.

    The pipeline converts a PIL image to a uint8 tensor, resizes it so that its
    shorter side equals ``resize_size`` (aspect ratio is kept), takes a random
    ``image_size`` x ``image_size`` crop, converts to float32 (rescaling the
    values to [0, 1]) and normalises every channel.

    Args:
        image_size: Side length in pixels of the square output crop.
        resize_size: Length of the shorter image side after resizing. When
            ``None`` it is derived from ``image_size`` with the 256 -> 224
            resize/crop ratio, i.e. 256 for the default ``image_size``.

    Returns:
        A ``torchvision.transforms.Compose`` mapping a PIL image to a float32
        tensor of shape ``(C, image_size, image_size)``.

    Raises:
        ValueError: If ``resize_size`` is smaller than ``image_size``, because
            the random crop would not fit inside the resized image.
    """
    if resize_size is None:
        resize_size = int(round(image_size * 256 / 224))
    if resize_size < image_size:
        raise ValueError(
            f"resize_size ({resize_size}) must be at least image_size ({image_size})"
        )

    return T.Compose([
        T.PILToTensor(),
        # Resizing to more than the crop size leaves room for the crop to vary.
        T.Resize(resize_size, T.InterpolationMode.BILINEAR, antialias=False),
        T.RandomCrop((image_size, image_size)),
        T.ConvertImageDtype(torch.float32),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])


### Loading the ISIC 2019 dataset using the python file

We have now used this to create a Python file with the dataset and the feature extractor built in.

In [2]:
import isic_2019

In [3]:
# NOTE(review): the root starts with "~"; presumably ISIC2019Dataset expands it
# to the home directory -- confirm, since earlier cells use a relative path.
isic_2019_dataset = isic_2019.ISIC2019Dataset(root="~/datasets/isic_2019")

# Show the first sample of the dataset.
first_sample = isic_2019_dataset[0]
first_sample

(tensor([[[0.4980, 0.4863, 0.4784,  ..., 0.5020, 0.4980, 0.4980],
          [0.4824, 0.4784, 0.4784,  ..., 0.5020, 0.4980, 0.4980],
          [0.4824, 0.4824, 0.4863,  ..., 0.5020, 0.5020, 0.4980],
          ...,
          [0.4706, 0.4784, 0.4824,  ..., 0.5059, 0.5059, 0.5059],
          [0.4706, 0.4745, 0.4824,  ..., 0.5137, 0.5137, 0.5137],
          [0.4706, 0.4784, 0.4863,  ..., 0.5137, 0.5137, 0.5137]],
 
         [[0.4549, 0.4431, 0.4353,  ..., 0.4980, 0.4941, 0.4941],
          [0.4431, 0.4353, 0.4353,  ..., 0.4980, 0.4941, 0.4941],
          [0.4431, 0.4431, 0.4431,  ..., 0.4980, 0.4980, 0.4941],
          ...,
          [0.4392, 0.4471, 0.4510,  ..., 0.4863, 0.4863, 0.4863],
          [0.4392, 0.4431, 0.4510,  ..., 0.4941, 0.4941, 0.4941],
          [0.4392, 0.4471, 0.4549,  ..., 0.4941, 0.4941, 0.4941]],
 
         [[0.4471, 0.4353, 0.4275,  ..., 0.4902, 0.4863, 0.4863],
          [0.4353, 0.4275, 0.4275,  ..., 0.4902, 0.4863, 0.4863],
          [0.4353, 0.4353, 0.4353,  ...,

In [6]:
# Open the first training image for inspection.
# NOTE(review): this reads from "datasets/isic_2019old", whereas earlier cells
# read from "datasets/isic_2019" -- confirm that the old copy is intended.
img = PIL.Image.open("datasets/isic_2019old/images/ISIC_2019_Training_Input/ISIC_0000000.jpg")