In [1]:
import os
from pathlib import Path
from glob import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
from PIL import Image

In [2]:
# Root folder of the competition data, relative to the notebook's working directory.
DATA_PATH = Path("../input/siim-isic-melanoma-classification")

# CSV tables that sit directly under the data root.
TRAIN_PATH = DATA_PATH.joinpath("train.csv")
TEST_PATH = DATA_PATH.joinpath("test.csv")

# JPEG image folders for the train and test splits.
IMAGE_TRAIN_PATH = DATA_PATH.joinpath("jpeg", "train")
IMAGE_TEST_PATH = DATA_PATH.joinpath("jpeg", "test")

In [4]:
# Read the training CSV into memory, then preview its first five rows.
train_df = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
train_df.head(n=5)

## Visualizing benign and malignant melanoma

In [36]:
def plot_melanoma(target=0, body_part="torso", n=20, random_state=0):
    """Show a random sample of training images for one class and one body site.

    Rows of ``train_df`` are filtered on the ``target`` and
    ``anatom_site_general_challenge`` columns, ``n`` of them are sampled, and
    the corresponding ``<image_name>.jpg`` files under ``IMAGE_TRAIN_PATH``
    are drawn on a grid of subplots.

    Layout: a single row when ``n <= 5``; otherwise 4 columns and as many rows
    as needed (``n=20`` yields a 5 x 4 grid). Grid cells left empty on the
    last row are hidden.

    Parameters
    ----------
    target : value compared against the ``target`` column (default 0).
    body_part : str
        Value compared against ``anatom_site_general_challenge``.
    n : int
        Number of images to sample and display; must be at least 1.
    random_state : int
        Seed forwarded to ``DataFrame.sample`` so the selection is repeatable.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1, or fewer than ``n`` rows match the filter.
    OSError
        If ``cv2.imread`` cannot read one of the sampled image files.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))

    matches = train_df[
        (train_df["target"] == target)
        & (train_df["anatom_site_general_challenge"] == body_part)
    ]
    # Fail with an explicit message instead of the generic pandas sampling error.
    if len(matches) < n:
        raise ValueError(
            "Requested n=%r images but only %d rows match target=%r, body_part=%r"
            % (n, len(matches), target, body_part)
        )

    sampled_names = matches.sample(n=n, random_state=random_state)["image_name"]
    image_paths = [IMAGE_TRAIN_PATH / (name + ".jpg") for name in sampled_names]

    # Derive the grid from n rather than hard-coding 5 x 4, so any n >= 1 works.
    n_cols = n if n <= 5 else 4
    n_rows = -(-n // n_cols)  # ceiling division
    # squeeze=False always returns a 2-D array of Axes, even for a 1 x 1 grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 3 * n_rows), squeeze=False)
    flat_axes = axes.ravel()

    for ax, path in zip(flat_axes, image_paths):
        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            plt.close(fig)
            raise OSError("cv2.imread could not read image: %s" % path)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Hide the unused cells on the last row when n is not a multiple of n_cols.
    for ax in flat_axes[len(image_paths):]:
        ax.axis("off")

    fig.suptitle("Melanoma (target=%s) on %s " % (target, body_part), fontsize=17, fontweight="bold")
    fig.tight_layout()

In [31]:
# Benign class, default body site and sample size.
plot_melanoma(target=0)

In [32]:
# Malignant class, default body site and sample size.
plot_melanoma(target=1)

## Visualizing body parts

In [37]:
# Benign (target=0) samples located on the head/neck.
plot_melanoma(target=0, body_part="head/neck", n=5)

In [38]:
# Malignant (target=1) samples located on the head/neck.
plot_melanoma(target=1, body_part="head/neck", n=5)

In [40]:
# Benign (target=0) samples located on the upper extremity.
plot_melanoma(target=0, body_part="upper extremity", n=5)

In [41]:
# Malignant (target=1) samples located on the upper extremity.
plot_melanoma(target=1, body_part="upper extremity", n=5)

In [42]:
# Benign (target=0) samples located on the lower extremity.
plot_melanoma(target=0, body_part="lower extremity", n=5)

In [43]:
# Malignant (target=1) samples located on the lower extremity.
plot_melanoma(target=1, body_part="lower extremity", n=5)

In [44]:
# Benign (target=0) samples located on the palms/soles.
plot_melanoma(target=0, body_part="palms/soles", n=5)

In [45]:
# Malignant (target=1) samples located on the palms/soles.
plot_melanoma(target=1, body_part="palms/soles", n=5)

In [46]:
# Benign (target=0) samples located on the oral/genital area; only 3 are requested here.
plot_melanoma(target=0, body_part="oral/genital", n=3)

In [47]:
# Malignant (target=1) samples located on the oral/genital area; only 3 are requested here.
plot_melanoma(target=1, body_part="oral/genital", n=3)

- Generally speaking, malignant melanomas look darker and more asymmetric than benign lesions.

Some melanoma pictures exhibit hairs and a tiny medical scale. Both artifacts are good candidates for data augmentation.
It is worth noting that this tiny medical scale appears more often on malignant images, which might constitute a form
of leakage.

- Other ideas for future data augmentation: change in contrast, saturation, random cropping.