Images are scraped with Selenium from the [DermNet NZ Image Library](https://dermnetnz.org/image-library) and the [ISIC 2019 Challenge](https://challenge.isic-archive.com/data/#2019) dataset.

In [None]:
# install required libraries
!pip install selenium
!pip install webdriver_manager

**Import required libraries**

In [None]:
# import required libraries
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import re
import time
from IPython.core.display import HTML
import webbrowser
import requests as rq
import os
import pathlib
import pandas as pd

**Scraping Dermnet website**


In [None]:
# Launch a Chrome session and open the DermNet NZ image library landing page
url = "https://dermnetnz.org/image-library"

# webdriver_manager resolves the chromedriver binary that the service wraps
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)

driver.get(url)
driver.maximize_window()

[WDM] - Downloading: 100%|██████████| 6.80M/6.80M [00:02<00:00, 3.24MB/s]


In [None]:
# Collect every skin-disorder anchor shown on the image-library landing page
skin_disorder_tag_main_page = driver.find_elements("xpath", '//a[@class="imageList__group__item"]')

# Read the visible name and the target link of each anchor in a single pass,
# then split the pairs into the two parallel lists used further down
name_link_pairs = [
    (anchor.text, anchor.get_attribute("href"))
    for anchor in skin_disorder_tag_main_page
]
disease_label = [name for name, _ in name_link_pairs]
link_list = [href for _, href in name_link_pairs]

# Quick look at the head of both lists
print(f'The first 10 skin condition names: \n {disease_label[:10]}\n')
print(f'The first 10 links to skin condition image links:\n{link_list[:10]}')

The first 10 skin condition names: 
 ['Acne affecting the back images', 'Acne affecting the face images', 'Acne and other follicular disorder images', 'Acquired dermal macular hyperpigmentation images', 'Acral lentiginous melanoma images', 'Actinic keratosis affecting the face images', 'Actinic keratosis affecting the hand images', 'Actinic keratosis affecting the legs and feet images', 'Actinic keratosis affecting the scalp images', 'Actinic keratosis dermoscopy images']

The first 10 links to skin condition image links:
['https://dermnetnz.org/topics/acne-affecting-the-back-images/?stage=Live', 'https://dermnetnz.org/topics/acne-face-images/?stage=Live', 'https://dermnetnz.org/image-catalogue/acne-and-other-follicular-disorder-images/?stage=Live', 'https://dermnetnz.org/topics/acquired-dermal-macular-hyperpigmentation-images/?stage=Live', 'https://dermnetnz.org/topics/acral-lentiginous-melanoma-images/?stage=Live', 'https://dermnetnz.org/topics/actinic-keratosis-face-images/?stage=Li

In [None]:
# Visit every disorder page collected from the landing page and record one
# (disorder name, image URL) row per image found on it.

# start a fresh browser session for the per-disorder crawl
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# one DataFrame per successfully scraped disorder page
dfs = []
try:
    # maximize once, before navigating, so page elements are visible on every page
    driver.maximize_window()

    # `disease_label` is the list of names built from the landing page; it is
    # iterated under a different loop-variable name so the list is not overwritten
    # and this cell can be re-run.
    for link, disorder_name in zip(link_list, disease_label):
        try:
            # navigate to the disorder page
            driver.get(link)

            # every thumbnail lives in a div with class "imageLinkBlock__item__image"
            skin_image_disorder = driver.find_elements("xpath", '//div[@class="imageLinkBlock__item__image"]')

            # gather the src of each <img> nested inside those divs
            image_disease = [
                img.get_attribute("src")
                for block in skin_image_disorder
                for img in block.find_elements("tag name", "img")
            ]

            # one row per image; images without a src are dropped because a row
            # with no URL cannot be downloaded later
            rows = [
                {'skin_disorder_name': disorder_name, 'images': src}
                for src in image_disease
                if src
            ]
            dfs.append(pd.DataFrame(rows, columns=['skin_disorder_name', 'images']))

        # a failure on one page is reported and must not abort the whole crawl
        except Exception as e:
            print(f"Error occurred for {disorder_name}: {str(e)}")
            continue
finally:
    # always release the browser session, even if the crawl is interrupted
    driver.quit()

# concatenate the per-page frames; pd.concat raises on an empty list, so fall
# back to an empty frame with the expected columns when nothing was scraped
if dfs:
    result_df = pd.concat(dfs, ignore_index=True)
else:
    result_df = pd.DataFrame(columns=['skin_disorder_name', 'images'])

# text normalization: lower-case the disorder names
result_df['skin_disorder_name'] = result_df['skin_disorder_name'].str.lower()

# save the result dataframe to a CSV file named "dermnetnz.csv"
result_df.to_csv('dermnetnz.csv', index=False)

In [None]:
# Reload the scraped (disorder name, image URL) table from disk
image_df = pd.read_csv('dermnetnz.csv')

# preview the leading five rows
image_df.head()

Unnamed: 0,skin_disorder_name,images
0,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...
1,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...
2,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...


In [None]:
# downloading images
def save_image(folder: str, name: str, url: str, index: int, timeout: float = 30.0) -> None:
    """Download one image and write it to ``folder`` as ``<name><index><suffix>``.

    Args:
        folder: Target directory; created if it does not exist. A trailing
            path separator is accepted but not required.
        name: File-name stem (the disorder name).
        url: Address of the image to download.
        index: Number appended to ``name`` to tell files apart.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. A message is printed for both success and failure; a failed
        request writes no file.
    """
    from urllib.parse import urlparse

    # A timeout keeps one stalled server from hanging the whole run, and the
    # status check stops HTTP error pages from being saved as image files.
    try:
        image_source = rq.get(url, timeout=timeout)
        image_source.raise_for_status()
    except rq.RequestException as error:
        print(f'Failed to download {url}: {error}')
        return

    # file format: take the suffix from the URL *path* only, so a query string
    # such as "?stage=Live" does not become part of the extension
    suffix = pathlib.PurePosixPath(urlparse(url).path).suffix
    if suffix.lower() not in ['.jpg', '.jpeg', '.png', '.gif']:
        # set to png
        suffix = '.png'
    output = name + str(index) + suffix

    # an empty folder means "current directory", which needs no creation
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(os.path.join(folder, output), 'wb') as file:
        file.write(image_source.content)
    print(f'Successfully downloaded: {output}')


if __name__ == '__main__':
    # Read the scraped table from the same path the scraping cell writes it to
    # ('dermnetnz.csv'), so the notebook runs top to bottom on a fresh kernel.
    df = pd.read_csv('dermnetnz.csv')

    # Walk the row labels and the two columns in step. The loop variable is named
    # `disorder_name` so that the `disease_label` list built from the landing
    # page is not overwritten by a single string.
    for index, disorder_name, image_url in zip(df.index, df['skin_disorder_name'], df['images']):
        # a row without a URL cannot be downloaded; report it and move on
        if pd.isna(image_url):
            print(f'Skipping row {index}: no image URL')
            continue

        # saving image
        save_image('Images/', disorder_name, image_url, index)

Successfully downloaded: acne affecting the back images0.jpg
Successfully downloaded: acne affecting the back images1.jpg
Successfully downloaded: acne affecting the back images2.jpg
Successfully downloaded: acne affecting the back images3.jpg
Successfully downloaded: acne affecting the back images4.jpg
Successfully downloaded: acne affecting the back images5.jpg
Successfully downloaded: acne affecting the back images6.jpg
Successfully downloaded: acne affecting the back images7.jpg
Successfully downloaded: acne affecting the back images8.jpg
Successfully downloaded: acne affecting the back images9.jpg
Successfully downloaded: acne affecting the back images10.jpg
Successfully downloaded: acne affecting the back images11.jpg
Successfully downloaded: acne affecting the back images12.jpg
Successfully downloaded: acne affecting the back images13.jpg
Successfully downloaded: acne affecting the back images14.jpg
Successfully downloaded: acne affecting the back images15.jpg
Successfully downl