**Importing the required libraries**

In [2]:
import pathlib
import re
import time
import webbrowser

import pandas as pd
import selenium
from IPython.core.display import HTML
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

**Scraping Dermnet website**

In [2]:
# Address of the DermNet image library page
url = "https://dermnetnz.org/image-library"

# Start a Chrome session using the driver installed by ChromeDriverManager
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)

# Load the page, then enlarge the browser window to fill the screen
driver.get(url)
driver.maximize_window()

In [3]:
# Locate every skin disorder anchor element on the main page
skin_disorder_tag_main_page = driver.find_elements("xpath", '//a[@class="imageList__group__item"]')

# Read the visible text and the href of each anchor, one (name, link) pair per anchor
name_link_pairs = [(tag.text, tag.get_attribute("href")) for tag in skin_disorder_tag_main_page]
disorder_names = [name for name, _ in name_link_pairs]
link_list = [link for _, link in name_link_pairs]

# Preview the first ten entries of both lists
names_preview = disorder_names[:10]
links_preview = link_list[:10]
print(f'The first 10 skin condition names: \n {names_preview}\n')
print(f'The first 10 links to skin condition image links:\n{links_preview}')

The first 10 skin condition names: 
 ['Acne affecting the back images', 'Acne affecting the face images', 'Acne and other follicular disorder images', 'Acquired dermal macular hyperpigmentation images', 'Acral lentiginous melanoma images', 'Actinic keratosis affecting the face images', 'Actinic keratosis affecting the hand images', 'Actinic keratosis affecting the legs and feet images', 'Actinic keratosis affecting the scalp images', 'Actinic keratosis dermoscopy images']

The first 10 links to skin condition image links:
['https://dermnetnz.org/topics/acne-affecting-the-back-images/?stage=Live', 'https://dermnetnz.org/topics/acne-face-images/?stage=Live', 'https://dermnetnz.org/image-catalogue/acne-and-other-follicular-disorder-images/?stage=Live', 'https://dermnetnz.org/topics/acquired-dermal-macular-hyperpigmentation-images/?stage=Live', 'https://dermnetnz.org/topics/acral-lentiginous-melanoma-images/?stage=Live', 'https://dermnetnz.org/topics/actinic-keratosis-face-images/?stage=Li

In [4]:
# Report how many skin disorders were collected from the main page
disorder_count = len(disorder_names)
print(f'The are {disorder_count} skin conditions listed in the DermNet website.')

The are 294 skin conditions listed in the DermNet website.


In [5]:
# Pair every skin disorder name with the link to its image page
name_link_df = pd.DataFrame({'skin_disorder_name': disorder_names, 'link': link_list})

# Create the output folder first: to_csv does not create missing directories
pathlib.Path('Data').mkdir(parents=True, exist_ok=True)

# Saving the dataframe as a csv file
name_link_df.to_csv('Data/name_link.csv', index=False)

# Previewing the first five rows of the dataframe
name_link_df.head()

Unnamed: 0,skin_disorder_name,link
0,Acne affecting the back images,https://dermnetnz.org/topics/acne-affecting-th...
1,Acne affecting the face images,https://dermnetnz.org/topics/acne-face-images/...
2,Acne and other follicular disorder images,https://dermnetnz.org/image-catalogue/acne-and...
3,Acquired dermal macular hyperpigmentation images,https://dermnetnz.org/topics/acquired-dermal-m...
4,Acral lentiginous melanoma images,https://dermnetnz.org/topics/acral-lentiginous...


In [7]:
# Scrape the image URLs shown on every skin disorder page and save them to a CSV file.
# Note ⏰ This cell takes about half an hour to complete running.

# initialize the webdriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# create an empty list to store the dataframes for each link/disease pair
dfs = []

try:
    # maximize the window once, up front, to ensure all elements are visible
    driver.maximize_window()

    # loop through each link/disease pair
    for link, disease_name in zip(link_list, disorder_names):
        # create an empty list to store the image URLs
        image_disease = []

        try:
            # navigate to the link
            driver.get(link)

            # find all the elements on the page with the class "imageLinkBlock__item__image"
            skin_image_disorder = driver.find_elements("xpath", '//div[@class="imageLinkBlock__item__image"]')

            # loop through each element and collect the src of every <img> inside it
            for image_block in skin_image_disorder:
                for img_tag in image_block.find_elements("tag name", "img"):
                    src = img_tag.get_attribute("src")
                    # skip <img> tags without a src so that no empty URL ends up in the CSV
                    if src:
                        image_disease.append(src)

            # one row per image: the disease name and the image URL
            rows = [{'skin_disorder_name': disease_name, 'images': image_url} for image_url in image_disease]

            # create the DataFrame using the list of dictionaries
            df = pd.DataFrame(rows, columns=['skin_disorder_name', 'images'])

            # add the dataframe to the list of dataframes
            dfs.append(df)

        except Exception as e:
            # if an error occurs, print the error message and move to the next link/disease pair
            print(f"Error occurred for {disease_name}: {str(e)}")
            continue
finally:
    # always close the browser started by this cell, even if the scrape is interrupted
    driver.quit()

# concatenate all the dataframes into a single dataframe with a fresh 0..n-1 index;
# pd.concat raises on an empty list, so fall back to an empty frame when every page failed
if dfs:
    result_df = pd.concat(dfs, ignore_index=True)
else:
    result_df = pd.DataFrame(columns=['skin_disorder_name', 'images'])

# Turning all the skin_disorder_names to lower case:
result_df['skin_disorder_name'] = result_df['skin_disorder_name'].str.lower()

# make sure the output folder exists, then save the result dataframe to "Data/data1-294.csv"
pathlib.Path('Data').mkdir(parents=True, exist_ok=True)
result_df.to_csv('Data/data1-294.csv', index=False)

In [11]:
# Reload the scraped image URLs (one row per image) from the saved CSV file
scraped_images_csv = 'Data/data1-294.csv'
image_df = pd.read_csv(scraped_images_csv)

# Show the first three rows
image_df.head(n=3)

Unnamed: 0,skin_disorder_name,images
0,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...
1,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...
2,acne affecting the back images,https://dermnetnz.org/assets/Uploads/acne/acne...


In [13]:
# Function takes in the image url and returns an html <img> tag that displays the image
def to_img_tag(path):
    """Return an HTML ``<img>`` tag (50px wide) whose ``src`` is ``path``.

    Args:
        path: Image URL. Any value that is not a string (for example the NaN
            pandas uses for a missing cell) produces an empty string.

    Returns:
        str: The ``<img>`` tag, or ``''`` when ``path`` is not a string.
    """
    # Imported locally so a notebook variable named `html` cannot shadow the module
    from html import escape

    if not isinstance(path, str):
        return ''

    # The URL comes from scraped pages: escape it so quotes or '&' cannot break out of the attribute
    return '<img src="' + escape(path, quote=True) + '" width="50" >'

In [14]:
# Save the HTML table to a file.
# The encoding is set explicitly and declared inside the file, so the result does not
# depend on the platform's default text encoding.
with open('Data/image_table.html', 'w', encoding='utf-8') as f:
    f.write('<meta charset="utf-8">\n')
    f.write(image_df.to_html(escape=False, formatters=dict(images=to_img_tag)))

In [17]:
# Open the HTML file in the default web browser.
# webbrowser.open expects a URL, so the relative path is turned into an absolute file:// URI.
webbrowser.open(pathlib.Path('Data/image_table.html').resolve().as_uri())

True

In [9]:
# Downloading and Saving the images into a folder:
# Note ⏰ This cell takes about an hour and a half to complete running.

import requests as rq
import os
import pathlib
import pandas as pd


def save_image(folder: str, name: str, url: str, index: int, timeout: float = 30):
    """Download one image and write it to ``folder`` as ``<name><index><extension>``.

    Args:
        folder: Directory to write into; it is created when it does not exist.
        name: Base file name. Path separators in it are replaced with ``_``.
        url: Address of the image to download.
        index: Number appended to ``name`` in the output file name.
        timeout: Seconds to wait for the server before the request is abandoned.

    Returns:
        None. A message is printed saying whether the download succeeded.
        When the server answers with an error status, no file is written.
    """
    # Get the data from the url; the timeout keeps one stalled request from blocking the whole run
    image_source = rq.get(url, timeout=timeout)

    # An error response carries an error page, not an image: report it instead of saving it
    if not image_source.ok:
        print(f'Failed to download: {url} (HTTP {image_source.status_code})')
        return

    # Take the extension from the URL path only (query string and fragment removed),
    # compared case-insensitively so that e.g. ".JPG?v=2" is recognised as ".jpg"
    url_path = url.split('?', 1)[0].split('#', 1)[0]
    suffix = pathlib.PurePosixPath(url_path).suffix.lower()

    # Default to .png when the extension is not one of the known image types
    if suffix not in ['.jpg', '.jpeg', '.png', '.gif']:
        suffix = '.png'

    # A path separator inside the name would point to a different directory
    safe_name = name.replace('/', '_').replace('\\', '_')
    output = safe_name + str(index) + suffix

    # Create the folder if needed
    os.makedirs(folder, exist_ok=True)

    # Create our output in the specified folder (wb = write bytes)
    with open(os.path.join(folder, output), 'wb') as file:
        file.write(image_source.content)
    print(f'Successfully downloaded: {output}')


if __name__ == '__main__':
    # Load the dataframe with image urls and disease names
    df = pd.read_csv('Data/data1-294.csv')

    # Loop through the dataframe
    for index, row in df.iterrows():
        # Get the image url and disease name
        image_url = row['images']
        disease_name = row['skin_disorder_name']

        # An empty cell in the CSV is read back as NaN: there is nothing to download for it
        if pd.isna(image_url):
            print(f'Skipping row {index}: no image url for {disease_name}')
            continue

        try:
            # Save the image
            save_image('Images/', disease_name, image_url, index)
        except Exception as e:
            # One failed download should not abort the rest of this long-running loop
            print(f"Error occurred for {disease_name}{index}: {str(e)}")
            continue

Successfully downloaded: acne affecting the back images0.jpg
Successfully downloaded: acne affecting the back images1.jpg
Successfully downloaded: acne affecting the back images2.jpg
Successfully downloaded: acne affecting the back images3.jpg
Successfully downloaded: acne affecting the back images4.jpg
Successfully downloaded: acne affecting the back images5.jpg
Successfully downloaded: acne affecting the back images6.jpg
Successfully downloaded: acne affecting the back images7.jpg
Successfully downloaded: acne affecting the back images8.jpg
Successfully downloaded: acne affecting the back images9.jpg
Successfully downloaded: acne affecting the back images10.jpg
Successfully downloaded: acne affecting the back images11.jpg
Successfully downloaded: acne affecting the back images12.jpg
Successfully downloaded: acne affecting the back images13.jpg
Successfully downloaded: acne affecting the back images14.jpg
Successfully downloaded: acne affecting the back images15.jpg
Successfully downl

**Scraping Mind the Gap: Supporting Black and Brown Skin Health website**