In [102]:
import json
import os
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

In [104]:
# Page locators used by ImageScraper. The XPath ones match the *exact* value of
# the class attribute, so any change to the site's styling classes breaks them.

# <select> in the settings menu that toggles unreleased content.
UNRELEASED_SELECT = '//select[@class="font-medium rounded-md text-blazer-950 text-right pb-0.5"]'
# Element clicked to close the settings menu.
CLOSE_SETTINGS_MENU_ICON = '//div[@class="absolute right-3 cursor-pointer p-4"]'
# Link to the character archive page (not referenced by the code visible in this notebook).
CHAR_SELECT_ICON = '//a[@href="/en/archive/avatar"]'
# Container whose <a> descendants are treated as the individual character cards.
CHAR_GRID = '//*[@class="flex flex-row flex-wrap justify-center gap-6 mt-2"]'
# CSS class selector (not referenced by the code visible in this notebook).
CHAR_NAME_CLASSES = '.absolute.bottom-0.left-0.right-0'
# Relative XPath (leading './/') to the name text, resolved under the element named "Avatar Name".
CHAR_NAME = './/div[@class="text-2xl font-bold text-white xl:text-3xl"]'


class ImageScraper:
    """Collect character names and icon URLs from the hsr.yatta.top avatar archive.

    Intended call order: ``create_driver()`` -> ``enable_unreleased()`` ->
    ``mine_images()`` -> ``save_links_to_json()`` / ``download_images()``.
    """

    def __init__(self):
        self.driver = None
        # Built together with the driver in create_driver(); declared here so the
        # attribute always exists on a fresh instance.
        self.wait = None
        self.url = "https://hsr.yatta.top/en/archive/avatar"
        # Mapping of character name -> icon URL, filled in by mine_images().
        self.charlist = None
        # Labels given, in order of appearance, to successive characters whose
        # displayed name is just 'Trailblazer'.
        self.trailblazer_order = [
            'Destruction - Caelus',
            'Destruction - Stelle',
            'Preservation - Caelus',
            'Preservation - Stelle'
        ]

    def create_driver(self):
        """Start Chrome, open ``self.url`` and maximize the window."""
        self.driver = webdriver.Chrome()
        self.wait = WebDriverWait(self.driver, 20)
        self.driver.get(self.url)
        self.driver.maximize_window()

    def enable_unreleased(self):
        """Switch the 'unreleased content' setting on and close the settings menu."""
        # enable unreleased content
        unreleased_select = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.XPATH, UNRELEASED_SELECT))
        )
        unreleased_select.click()
        enable_option = unreleased_select.find_element(By.XPATH, './/option[@value="on"]')
        enable_option.click()

        # close the settings menu
        settings_title = self.driver.find_element(By.XPATH, '//div[@name="title"]')
        # NOTE: CLOSE_SETTINGS_MENU_ICON starts with '//', so XPath evaluates it
        # against the whole document, not only inside settings_title.
        x_button = settings_title.find_element(By.XPATH, CLOSE_SETTINGS_MENU_ICON)
        x_button.click()

    def close_ad(self):
        """Dismiss the advertisement shown inside its iframe."""
        ad_frame = self.driver.find_element(By.XPATH, './/*[@aria-label="Advertisement"]')
        self.driver.switch_to.frame(ad_frame)
        try:
            self.driver.find_element(By.ID, 'dismiss-button').click()
        finally:
            # Leave the iframe again; otherwise every later lookup on the driver
            # would be resolved inside the ad frame instead of the page.
            self.driver.switch_to.default_content()

    def _trailblazer_name(self, tb_idx):
        """Return the disambiguated name for the ``tb_idx``-th 'Trailblazer' entry."""
        if tb_idx < len(self.trailblazer_order):
            return 'Trailblazer - ' + self.trailblazer_order[tb_idx]
        # More Trailblazer entries than known labels: use a unique placeholder
        # rather than raising IndexError and discarding the whole scrape.
        return 'Trailblazer - Unknown ' + str(tb_idx + 1)

    def mine_images(self):
        """Scrape every character card into ``self.charlist`` and shut the browser down.

        Icon URLs are read from the grid first; each card is then opened in turn
        to read the character's name from its detail page. The browser session is
        ended when this method finishes, whether it succeeded or raised.
        """
        char_names = []
        char_img_links = []
        tb_idx = 0

        try:
            char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_GRID)))
            all_characters = char_grid.find_elements(By.TAG_NAME, 'a')

            for character in all_characters:
                # The third <img> inside a card is taken as the icon
                # (presumably the portrait -- verify against the page markup).
                char_img_link = character.find_elements(By.TAG_NAME, 'img')[2].get_attribute('src')
                char_img_links.append(char_img_link)

            for i in range(len(all_characters)):
                # Navigating away and back invalidates earlier element
                # references, so the grid is located again on every iteration.
                char_grid = WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, CHAR_GRID))
                )
                all_characters = char_grid.find_elements(By.TAG_NAME, 'a')
                all_characters[i].click()

                # get char name
                char_name_div = WebDriverWait(self.driver, 10).until(
                    EC.presence_of_element_located((By.NAME, 'Avatar Name'))
                )
                char_name = char_name_div.find_element(By.XPATH, CHAR_NAME).get_attribute('innerText')
                if char_name == 'Trailblazer':
                    char_name = self._trailblazer_name(tb_idx)
                    tb_idx += 1
                char_names.append(char_name)
                self.driver.back()

            self.charlist = dict(zip(char_names, char_img_links))
        finally:
            # quit() (not close()) also terminates the chromedriver process, and
            # running it in `finally` avoids leaking the browser when a step fails.
            if self.driver is not None:
                self.driver.quit()

    def get_image_links(self):
        """Return the name -> icon URL mapping (``None`` until mine_images() has run)."""
        return self.charlist

    def save_links_to_json(self):
        """Write ``self.charlist`` to ``character_images.json`` in the working directory."""
        with open('character_images.json', 'w') as f:
            json.dump(self.charlist, f)

    def download_images(self, path='./'):
        """Download every icon in ``self.charlist`` to ``<path>/<name>.png``.

        Args:
            path: Target directory, with or without a trailing separator. It is
                created if it does not exist.

        Raises:
            requests.HTTPError: If a download answers with an error status, so
                that an error page is never stored as a ``.png`` file.

        Does nothing when no links have been collected yet.
        """
        if not self.charlist:
            return
        if path:
            os.makedirs(path, exist_ok=True)
        for char, link in self.charlist.items():
            # A timeout keeps one stalled connection from hanging the notebook.
            response = requests.get(link, timeout=30)
            response.raise_for_status()
            with open(os.path.join(path, char + '.png'), 'wb') as f:
                f.write(response.content)

In [107]:
# Run the full scrape: collect names and icon links, then persist them.
scraper = ImageScraper()
try:
    scraper.create_driver()
    scraper.enable_unreleased()
    scraper.mine_images()
finally:
    # Make sure the browser is shut down even when one of the steps above
    # fails part-way; otherwise Chrome and chromedriver are left running.
    if scraper.driver is not None:
        scraper.driver.quit()
# No browser is needed from here on.
scraper.save_links_to_json()
scraper.download_images('../../public/char_icons/')