In [3]:
import json
import os
from time import sleep

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

In [34]:
# XPath of the container element that wraps all relic cards on the page.
RELIC_GRID = '//div[@class="grid grid-cols-1 md:grid-cols-2 h-100 p-4 text-slate-950"]'


class RelicBasicDataScraper:
    """Scrape ID, name and image of every relic card found on ``url``.

    Usage: construct with the page URL, call ``create_driver()`` to open the
    browser, then call ``mine_relics()``. ``mine_relics()`` always shuts the
    browser down when it finishes, so call ``create_driver()`` again before
    scraping a second time.
    """

    # XPath (relative to the RELIC_GRID element) that matches one relic card.
    CARD_XPATH = './/div[@class="grid grid-cols-6 p-2 text-slate-100 py-4"]'
    # Directory the downloaded relic images are written into.
    IMAGE_DIR = '../assets/relic_images'
    # Seconds to wait for a page element to appear.
    WAIT_TIMEOUT = 10
    # Seconds to wait for one image download before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """Remember the page URL; no browser is started yet."""
        self.url = url
        self.driver = None
        # Defined up front so mine_relics() can detect a missing create_driver().
        self.wait = None

    def create_driver(self):
        """Start Chrome, load ``self.url``, maximize the window and set up the wait helper."""
        driver = webdriver.Chrome()
        try:
            driver.get(self.url)
            driver.maximize_window()
        except Exception:
            # Do not leave an orphaned browser behind if the page fails to load.
            driver.quit()
            raise
        self.driver = driver
        self.wait = WebDriverWait(driver, self.WAIT_TIMEOUT)

    def close_driver(self):
        """Shut the browser session down; safe to call when none is open."""
        if self.driver is None:
            return
        try:
            # quit() ends the whole WebDriver session, not just the current window.
            self.driver.quit()
        finally:
            self.driver = None
            self.wait = None

    @classmethod
    def _image_path(cls, name):
        """Return the local .webp path for a relic name (spaces and colons removed)."""
        return f"{cls.IMAGE_DIR}/{name.replace(' ', '').replace(':', '')}.webp"

    def _find_relic_divs(self):
        """Wait for the relic grid to be present and return its card elements."""
        relic_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, RELIC_GRID)))
        return relic_grid.find_elements(By.XPATH, self.CARD_XPATH)

    def _download_image(self, image_link, image_path):
        """Download ``image_link`` to ``image_path``; raises on an HTTP error status."""
        response = requests.get(image_link, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of saving an error page under a .webp name.
        response.raise_for_status()
        with open(image_path, 'wb') as f:
            f.write(response.content)

    def mine_relics(self):
        """Collect every relic card on the page and save its image to disk.

        Returns:
            dict: relic ID (last path segment of the card link) mapped to a
            dict with the keys ``'Name'``, ``'ImageLink'`` and ``'ImagePath'``.

        Raises:
            RuntimeError: if ``create_driver()`` has not been called.
            requests.HTTPError: if an image download returns an error status.

        The browser is closed when this method exits, whether it succeeds or fails.
        """
        if self.driver is None or self.wait is None:
            raise RuntimeError('create_driver() must be called before mine_relics()')

        return_dict = {}
        try:
            os.makedirs(self.IMAGE_DIR, exist_ok=True)

            relic_count = len(self._find_relic_divs())
            for i in range(relic_count):
                # Re-locate the cards on every pass so a re-rendered page
                # cannot leave us holding stale element references.
                relic_div = self._find_relic_divs()[i]

                # The first link of the card carries both the ID and the image.
                anchor = relic_div.find_element(By.TAG_NAME, 'a')

                # get ID (tolerate a trailing slash on the link)
                relic_id = anchor.get_attribute('href').rstrip('/').split('/')[-1]

                # get name
                name = (
                    relic_div.find_element(By.TAG_NAME, 'div')
                    .find_element(By.TAG_NAME, 'a')
                    .get_attribute('innerText')
                )

                # get image link and store the image locally
                image_link = anchor.find_element(By.TAG_NAME, 'img').get_attribute('src')
                image_path = self._image_path(name)
                self._download_image(image_link, image_path)

                # create object
                relic_dict = {
                    'Name': name,
                    'ImageLink': image_link,
                    'ImagePath': image_path
                }

                print(relic_dict)

                return_dict[relic_id] = relic_dict
        finally:
            # Release the browser even when scraping or a download fails.
            self.close_driver()
        return return_dict

In [35]:
# Page the scraper is pointed at.
HAKUSHIN_URL = 'https://hsr3.hakush.in/relicset'

# Open the browser on that page, then gather one entry per relic card
# (keyed by relic ID); each relic image is saved to disk along the way.
scraper = RelicBasicDataScraper(url=HAKUSHIN_URL)
scraper.create_driver()
relic_dict = scraper.mine_relics()

{'Name': 'Passerby of Wandering Cloud', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71000.webp', 'ImagePath': '../assets/relic_images/PasserbyofWanderingCloud.webp'}
{'Name': 'Musketeer of Wild Wheat', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71001.webp', 'ImagePath': '../assets/relic_images/MusketeerofWildWheat.webp'}
{'Name': 'Knight of Purity Palace', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71002.webp', 'ImagePath': '../assets/relic_images/KnightofPurityPalace.webp'}
{'Name': 'Hunter of Glacial Forest', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71003.webp', 'ImagePath': '../assets/relic_images/HunterofGlacialForest.webp'}
{'Name': 'Champion of Streetwise Boxing', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71004.webp', 'ImagePath': '../assets/relic_images/ChampionofStreetwiseBoxing.webp'}
{'Name': 'Guard of Wuthering Snow', 'ImageLink': 'https://api.hakush.in/hsr/UI/itemfigures/71005.webp', 'ImagePath': '../assets/relic