In [21]:
import io
import json
import time

import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

In [88]:
class StatScraper:
    """Scrape one speed value per character from a character-archive page.

    Intended call order: ``create_driver()`` -> ``enable_unreleased()`` ->
    ``mine_stats()``. ``mine_stats`` shuts the browser down when it finishes,
    so a new ``create_driver()`` call is needed before scraping again.
    """

    # Seconds an explicit wait polls for an element before timing out.
    WAIT_TIMEOUT = 10

    def __init__(self, url):
        """Remember the archive URL; no browser is started here.

        Args:
            url: Address of the page that lists all characters.
        """
        self.driver = None
        # Defined up front so that using the scraper before create_driver()
        # fails with a clear error instead of a missing-attribute error.
        self.wait = None
        self.url = url
        # Labels appended, in visiting order, to pages whose displayed name
        # is just 'Trailblazer' so that the records stay distinguishable.
        self.trailblazer_order = [
            'Destruction - Caelus',
            'Destruction - Stelle',
            'Preservation - Caelus',
            'Preservation - Stelle'
        ]

    def create_driver(self):
        """Start Chrome, load ``self.url``, maximize the window and build the shared wait."""
        self.driver = webdriver.Chrome()
        self.driver.get(self.url)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, self.WAIT_TIMEOUT)

    def _require_driver(self):
        """Raise RuntimeError when there is no browser session to work with."""
        if self.driver is None or self.wait is None:
            raise RuntimeError(
                'No active browser session; call create_driver() first.'
            )

    def enable_unreleased(self):
        """Pick the "on" option of the unreleased-content select, then close the settings menu.

        NOTE(review): nothing here opens the settings menu, so it is presumably
        already showing when the page loads -- confirm against the site.

        Raises:
            RuntimeError: if ``create_driver`` has not been called.
        """
        # constants
        UNRELEASED_SELECT = '//select[@class="font-medium rounded-md text-blazer-950 text-right pb-0.5"]'
        CLOSE_SETTINGS_MENU_ICON = '//div[@class="absolute right-3 cursor-pointer p-4"]'

        self._require_driver()

        # enable unreleased content
        unreleased_select = self.wait.until(
            EC.presence_of_element_located((By.XPATH, UNRELEASED_SELECT))
        )
        unreleased_select.click()
        unreleased_select.find_element(By.XPATH, './/option[@value="on"]').click()

        # close the settings menu
        # The icon XPath starts with '//', which always searches the whole
        # document even when run on a child element, so query the driver
        # directly rather than implying a scoped lookup.
        self.driver.find_element(By.XPATH, CLOSE_SETTINGS_MENU_ICON).click()

    @staticmethod
    def _speed_from_stat_table(stat_df):
        """Return the integer SPD value found in the '80' column of a stat table.

        Args:
            stat_df: DataFrame with an 'LVL' column holding the stat names and
                an '80' column holding the values to read.

        Returns:
            int: the value on the row whose 'LVL' entry is 'SPD'.
        """
        # Blank cells take the value to their left; cells that are still
        # blank (nothing to their left) are labelled 'Aggro'.
        filled = stat_df.ffill(axis=1).fillna('Aggro')
        by_stat = filled[['LVL', '80']].T
        # Promote the 'LVL' row (stat names) to column headers.
        by_stat.columns = by_stat.iloc[0, :]
        values = by_stat.iloc[1:, :]
        # The remaining row is labelled '80', not 0, so read it by position;
        # integer keys on a label-indexed Series are deprecated in pandas.
        return int(values['SPD'].iloc[0])

    def mine_stats(self):
        """Open every character page in turn and record its name and speed.

        Returns:
            list[dict]: one ``{'Character Name': str, 'Speed': int}`` record
            per link in the character grid, in grid order.

        Raises:
            RuntimeError: if ``create_driver`` has not been called.

        The browser session is always shut down on exit, including when an
        error interrupts the scrape.
        """
        CHARACTER_GRID = '//div[@class="flex flex-row flex-wrap justify-center gap-6 mt-2"]'
        SLIDER_BUTTON = '//button[text()="Slider"]'
        STAT_TABLE = '//table[@class="table-outline text-sm font-normal text-white"]'
        CHAR_NAME = '//div[@class="text-2xl font-bold text-white xl:text-3xl"]'

        self._require_driver()

        char_jsons = []
        tb_idx = 0
        try:
            grid = self.wait.until(
                EC.presence_of_element_located((By.XPATH, CHARACTER_GRID))
            )
            character_count = len(grid.find_elements(By.TAG_NAME, 'a'))

            # main scraping loop
            for i in range(character_count):
                # Re-locate the links on every pass: references obtained
                # before navigating away are no longer usable afterwards.
                char_grid = self.wait.until(
                    EC.presence_of_element_located((By.XPATH, CHARACTER_GRID))
                )
                char_grid.find_elements(By.TAG_NAME, 'a')[i].click()

                # get the character name
                char_name_div = self.wait.until(
                    EC.presence_of_element_located((By.XPATH, CHAR_NAME))
                )
                char_name = char_name_div.get_attribute('innerText')
                if char_name == 'Trailblazer':
                    char_name = 'Trailblazer - ' + self.trailblazer_order[tb_idx]
                    tb_idx += 1

                # find the slider button and turn to table
                self.wait.until(
                    EC.presence_of_element_located((By.XPATH, SLIDER_BUTTON))
                ).click()

                # get the table and convert it to pandas
                stat_table = self.wait.until(
                    EC.presence_of_element_located((By.XPATH, STAT_TABLE))
                )
                table_html = stat_table.get_attribute('outerHTML')
                # read_html expects a file-like object; passing literal HTML
                # text is deprecated.
                stat_df = pd.read_html(io.StringIO(table_html))[0]

                char_jsons.append({
                    'Character Name': char_name,
                    'Speed': self._speed_from_stat_table(stat_df)
                })
                self.driver.back()
        finally:
            # quit() ends the whole WebDriver session (close() only closes the
            # current window), and running it in finally prevents a leaked
            # browser when a lookup times out mid-scrape.
            self.driver.quit()
            self.driver = None
            self.wait = None
        return char_jsons

In [89]:
YATTA_URL = 'https://hsr.yatta.top/en/archive/avatar'

# Run the scraper end to end: open the archive page, switch the
# unreleased-content setting on, then collect one record per character.
scraper = StatScraper(YATTA_URL)
scraper.create_driver()
scraper.enable_unreleased()

# Bind the records to a name so later cells can reuse them without launching
# the browser again; the bare name on the last line keeps them as cell output.
char_speeds = scraper.mine_stats()
char_speeds

[{'Character Name': 'Xueyi', 'Speed': 103},
 {'Character Name': 'Ruan Mei', 'Speed': 104},
 {'Character Name': 'Dr. Ratio', 'Speed': 103},
 {'Character Name': 'Hanya', 'Speed': 110},
 {'Character Name': 'Argenti', 'Speed': 103},
 {'Character Name': 'Huohuo', 'Speed': 98},
 {'Character Name': 'Topaz & Numby', 'Speed': 110},
 {'Character Name': 'Guinaifen', 'Speed': 106},
 {'Character Name': 'Jingliu', 'Speed': 96},
 {'Character Name': 'Lynx', 'Speed': 100},
 {'Character Name': 'Fu Xuan', 'Speed': 100},
 {'Character Name': 'Dan Heng • Imbibitor Lunae', 'Speed': 102},
 {'Character Name': 'Kafka', 'Speed': 100},
 {'Character Name': 'Luka', 'Speed': 103},
 {'Character Name': 'Blade', 'Speed': 97},
 {'Character Name': 'Luocha', 'Speed': 101},
 {'Character Name': 'Yukong', 'Speed': 107},
 {'Character Name': 'Silver Wolf', 'Speed': 107},
 {'Character Name': 'Jing Yuan', 'Speed': 99},
 {'Character Name': 'March 7th', 'Speed': 101},
 {'Character Name': 'Dan Heng', 'Speed': 110},
 {'Character N