In [None]:
import pandas as pd
import numpy as np
import json

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [75]:
CHARACTERS_GRID = '//div[@class="grid grid-cols-4 sm:grid-cols-5 md:grid-cols-6 lg:grid-cols-7 xl:grid-cols-8 h-100 p-3 text-slate-950"]'
TRACES_DIV = '//div[@id="traces"]'
PATH_DIV = '//div[@class="grid grid-cols-2 bg-hakushin-table-2 text-center justify-center items-center"][4]'
TRACE_EFFECT = '//div[@class="text-sm 3xl:text-lg font-normal pb-4"]'
REQUIREMENT_DIV = '//div[@class="text-xs 3xl:text-base font-normal py-2 px-3 text-center rounded-full mx-auto bg-red-950 text-red-300"]'
CHAR_NAME_SPAN = '//span[@class="char-name"]'

class CharMinorTraceScraper:
    def __init__(self, url):
        self.driver = None
        self.wait = None
        self.url = url
    
    def create_driver(self):
        self.driver = webdriver.Chrome()
        self.driver.get(self.url)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, 8)

    def mine_traces(self):
        char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHARACTERS_GRID)))
        char_links = char_grid.find_elements(By.TAG_NAME, 'a')
        for link in char_links:
            link_href = link.get_attribute('href')
            self.driver.get(link_href)

            # get character path
            path = self.wait.until(EC.presence_of_element_located((By.XPATH, PATH_DIV))).find_elements(By.TAG_NAME, 'div')[1].get_attribute('innerText')
            
            # get character name
            name = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_NAME_SPAN))).get_attribute('innerText').strip('"')

            # now with path can mine minor traces
            traces_div = self.driver.find_element(By.XPATH, TRACES_DIV)
            traces = traces_div.find_elements(By.CLASS_NAME, 'trace')
            
            # all character traces
            char_traces_object = {}

            # mine traces loop
            for i in range(len(traces)):
                traces_div = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACES_DIV)))
                traces = traces_div.find_elements(By.CLASS_NAME, 'trace')
                trace_obj = self.mine_trace(traces, i, path, name)
                print(trace_obj)

            self.driver.back()
    
    def mine_trace(self, traces, i, path, name):
        trace = traces[i]
        trace_type = trace.get_attribute('class').split(' ')[1]
        id = trace.get_attribute('id')

        # click on the traces in order to find out their prereqs
        if trace_type != 'trace-2':
            trace_object = {}

            # click the trace - sometimes when you click nothing shows up, if that happens try again
            try:
                trace.click()
                WebDriverWait(self.driver, 1).until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT)))
            except:
                trace.click()
            
            # wait for trace info div to appear
            self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT)))

            # check if it's a major or minor trace
            major_trace = True
            try:
                self.driver.find_element(By.ID, 'trace-type')
            except:
                major_trace = False
            
            # add to the trace object
            if major_trace:
                trace_effect = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT))).get_attribute('innerText')
                unlock_requirement = self.driver.find_element(By.XPATH, REQUIREMENT_DIV).get_attribute('innerText').split(' ', maxsplit=2)[2]
                trace_object['traceLevel'] = 'major'
                trace_object['effect'] = trace_effect
                trace_object['unlockRequirement'] = unlock_requirement
            else:
                trace_effect = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT))).get_attribute('innerText')
                unlock_requirement = self.driver.find_element(By.XPATH, REQUIREMENT_DIV).get_attribute('innerText').split(' ', maxsplit=2)[2]
                words = trace_effect.split(' ')
                increases_index = words.index('increases')
                scaling_stat = ' '.join(words[:increases_index]).strip()
                scaling_amount = float(words[-1].strip('%'))
                trace_object['traceLevel'] = 'minor'
                trace_object['scalingStat'] = scaling_stat
                trace_object['scalingValue'] = scaling_amount
                trace_object['unlockRequirement'] = unlock_requirement
        return trace_object

In [76]:
CHARACTERS_URL = 'https://hsr.hakush.in/char'

scraper = CharMinorTraceScraper(CHARACTERS_URL)
scraper.create_driver()
scraper.mine_traces()

TypeError: CharMinorTraceScraper.mine_trace() missing 2 required positional arguments: 'path' and 'name'