In [2]:
import pandas as pd
import numpy as np
import json

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [16]:
# --- XPath selectors -------------------------------------------------------
# Each one matches an element by its exact class string (or id).

# Grid on the list page; its <a> descendants are the per-character links.
CHARACTERS_GRID = (
    '//div[@class="grid grid-cols-4 sm:grid-cols-5 md:grid-cols-6 '
    'lg:grid-cols-7 xl:grid-cols-8 h-100 p-3 text-slate-950"]'
)
# Container holding every element with class "trace" on a character page.
TRACES_DIV = '//div[@id="traces"]'
# Info-table row selected with the [4] positional predicate; the scraper reads
# the path name from the second <div> inside it.
PATH_DIV = (
    '//div[@class="grid grid-cols-2 bg-hakushin-table-2 '
    'text-center justify-center items-center"][4]'
)
# Description text the scraper waits for after clicking a trace.
TRACE_EFFECT = '//div[@class="text-sm 3xl:text-lg font-normal pb-4"]'
# Badge whose text carries the unlock requirement of the clicked trace.
REQUIREMENT_DIV = (
    '//div[@class="text-xs 3xl:text-base font-normal py-2 px-3 '
    'text-center rounded-full mx-auto bg-red-950 text-red-300"]'
)
# Span holding the character's display name.
CHAR_NAME_SPAN = '//span[@class="char-name"]'

# Labels handed out, in this order, to successive pages named "Trailblazer".
TRAILBLAZER_ORDER = [
    f'{hero} - {path_name}'
    for path_name in ('Destruction', 'Preservation')
    for hero in ('Caelus', 'Stelle')
]

class CharMinorTraceScraper:
    def __init__(self, url):
        self.driver = None
        self.wait = None
        self.url = url
    
    def create_driver(self):
        """Start Chrome, open `self.url`, and set up the shared wait helper.

        On success `self.driver` holds the live browser and `self.wait` a
        `WebDriverWait` bound to it with an 8-second timeout.

        Raises:
            Exception: Whatever Selenium raised while loading or maximising
                the page. The browser is shut down before re-raising, and
                `self.driver` / `self.wait` are left untouched.
        """
        driver = webdriver.Chrome()
        try:
            driver.get(self.url)
            driver.maximize_window()
        except Exception:
            # Do not leave an orphaned Chrome process behind when start-up fails.
            driver.quit()
            raise
        self.driver = driver
        self.wait = WebDriverWait(driver, 8)

    def mine_traces(self):
        """Scrape the traces of every character linked from the grid page.

        Requires `create_driver` to have been called so that `self.driver`
        and `self.wait` are set and the grid page is loaded.

        For each character link the method opens the page, reads the path
        and name, calls `mine_trace` on every element with class "trace",
        and stores the result of `organize_traces` under the character name.
        The browser is left on the last character page visited.

        Returns:
            dict: Character name -> list returned by `organize_traces`.
            Pages named "Trailblazer" are stored as
            "Trailblazer - <label>", with labels taken in order from
            `TRAILBLAZER_ORDER`.
        """
        char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHARACTERS_GRID)))
        # Read every href before leaving the page: the link elements become
        # stale after navigation, and collecting URLs once avoids having to
        # go back and re-locate the grid for each character.
        char_urls = [
            link.get_attribute('href')
            for link in char_grid.find_elements(By.TAG_NAME, 'a')
        ]

        trailblazer_index = 0
        all_char_traces = {}
        for char_url in char_urls:
            if not char_url:
                # An anchor without an href cannot be visited.
                continue
            self.driver.get(char_url)

            # The path name is the text of the second <div> inside the path row.
            path_row = self.wait.until(EC.presence_of_element_located((By.XPATH, PATH_DIV)))
            path = path_row.find_elements(By.TAG_NAME, 'div')[1].get_attribute('innerText')

            # Keep only the first line of the name span, without surrounding quotes.
            name_span = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_NAME_SPAN)))
            name = name_span.get_attribute('innerText').strip('"').split('\n')[0]
            if name == 'Trailblazer':
                if trailblazer_index < len(TRAILBLAZER_ORDER):
                    variant = TRAILBLAZER_ORDER[trailblazer_index]
                else:
                    # More Trailblazer pages than known labels: keep going
                    # under a unique, clearly provisional key instead of
                    # failing with IndexError after a long scrape.
                    variant = f'Unknown {trailblazer_index + 1} - {path}'
                name = name + ' - ' + variant
                trailblazer_index += 1

            # Count the trace nodes once; they are re-located before each click below.
            traces_div = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACES_DIV)))
            trace_count = len(traces_div.find_elements(By.CLASS_NAME, 'trace'))

            # scroll down to avoid ads
            self.driver.execute_script('window.scrollTo(0, 1080)')

            all_traces = []
            for trace_index in range(trace_count):
                # Look the elements up again on every pass rather than reusing
                # references obtained before the previous click.
                traces_div = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACES_DIV)))
                traces = traces_div.find_elements(By.CLASS_NAME, 'trace')
                trace_obj = self.mine_trace(traces, trace_index)
                if trace_obj:
                    all_traces.append(trace_obj)

            # Arrange the flat list into the per-path tree and file it under the name.
            all_char_traces[name] = self.organize_traces(all_traces, path)
        return all_char_traces

    def organize_traces(self, all_traces, path):
        return_list = [trace for trace in all_traces if trace['id'].endswith('1')]

        # nihility
        if path == 'The Nihility':
            # start on top traces
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B2' or trace['id'] == 'B3')]
            
            # next go to right traces
            search_key = 'C1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C3']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C4']

            # next go to left traces
            search_key = 'E1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E3']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E4']
            
            search_key = 'D1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E2']
        
        # preservation
        if path == 'The Preservation':
            # handling top path first
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'B2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B3' or trace['id'] == 'B4')]
            
            # handle bottom
            search_key = 'D1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'D2' or trace['id'] == 'D5')]
                    next_search_key = 'D2'
                    for char_obj in obj['nextObjects']:
                        if char_obj['id'] == next_search_key:
                            char_obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D3']
                            char_obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D4']
                    
                    next_search_key = 'D5'
                    for char_obj in obj['nextObjects']:
                        if char_obj['id'] == next_search_key:
                            char_obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D6']
                            char_obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D7']
        
        # hunt
        if path == 'The Hunt':
            # handle top
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'B2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B3' or trace['id'] == 'B4')]
            
            # add D2 and D5 to the return list as "origins"
            return_list.extend([trace for trace in all_traces if (trace['id'] == 'D2' or trace['id'] == 'D5')])

            # handle bottom-right
            search_key = 'D2'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D3']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D4']
            
            # handle bottom-left
            search_key = 'D5'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D6']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D7']
        
        # harmony
        if path == 'The Harmony':
            # handle top
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'B2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B3' or trace['id'] == 'B4')]
            
            # right traces
            search_key = 'C1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C3']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C4']

            # left traces
            search_key = 'E1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E3']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E4']

            # bottom traces
            search_key = 'D1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'D2' or trace['id'] == 'D3')]
        
        # Erudition
        if path == 'The Erudition':
            # top traces
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B2' or trace['id'] == 'B3')]
            
            # right traces
            search_key = 'C1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'C2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'C3' or trace['id'] == 'C4')]
            
            # left traces
            search_key = 'E1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'E2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'E3' or trace['id'] == 'E4')]
            
            # bottom traces add to return_list
            return_list.append([trace for trace in all_traces if trace['id'] == 'D2'][0])

        if path == 'The Abundance':
            # top traces
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B2' or trace['id'] == 'B3')]

            # append needed values to return_list
            return_list.extend([trace for trace in all_traces if (trace['id'] == 'D2' or trace['id'] == 'D3' or trace['id'] == 'D7')])

            # right traces
            search_key = 'D3'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D4']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D5']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D6']

            # left traces
            search_key = 'D7'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D8']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D9']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D10']

        if path == 'The Destruction':
            # top traces
            search_key = 'B1'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'B2']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if (trace['id'] == 'B3' or trace['id'] == 'B4')]

            # append to return_list
            return_list.extend([trace for trace in all_traces if (trace['id'] == 'D2' or trace['id'] == 'D6')])

            # right traces
            search_key = 'D2'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D3']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D4']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D5']

            # left traces
            search_key = 'D6'
            for obj in return_list:
                if obj['id'] == search_key:
                    obj['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D7']
                    obj['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D8']
                    obj['nextObjects'][0]['nextObjects'][0]['nextObjects'] = [trace for trace in all_traces if trace['id'] == 'D9']
            
        return return_list

    def mine_trace(self, traces, i):
        trace = traces[i]
        trace_type = trace.get_attribute('class').split(' ')[1]
        id = trace.get_attribute('id')

        # click on the traces in order to find out their prereqs
        trace_object = {'id': id}
        if trace_type != 'trace-2':
            # click the trace - sometimes when you click nothing shows up, if that happens try again
            try:
                trace.click()
                WebDriverWait(self.driver, 1).until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT)))
            except:
                trace.click()
            
            # wait for trace info div to appear
            self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT)))

            # check if it's a major or minor trace
            major_trace = True
            try:
                self.driver.find_element(By.ID, 'trace-type')
            except:
                major_trace = False
            
            # add to the trace object
            if major_trace:
                trace_effect = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT))).get_attribute('innerText')
                unlock_requirement = self.driver.find_element(By.XPATH, REQUIREMENT_DIV).get_attribute('innerText').split(' ', maxsplit=2)[2]
                trace_object['traceLevel'] = 'major'
                trace_object['effect'] = trace_effect
                trace_object['unlockRequirement'] = unlock_requirement
            else:
                trace_effect = self.wait.until(EC.presence_of_element_located((By.XPATH, TRACE_EFFECT))).get_attribute('innerText')
                unlock_requirement = self.driver.find_element(By.XPATH, REQUIREMENT_DIV).get_attribute('innerText').split(' ', maxsplit=2)[2]
                words = trace_effect.split(' ')
                increases_index = words.index('increases')
                scaling_stat = ' '.join(words[:increases_index]).strip()
                scaling_amount = float(words[-1].strip('%'))
                trace_object['traceLevel'] = 'minor'
                trace_object['scalingStat'] = scaling_stat
                trace_object['scalingValue'] = scaling_amount
                trace_object['unlockRequirement'] = unlock_requirement
            return trace_object
        return None

In [19]:
# List page the scraper starts from.
CHARACTERS_URL = 'https://hsr.hakush.in/char'

scraper = CharMinorTraceScraper(CHARACTERS_URL)
try:
    scraper.create_driver()
    char_traces = scraper.mine_traces()
finally:
    # Always release the Chrome session, even when scraping fails part-way.
    if scraper.driver is not None:
        scraper.driver.quit()

In [21]:
# Persist the scraped traces as pretty-printed JSON.
# (The bare `char_traces` expression that used to sit here was not the last
# statement of the cell, so it displayed nothing.)
with open('../general_logic/data/minor_traces.json', 'w', encoding='utf-8') as f:
    json.dump(char_traces, f, indent=4)