In [3]:
import pandas as pd
import numpy as np
import json

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [5]:
# XPath of the <div> that holds the character grid; it matches on the exact
# value of the element's class attribute.
CHAR_GRID = (
    '//div[@class="grid grid-cols-4 sm:grid-cols-5 md:grid-cols-6 '
    'lg:grid-cols-7 xl:grid-cols-8 h-100 p-3 text-slate-950"]'
)

# Labels handed out, in order of appearance, to grid entries whose scraped
# name contains "Trailblazer".
TRAILBLAZER_ORDER = [
    'Caelus - Destruction',
    'Stelle - Destruction',
    'Caelus - Preservation',
    'Stelle - Preservation',
]

# Path icon file name (without extension) -> path name stored in the output.
PATH_MAPPING = dict(
    mage='Erudition',
    rogue='The Hunt',
    warrior='Destruction',
    shaman='Harmony',
    warlock='Nihility',
    knight='Preservation',
    priest='Abundance',
)

class CharMiner:
    """Scrape name, portrait, path and id for every entry of a character grid.

    Usage: construct with the page URL, optionally call ``create_driver()``,
    then call ``mine_char_data()``. The browser is always shut down when
    ``mine_char_data()`` finishes, whether it succeeds or raises.
    """

    def __init__(self, url):
        """Remember the page URL; no browser is started yet."""
        self.url = url
        self.driver = None
        # Created together with the driver in create_driver(); initialised
        # here so the attribute always exists.
        self.wait = None

    def create_driver(self):
        """Start Chrome, load ``self.url`` and set up an 8-second explicit wait."""
        driver = webdriver.Chrome()
        try:
            driver.get(self.url)
            driver.maximize_window()
        except Exception:
            # Do not leave an orphaned browser behind when the page load fails.
            driver.quit()
            raise
        self.driver = driver
        self.wait = WebDriverWait(driver, 8)

    def mine_char_data(self):
        """Collect one record per ``<a>`` link found inside the character grid.

        Starts the browser first if ``create_driver()`` has not been called.

        Returns:
            dict mapping the last segment of each link's ``href`` (the
            character id) to ``{'Name': ..., 'ImageLink': ..., 'Path': ...}``.

        Raises:
            KeyError: if a path icon's file name is not a key of PATH_MAPPING.
            Selenium exceptions (e.g. a timeout while waiting for the grid)
            propagate unchanged.
        """
        if self.driver is None:
            self.create_driver()

        return_dict = {}
        trailblazer_idx = 0
        try:
            char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_GRID)))
            link_count = len(char_grid.find_elements(By.TAG_NAME, 'a'))

            for i in range(link_count):
                # Look the grid up again on every pass instead of reusing the
                # first lookup; presumably this guards against stale element
                # references if the page re-renders.
                char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_GRID)))
                link = char_grid.find_elements(By.TAG_NAME, 'a')[i]

                name = link.find_element(By.TAG_NAME, 'div').get_attribute('innerText')

                # The first <img> inside the link is stored as the image link.
                image_link = link.find_element(By.TAG_NAME, 'img').get_attribute('src')

                # The third <img> is the path icon; its file name without the
                # extension is the PATH_MAPPING key.
                path_icon_src = link.find_elements(By.TAG_NAME, 'img')[2].get_attribute('src')
                path = PATH_MAPPING[path_icon_src.split('/')[-1].split('.')[0]]

                # Trailblazer entries are labelled by order of appearance.
                if 'Trailblazer' in name:
                    if trailblazer_idx < len(TRAILBLAZER_ORDER):
                        name = 'Trailblazer - ' + TRAILBLAZER_ORDER[trailblazer_idx]
                    else:
                        # More Trailblazer entries than known labels: fall back
                        # to the path alone instead of failing with IndexError.
                        name = 'Trailblazer - ' + path
                    trailblazer_idx += 1

                # The last segment of the link target is the character id.
                char_id = link.get_attribute('href').split('/')[-1]

                return_dict[char_id] = {
                    'Name': name,
                    'ImageLink': image_link,
                    'Path': path,
                }
        finally:
            # quit() ends the whole WebDriver session (browser and driver
            # process); close() would only close the current window.
            self.driver.quit()
            self.driver = None
            self.wait = None
        return return_dict

In [6]:
# Page whose character grid is scraped.
HAKUSHIN_URL = 'https://hsr2.hakush.in/char'

# Open Chrome on the page, then collect one record per character link.
# char_dictionary maps character id -> {'Name', 'ImageLink', 'Path'}.
scraper = CharMiner(HAKUSHIN_URL)
scraper.create_driver()
char_dictionary = scraper.mine_char_data()

In [7]:
# Ascension records, indexed below as json_data[<character id>][<stage '0'..'6'>].
# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's locale default.
with open('../datamine/AvatarPromotionConfig.json', encoding='utf-8') as f:
    json_data = json.load(f)

def organize_char_data(data, id):
    """Build the per-level base-stat table of one character.

    Args:
        data: mapping of character id -> ascension records; ``data[id]`` must
            contain the seven records under the string keys '0' to '6'.
        id: key of the character inside ``data``.

    Returns:
        dict keyed by level tag. Plain tags ('1', '2', ...) hold the stats of
        that level; a tag such as '20+' holds the stats at the previous level
        cap computed with the next ascension's values. Every entry has 'ATK',
        'DEF' and 'HP' (``base + add * (level - 1)``, rounded to 3 decimals);
        the level-1 entry additionally carries 'BaseSPD', 'CritRate',
        'CritDamage' and 'BaseAggro'.
    """
    promotions = [data[id][str(stage)] for stage in range(7)]

    stats_by_level = {}
    # Pair every ascension record with the one before it (None for the first).
    for previous, promotion in zip([None] + promotions, promotions):
        # The first ascension starts at level 1; each later one starts at the
        # level cap of the ascension before it.
        first_level = 1 if previous is None else previous['MaxLevel']
        level_cap = promotion['MaxLevel']

        atk_base = promotion['AttackBase']['Value']
        def_base = promotion['DefenceBase']['Value']
        hp_base = promotion['HPBase']['Value']

        atk_step = promotion['AttackAdd']['Value']
        def_step = promotion['DefenceAdd']['Value']
        hp_step = promotion['HPAdd']['Value']

        for level in range(first_level, level_cap + 1):
            entry = {
                'ATK': round((atk_base + atk_step*(level-1)), 3),
                'DEF': round((def_base + def_step*(level-1)), 3),
                'HP': round((hp_base + hp_step*(level-1)), 3),
            }

            # Level-independent stats are stored once, on the level-1 entry.
            if level == 1:
                for out_key, source_key in (
                    ('BaseSPD', 'SpeedBase'),
                    ('CritRate', 'CriticalChance'),
                    ('CritDamage', 'CriticalDamage'),
                    ('BaseAggro', 'BaseAggro'),
                ):
                    entry[out_key] = promotion[source_key]['Value']

            # The first level of a later ascension repeats the previous cap,
            # so it gets the 'N+' tag to keep both entries.
            if level == first_level and level != 1:
                tag = str(first_level) + '+'
            else:
                tag = str(level)
            stats_by_level[tag] = entry

    return stats_by_level

# Attach the per-level stat table to every scraped character. A scraped id
# that is missing from json_data raises KeyError.
for key, char_entry in char_dictionary.items():
    char_entry['BaseStats'] = organize_char_data(json_data, key)

In [8]:
# Serialise before opening the file: opening with 'w' truncates it, so doing
# the encoding first means a value json cannot encode raises while the
# previous file contents are still intact.
char_stats_json = json.dumps(char_dictionary, indent=4)
with open('../data/hsr_char_stats.json', 'w', encoding='utf-8') as f:
    f.write(char_stats_json)