In [1]:
import pandas as pd
import numpy as np
import json

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [24]:
# XPath of the <div> holding the character grid. It matches on the exact,
# full class attribute string, so any change to the page's class list breaks it.
CHAR_GRID = (
    '//div[@class="'
    'grid grid-cols-4 sm:grid-cols-5 md:grid-cols-6 lg:grid-cols-7 xl:grid-cols-8 '
    'h-100 p-3 text-slate-950'
    '"]'
)

# Labels given, in order of appearance, to grid entries whose name contains
# "Trailblazer": both characters for Destruction first, then both for Preservation.
TRAILBLAZER_ORDER = [
    f'{character} - {path}'
    for path in ('Destruction', 'Preservation')
    for character in ('Caelus', 'Stelle')
]

class CharMiner:
    """Selenium scraper collecting name, icon URL and id for every link of a character grid page.

    Typical use: ``CharMiner(url)``, then ``create_driver()``, then
    ``mine_char_data()``. The browser session is shut down when mining ends.
    """

    def __init__(self, url, timeout=8):
        """Store the page URL and the explicit-wait timeout; no browser is started here.

        Args:
            url: Address of the page containing the character grid.
            timeout: Seconds the explicit wait keeps polling before giving up.
        """
        self.url = url
        self.timeout = timeout
        self.driver = None
        # Built together with the driver; defined here so the attribute always exists.
        self.wait = None

    def create_driver(self):
        """Launch Chrome, load ``self.url``, maximize the window and build the explicit wait."""
        self.driver = webdriver.Chrome()
        self.driver.get(self.url)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, self.timeout)

    def mine_char_data(self):
        """Read every character link of the grid, then shut the browser down.

        The browser is started first if ``create_driver()`` has not been called.

        Returns:
            dict: maps the last path segment of each link's ``href`` (the
            character id, as a string) to ``{'Name': ..., 'ImageLink': ...}``.

        Raises:
            Selenium errors (for example a timeout while waiting for the grid)
            propagate to the caller; the browser is shut down in that case too.
        """
        try:
            if self.driver is None:
                self.create_driver()

            # Nothing below navigates or clicks, so a single lookup of the grid
            # and its links is enough for the whole pass.
            char_grid = self.wait.until(EC.presence_of_element_located((By.XPATH, CHAR_GRID)))
            char_links = char_grid.find_elements(By.TAG_NAME, 'a')

            trailblazer_idx = 0
            return_dict = {}
            for link in char_links:
                name = link.find_element(By.TAG_NAME, 'div').get_attribute('innerText')

                # Entries named "Trailblazer" are relabelled in order of appearance.
                # Once the known labels run out the scraped name is kept, rather
                # than failing with IndexError and losing everything mined so far.
                if 'Trailblazer' in name and trailblazer_idx < len(TRAILBLAZER_ORDER):
                    name = 'Trailblazer - ' + TRAILBLAZER_ORDER[trailblazer_idx]
                    trailblazer_idx += 1

                image_link = link.find_element(By.TAG_NAME, 'img').get_attribute('src')

                # The id is the final path segment of the link target; a trailing
                # slash would otherwise produce an empty id.
                char_id = link.get_attribute('href').rstrip('/').split('/')[-1]

                return_dict[char_id] = {'Name': name, 'ImageLink': image_link}
            return return_dict
        finally:
            # quit() rather than close(): it ends the whole driver session, and
            # running it in ``finally`` covers the error paths as well.
            driver, self.driver, self.wait = self.driver, None, None
            if driver is not None:
                driver.quit()

In [25]:
# Character index page to scrape.
HAKUSHIN_URL = 'https://hsr2.hakush.in/char'

# Open the page in Chrome, then collect {id: {'Name', 'ImageLink'}} for every grid entry.
scraper = CharMiner(url=HAKUSHIN_URL)
scraper.create_driver()
char_dictionary = scraper.mine_char_data()

In [26]:
# Show the scraped mapping: character id -> {'Name', 'ImageLink'}.
char_dictionary

{'1308': {'Name': 'Acheron',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1308.webp'},
 '1307': {'Name': 'Black Swan',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1307.webp'},
 '1306': {'Name': 'Sparkle',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1306.webp'},
 '1305': {'Name': 'Dr. Ratio',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1305.webp'},
 '1304': {'Name': 'Aventurine',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1304.webp'},
 '1303': {'Name': 'Ruan Mei',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1303.webp'},
 '1302': {'Name': 'Argenti',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1302.webp'},
 '1217': {'Name': 'Huohuo',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1217.webp'},
 '1213': {'Name': 'Dan Heng • Imbibitor Lunae',
  'ImageLink': 'https://api.hakush.in/hsr/UI/avatarshopicon/1213.webp'},
 '1212': {'Name': 'Jingliu',
  'ImageLink': 'https://ap

In [36]:
# Load the promotion config; the code below indexes it as
# json_data[<avatar id string>][<promotion index string>].
# The encoding is pinned because open() otherwise falls back to the platform
# default, which is not UTF-8 on every system.
with open('../datamine/AvatarPromotionConfig.json', encoding='utf-8') as f:
    json_data = json.load(f)

def organize_char_data(data, id):
    """Gather the seven promotion records of one character (work in progress).

    Args:
        data: Mapping indexed as ``data[id][<promotion index string>]``; every
            record must provide an integer ``'MaxLevel'``.
        id: Key of the character inside ``data``.

    Returns:
        The record stored under promotion ``'0'``. No per-level table is
        produced yet: the loop only walks each promotion's level span and
        derives the "<level>+" tag of its first level.

    Raises:
        KeyError: if ``id``, one of the promotions ``'0'`` to ``'6'``, or a
            ``'MaxLevel'`` entry is missing.
    """
    promotions = [data[id][str(stage)] for stage in range(7)]

    # TODO: placeholder for the per-level values; nothing is stored in it yet.
    level_data = {}

    # Each promotion covers the levels from the previous promotion's cap
    # (0 for the first one) up to and including its own cap.
    floor = 0
    for promotion in promotions:
        ceiling = promotion['MaxLevel']
        for level in range(floor, ceiling + 1):
            if level == floor:
                # First level of the span gets the "x+" tag.
                level_tag = str(floor) + '+'
        floor = ceiling

    return promotions[0]

# Try the function on id '1308' (the entry scraped above as 'Acheron').
organize_char_data(json_data, '1308')

{'AvatarID': 1308,
 'Promotion': 0,
 'PromotionCostList': [{'ItemID': 2, 'ItemNum': 4000},
  {'ItemID': 114001, 'ItemNum': 5}],
 'MaxLevel': 20,
 'PlayerLevelRequire': 15,
 'AttackBase': {'Value': 95.04},
 'AttackAdd': {'Value': 4.752},
 'DefenceBase': {'Value': 59.4},
 'DefenceAdd': {'Value': 2.97},
 'HPBase': {'Value': 153.12},
 'HPAdd': {'Value': 7.656},
 'SpeedBase': {'Value': 101},
 'CriticalChance': {'Value': 0.05},
 'CriticalDamage': {'Value': 0.5},
 'BaseAggro': {'Value': 100}}

In [37]:
# Scratch check of range() bounds: the stop value 20 is excluded, so this prints 10 through 19.
for j in range(10, 20):
    print(j)

10
11
12
13
14
15
16
17
18
19
