In [1]:
# %load_ext autoreload
# %autoreload 2


In [None]:
import os
import time
from functools import partial

import polars as pl
from selenium.webdriver.common.by import By
from tqdm import tqdm

from prefect_perso.utils.selenium import (
    get_driver,
    get_element,
    get_elements,
    highlight_element,
)

# Root of the local checkout; every export below is written under <repo_path>/data.
# Set the MH_WILDS_REPO_PATH environment variable to relocate it without editing
# this cell. The original author's path stays the default so existing setups keep working.
repo_path = os.environ.get(
    "MH_WILDS_REPO_PATH", r"D:\projets_python_ssd\Sencrop\perso\MH_Wilds_tools"
)

In [3]:
# Create the shared Selenium driver with headless mode turned off.
# Every scraping cell below navigates with this single `driver` instance.
driver = get_driver(headless=False)

  self._check_if_window_handle_is_current(windowHandle)


# Scrape all quests rewards

In [4]:
# Quest ("missions") index page, French locale per the `/fr/` URL segment.
quest_page = "https://mhwilds.kiranico.com/fr/data/missions"

# Navigate the shared driver there; the next cell reads the quest rows from this page.
driver.get(quest_page)

In [5]:
# Locate the "my-8" container, then every "transition-colors" element inside it
# (one per quest row).
quest_scroll_element = get_element(driver, (By.CLASS_NAME, "my-8"))
quests = get_elements(quest_scroll_element, (By.CLASS_NAME, "transition-colors"))

# Build one {"name", "href"} record per row from the link in the row's first cell.
all_quests = []
for quest in quests:
    first_cell = quest.find_elements(By.TAG_NAME, "td")[0]
    quest_link = first_cell.find_element(By.TAG_NAME, "a")
    all_quests.append(
        dict(
            name=quest_link.text,
            href=quest_link.get_attribute("href"),
        )
    )

In [6]:
# Visit each quest page and attach its reward list to the quest record in place.
# "rewards" is only assigned once the page has been parsed successfully, so an
# interrupted run never leaves a placeholder of the wrong type on a quest.
for quest in tqdm(all_quests):
    driver.get(quest["href"])

    # The rewards rows are looked up under the parent of the "Récompenses" heading.
    recompenses_element = driver.find_element(
        By.XPATH,
        "//h3[contains(text(), 'Récompenses')]",
    )
    parent_element = recompenses_element.find_element(By.XPATH, "..")

    # Get all rewards
    recompenses_list = []
    for recompense in parent_element.find_elements(By.TAG_NAME, "tr"):
        value = recompense.find_elements(By.TAG_NAME, "td")[0]
        item = value.find_element(By.TAG_NAME, "a").text
        # Whatever is left of the cell text once the item name and the "x"
        # marker are removed is the quantity; nothing left means a single item.
        quantity_text = value.text.replace(item, "").strip().replace("x", "")
        quantity = int(quantity_text) if quantity_text else 1
        recompenses_list.append({"item": item, "quantity": quantity})
    quest["rewards"] = recompenses_list

100%|██████████| 81/81 [00:35<00:00,  2.29it/s]


In [None]:
# Flatten to one row per (quest, reward) and persist it with polars directly,
# like the other exports in this notebook (no pandas round-trip needed).
quests_rewards = (
    pl.DataFrame(all_quests)
    # One row per reward; quests with an empty reward list yield a null row.
    .explode("rewards")
    .with_columns(
        pl.col("rewards").struct.field("item").alias("item"),
        pl.col("rewards").struct.field("quantity").alias("quantity"),
    )
    .drop("rewards")
    # Drop the null rows produced by quests without rewards.
    .filter(pl.col("item").is_not_null())
    # Alphabetical by item, largest quantity first within an item.
    .sort("item", "quantity", descending=[False, True])
    # `select` raises on a missing column instead of silently yielding None.
    .select("name", "item", "quantity")
)
quests_rewards.write_parquet(os.path.join(repo_path, "data", "quests.parquet"))

In [8]:
# Render the flattened (quest, item, quantity) table as Markdown text.
rewards_exploded = pl.DataFrame(all_quests).explode("rewards")
rewards_flat = (
    rewards_exploded.with_columns(
        item=pl.col("rewards").struct.field("item"),
        quantity=pl.col("rewards").struct.field("quantity"),
    )
    .drop("rewards")
    .filter(pl.col("item").is_not_null())
    .sort("item", "quantity", descending=[False, True])
)
rewards_markdown = (
    rewards_flat.to_pandas()
    .get(["name", "item", "quantity"])
    .to_markdown(index=False)
)
print(rewards_markdown)

| name                                     | item                         |   quantity |
|:-----------------------------------------|:-----------------------------|-----------:|
| Les fidèles Veilleurs                    | Ail de Sild                  |          3 |
| Lutte pour la survie                     | Ail de Sild spécial          |          3 |
| Capacité d'adaptation                    | Ail de Sild spécial          |          1 |
| Doshaguma sylvestre                      | Bombe tranquillisante        |         20 |
| Un visiteur venu de l'Ouest              | Bombe tranquillisante        |         10 |
| Nouveaux écosystèmes                     | Bombe tranquillisante        |         10 |
| Le village où chante le vent             | Bouillon d'amerinsecte       |          5 |
| Et la lumière fut                        | Capsule aveuglante           |         10 |
| Attention au Gypceros                    | Carbalite                    |          3 |
| Exploration sylvest

# Scrape armors

In [9]:
# Armor-series index page (French locale per the `/fr/` URL segment).
armor_href = "https://mhwilds.kiranico.com/fr/data/armor-series"
# Navigate the shared driver there; the next cell collects the set links from this page.
driver.get(armor_href)

In [10]:
# Collect the target URL of every link found inside the "my-8" container.
sets_scroll = get_element(driver, (By.CLASS_NAME, "my-8"))
set_links = get_elements(sets_scroll, (By.TAG_NAME, "a"))
set_hrefs = [set_link.get_attribute("href") for set_link in set_links]

### Set talents

In [11]:
# Visit every armor-set page and build one record per armor piece:
# {"piece", "name", "jewels": {level: count}, "talents": [{"talent_name", "talent_level"}]}.
all_pieces = []
for set_href in tqdm(set_hrefs):
    driver.get(set_href)
    # Parse set data
    # NOTE(review): set_name is not stored in the records below; the lookup is kept
    # because get_element may also serve as a page-load wait — confirm before removing.
    set_name = get_element(driver, (By.TAG_NAME, "h2")).text
    # Walk two levels up from the "Talents de l'équipement" header cell to reach
    # the element that contains all of the table rows.
    talent_table = (
        driver.find_element(
            By.XPATH,
            '//th[contains(text(), "Talents de l\'équipement")]',
        )
        .find_element(By.XPATH, "..")
        .find_element(By.XPATH, "..")
    )

    # The first row is skipped (presumably the header row holding the <th> cells).
    talent_table_rows = talent_table.find_elements(By.TAG_NAME, "tr")[1:]
    for talent_table_row in talent_table_rows:
        piece, name, jewels, talent = talent_table_row.find_elements(By.TAG_NAME, "td")
        piece = piece.text
        name = name.text
        jewels = jewels.text

        # Extract jewel levels: count how many slots of each level the piece has.
        all_jewels = {"0": 0, "1": 0, "2": 0, "3": 0, "4": 0}
        for char in jewels.replace("[", "").replace("]", ""):
            all_jewels[char] += 1

        # Extract talents
        all_talents = []
        try:
            talents = talent.find_elements(By.TAG_NAME, "a")
        except Exception:
            # Best-effort: skip a piece whose talent cell cannot be read, but let
            # KeyboardInterrupt/SystemExit through so a long scrape can be stopped.
            continue
        for _talent in talents:
            # Link text is "<talent name> +<level>"; split on the LAST "+" so a
            # "+" inside the talent name itself is preserved.
            talent_name, _, talent_level = _talent.text.rpartition("+")
            all_talents.append(
                {
                    "talent_name": talent_name.strip(),
                    "talent_level": int(talent_level),
                }
            )
        piece_dict = {
            "piece": piece,
            "name": name,
            "jewels": all_jewels,
            "talents": all_talents,
        }
        all_pieces.append(piece_dict)

all_pieces

100%|██████████| 132/132 [01:13<00:00,  1.80it/s]


[{'piece': 'Tête',
  'name': "Masque d'espoir",
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': "Crâne d'acier", 'talent_level': 1}]},
 {'piece': 'Torse',
  'name': "Cotte d'espoir",
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': 'Bénédiction', 'talent_level': 1}]},
 {'piece': 'Bras',
  'name': "Avant-bras d'espoir",
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': 'Bénédiction', 'talent_level': 1}]},
 {'piece': 'Taille',
  'name': "Tassette d'espoir",
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': "Crâne d'acier", 'talent_level': 1}]},
 {'piece': 'Jambes',
  'name': "Grèves d'espoir",
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': 'Bénédiction', 'talent_level': 1}]},
 {'piece': 'Tête',
  'name': 'Lunettes en cuir',
  'jewels': {'0': 3, '1': 0, '2': 0, '3': 0, '4': 0},
  'talents': [{'talent_name': 'Botanis

In [12]:
# One expression per jewel level, pulling the count out of the "jewels" struct.
jewel_level_columns = [
    pl.col("jewels").struct.field(level).alias(f"jewel_{level}") for level in "01234"
]

# Flatten to one row per (piece, talent), then spread the jewel counts into columns.
armor_pieces = (
    pl.DataFrame(all_pieces)
    .explode("talents")
    .with_columns(
        talent_name=pl.col("talents").struct.field("talent_name"),
        talent_level=pl.col("talents").struct.field("talent_level"),
    )
    .drop("talents")
    .with_columns(jewel_level_columns)
    .drop("jewels")
)

armor_parquet_path = os.path.join(repo_path, "data", "armor_pieces.parquet")
armor_pieces.write_parquet(armor_parquet_path)
armor_pieces

piece,name,talent_name,talent_level,jewel_0,jewel_1,jewel_2,jewel_3,jewel_4
str,str,str,i64,i64,i64,i64,i64,i64
"""Tête""","""Masque d'espoir""","""Crâne d'acier""",1,3,0,0,0,0
"""Torse""","""Cotte d'espoir""","""Bénédiction""",1,3,0,0,0,0
"""Bras""","""Avant-bras d'espoir""","""Bénédiction""",1,3,0,0,0,0
"""Taille""","""Tassette d'espoir""","""Crâne d'acier""",1,3,0,0,0,0
"""Jambes""","""Grèves d'espoir""","""Bénédiction""",1,3,0,0,0,0
…,…,…,…,…,…,…,…,…
"""Tête""","""Mimiflore α""","""Pelage de renforcement""",1,2,1,0,0,0
"""Tête""","""Mimiflore α""","""Entomologiste""",1,2,1,0,0,0
"""Tête""","""Mimiflore α""","""Embuscade""",1,2,1,0,0,0
"""Tête""","""Heaume d'expédition α""","""Totem élémentaire""",1,2,1,0,0,0


# Charms scraping

In [None]:
# Charms index page (French locale per the `/fr/` URL segment).
charms_url = "https://mhwilds.kiranico.com/fr/data/charms"
# Navigate the shared driver there; the next cell reads the charm rows from this page.
driver.get(charms_url)

In [None]:
# Every table row inside the "my-8" container is treated as one charm.
charms_scroll = get_element(driver, (By.CLASS_NAME, "my-8"))
charms_elements = charms_scroll.find_elements(By.TAG_NAME, "tr")

# Record the name and detail-page URL carried by the first link of each row.
all_charms = []
for charm_row in charms_elements:
    charm_link = charm_row.find_element(By.TAG_NAME, "a")
    all_charms.append(
        dict(
            name=charm_link.text,
            href=charm_link.get_attribute("href"),
        )
    )
all_charms

In [None]:
# Open each charm's page and attach its talents ([{"name", "lvl"}]) to the record in place.
for charm in tqdm(all_charms):
    driver.get(charm["href"])

    talent_table = get_element(driver, (By.TAG_NAME, "tbody"))

    charm_talents = []
    for talent_row in talent_table.find_elements(By.TAG_NAME, "tr"):
        # Exactly three cells are expected per row; the third one is not used.
        name_cell, lvl_cell, _desc_cell = talent_row.find_elements(By.TAG_NAME, "td")
        talent_name = name_cell.text
        # Level cell text carries an "Lv" marker around the number.
        talent_lvl = int(lvl_cell.text.replace("Lv", "").strip())
        charm_talents.append({"name": talent_name, "lvl": talent_lvl})
    charm["talents"] = charm_talents

100%|██████████| 159/159 [00:49<00:00,  3.18it/s]


In [None]:
# Flatten to one row per (charm, talent), ordered by talent name then highest level first.
charms_exploded = pl.DataFrame(all_charms).explode("talents")
charms_data = (
    charms_exploded.with_columns(
        talent_name=pl.col("talents").struct.field("name"),
        talent_lvl=pl.col("talents").struct.field("lvl"),
    )
    .drop("talents")
    .sort("talent_name", "talent_lvl", descending=[False, True])
)

charms_parquet_path = os.path.join(repo_path, "data", "charms.parquet")
charms_data.write_parquet(charms_parquet_path)
charms_data

name,href,talent_name,talent_lvl
str,str,str,i64
"""Talisman d'absorption""","""https://mhwilds.kiranico.com/f…","""Absorption élémentaire""",1
"""Talisman de survie III""","""https://mhwilds.kiranico.com/f…","""Ami de la nature""",3
"""Talisman de survie II""","""https://mhwilds.kiranico.com/f…","""Ami de la nature""",2
"""Talisman de survie""","""https://mhwilds.kiranico.com/f…","""Ami de la nature""",1
"""Talisman anti-immobilisation I…","""https://mhwilds.kiranico.com/f…","""Anti-immobilisation""",3
…,…,…,…
"""Talisman d'incision II""","""https://mhwilds.kiranico.com/f…","""Écorcheur""",2
"""Talisman d'incision""","""https://mhwilds.kiranico.com/f…","""Écorcheur""",1
"""Talisman d'eau III""","""https://mhwilds.kiranico.com/f…","""Étanchéité""",3
"""Talisman d'eau II""","""https://mhwilds.kiranico.com/f…","""Étanchéité""",2


# Scrape weapons

In [None]:
# Weapons index page (French locale per the `/fr/` URL segment).
weapons_href = "https://mhwilds.kiranico.com/fr/data/weapons"
driver.get(weapons_href)

# Container (class "my-8") that the next cell searches for the weapon table rows.
weapons_scroll = get_element(driver, (By.CLASS_NAME, "my-8"))

In [None]:
# Click through every weapon-class tab and build one record per weapon row.
# Sharpness SVGs are additionally saved under <repo_path>/data/svg/<weapon_class>/.
all_weapon_data = []

# Find the pagination bar for weapons
# NOTE(review): an XPath beginning with "//" is evaluated from the document root even
# when called on an element, so this lookup is not scoped to weapons_scroll. Kept
# unchanged to avoid altering which node is matched; use ".//*[@dir='ltr']" if
# scoping to weapons_scroll is what was intended.
weapons_pagination_bar = weapons_scroll.find_element(By.XPATH, "//*[@dir='ltr']")
# Get all pagination buttons
weapons_pagination = weapons_pagination_bar.find_elements(By.TAG_NAME, "button")

# Determine the maximum number of pagination buttons
max_pagination = len(weapons_pagination)
# Visit the pagination buttons in page order (first to last)
for idx_pagination in tqdm(range(max_pagination)):
    pagination = weapons_pagination[idx_pagination]
    # Scroll to the top of the page
    driver.execute_script("window.scrollTo(0, 0);")
    # Click the pagination button
    pagination.click()
    # Fixed pause, presumably to let the table re-render after the click.
    time.sleep(2)

    # Get the weapon class from the pagination button (last "-" segment of aria-controls)
    weapon_class = pagination.get_attribute("aria-controls").split("-")[-1]
    # Find all weapon rows
    weapon_rows = weapons_scroll.find_elements(By.TAG_NAME, "tr")

    # Iterate over each weapon row
    for weapon_row in weapon_rows:
        # Exactly nine cells are expected per row; any other count raises ValueError.
        img, name, jewels, raw, element, affinity, sharpness, extras, talents = (
            weapon_row.find_elements(By.TAG_NAME, "td")
        )

        # Extract image source
        img = img.find_element(By.TAG_NAME, "img").get_attribute("src")

        # Extract and clean weapon name
        name = name.text.strip()

        # Process jewels information: map the slot glyphs to digit characters,
        # then count how many slots of each level the weapon has.
        jewels = (
            jewels.text.replace("③", "3")
            .replace("②", "2")
            .replace("①", "1")
            .replace("ー", "0")
            .strip()
        )
        jewels_list = {"0": 0, "1": 0, "2": 0, "3": 0}
        for jewel in jewels:
            jewels_list[jewel] += 1
        # Convert raw attack value to integer
        raw = int(raw.text.strip())

        try:
            # Attempt to extract element value and image source
            element_value = int(element.text.strip())
            element_img_href = element.find_element(By.TAG_NAME, "img").get_attribute(
                "src"
            )
        except Exception:
            # Default values if extraction fails (e.g. empty cell or no image).
            # `Exception` rather than a bare except so Ctrl-C still stops the scrape.
            element_value = 0
            element_img_href = None

        try:
            # Attempt to extract affinity percentage
            affinity = int(affinity.text.replace("%", ""))
        except Exception:
            # Default value if extraction fails
            affinity = 0

        try:
            # Attempt to extract sharpness SVGs as raw markup
            sharpness_svgs = [
                driver.execute_script("return arguments[0].outerHTML;", svg)
                for svg in sharpness.find_elements(By.TAG_NAME, "svg")
            ]
            # Only rows with exactly two SVGs are saved; any other count is
            # skipped silently, as before.
            if len(sharpness_svgs) == 2:
                svg_dir = os.path.join(repo_path, "data", "svg", weapon_class)
                os.makedirs(svg_dir, exist_ok=True)
                for svg_idx, svg_markup in enumerate(sharpness_svgs):
                    with open(
                        os.path.join(svg_dir, f"{name}_{svg_idx}.svg"),
                        "w",
                        encoding="utf-8",
                    ) as f:
                        f.write(svg_markup)
        except Exception as exc:
            # Still best-effort, but report the failure instead of hiding it
            # (e.g. a weapon name that is not a valid file name on this OS).
            tqdm.write(f"Sharpness SVGs not saved for {name!r}: {exc}")

        # Find all talent elements
        talents_element_list = talents.find_elements(By.TAG_NAME, "a")

        talent_list = []
        # Iterate over each talent element
        for talent in talents_element_list:
            talent_href = talent.get_attribute("href")
            talent_text_raw = talent.text.strip()

            # Extract talent level and name: split on the LAST "+" so a "+"
            # inside the talent name itself is preserved.
            talent_name, _, talent_lvl = talent_text_raw.rpartition("+")
            talent_list.append(
                {
                    "name": talent_name.strip(),
                    "href": talent_href,
                    "lvl": int(talent_lvl),
                }
            )
        # Compile weapon data into a dictionary
        weapon_data = {
            "class": weapon_class,
            "img": img,
            "name": name,
            "jewels": jewels_list,
            "raw": raw,
            "element": {
                "value": element_value,
                "img_href": element_img_href,
            },
            "affinity": affinity,
            "talents": talent_list,
            "extra": extras.text.strip(),
        }
        # Append weapon data to the list
        all_weapon_data.append(weapon_data)

100%|██████████| 14/14 [01:25<00:00,  6.09s/it]


In [None]:
# Persist the scraped weapon records as a parquet file under <repo_path>/data.
weapons_parquet_path = os.path.join(repo_path, "data", "weapons.parquet")
pl.DataFrame(all_weapon_data).write_parquet(weapons_parquet_path)

# Scrape jewels

In [None]:
# Decorations ("jewels") index page (French locale per the `/fr/` URL segment).
jewels_href = "https://mhwilds.kiranico.com/fr/data/decorations"
# Navigate the shared driver there; the next cell reads the jewel rows from this page.
driver.get(jewels_href)

In [None]:
# Every table row inside the "my-8" container is treated as one jewel.
jewels_scroll = get_element(driver, (By.CLASS_NAME, "my-8"))
jewels_elements = jewels_scroll.find_elements(By.TAG_NAME, "tr")

all_jewels = []

for jewel_row in jewels_elements:
    # Exactly two cells are expected per row: the linked name and the description.
    name_cell, description_cell = jewel_row.find_elements(By.TAG_NAME, "td")
    jewel_href = name_cell.find_element(By.TAG_NAME, "a").get_attribute("href")

    all_jewels.append(
        dict(
            name=name_cell.text.strip(),
            href=jewel_href,
            description=description_cell.text.strip(),
        )
    )
all_jewels

In [None]:
# Open each jewel's page and collect the talents it grants.
all_jewel_data = []
for jewel in tqdm(all_jewels):
    # The jewel level is the first character after the last "[" in its name.
    jewel_lvl = int(jewel["name"].split("[")[-1][0])
    driver.get(jewel["href"])
    talent_table = get_element(driver, (By.TAG_NAME, "tbody"))

    jewel_talent_list = []
    for talent_row in talent_table.find_elements(By.TAG_NAME, "tr"):
        # Exactly three cells are expected per row: name, level, description.
        name_cell, lvl_cell, description_cell = talent_row.find_elements(
            By.TAG_NAME, "td"
        )
        talent_href = name_cell.find_element(By.TAG_NAME, "a").get_attribute("href")
        talent_name = name_cell.text.strip()
        talent_lvl = int(lvl_cell.text.replace("Lv", "").strip())
        talent_description = description_cell.text.strip()
        jewel_talent_list.append(
            dict(
                name=talent_name,
                lvl=talent_lvl,
                description=talent_description,
                href=talent_href,
            )
        )

    all_jewel_data.append(
        dict(
            name=jewel["name"],
            jewel_lvl=jewel_lvl,
            jewel_talent_list=jewel_talent_list,
        )
    )
all_jewel_data

100%|██████████| 361/361 [06:43<00:00,  1.12s/it]


[{'name': 'Joyau attaque [1]',
  'jewel_lvl': 1,
  'jewel_talent_list': [{'name': 'Machine de guerre',
    'lvl': 1,
    'description': "Augmente l'attaque.",
    'href': 'https://mhwilds.kiranico.com/fr/data/skills/machine-de-guerre'}]},
 {'name': 'Joyau attaque II [2]',
  'jewel_lvl': 2,
  'jewel_talent_list': [{'name': 'Machine de guerre',
    'lvl': 2,
    'description': "Augmente l'attaque.",
    'href': 'https://mhwilds.kiranico.com/fr/data/skills/machine-de-guerre'}]},
 {'name': 'Joyau attaque III [3]',
  'jewel_lvl': 3,
  'jewel_talent_list': [{'name': 'Machine de guerre',
    'lvl': 3,
    'description': "Augmente l'attaque.",
    'href': 'https://mhwilds.kiranico.com/fr/data/skills/machine-de-guerre'}]},
 {'name': 'Joyau vengeance [2]',
  'jewel_lvl': 2,
  'jewel_talent_list': [{'name': 'Vengeance',
    'lvl': 1,
    'description': "Augmente l'attaque lorsque vous avez subi des dégâts temporaires (zone rouge de la jauge de vie).",
    'href': 'https://mhwilds.kiranico.com/fr/

In [None]:
# Persist the scraped jewel records as a parquet file under <repo_path>/data.
jewels_parquet_path = os.path.join(repo_path, "data", "jewels.parquet")
pl.DataFrame(all_jewel_data).write_parquet(jewels_parquet_path)

# Scrape talents

In [None]:
# Skills ("talents") index page (French locale per the `/fr/` URL segment).
talents_href = "https://mhwilds.kiranico.com/fr/data/skills"
# Navigate the shared driver there; the next cell reads the talent groups from this page.
driver.get(talents_href)

In [None]:
# Collect one record per talent listed on the index page.
all_talents = []

# Each "my-8" container is handled as one talent group: its <h3> text gives the
# group name and its <tr> rows give the talents.
talents_scroll_groups = get_elements(driver, (By.CLASS_NAME, "my-8"))

for scroll_group in talents_scroll_groups:
    group_name = scroll_group.find_element(By.TAG_NAME, "h3").text

    for talent_row in scroll_group.find_elements(By.TAG_NAME, "tr"):
        # Exactly two cells are expected per row: the linked name and the description.
        name_cell, description_cell = talent_row.find_elements(By.TAG_NAME, "td")
        talent_href = name_cell.find_element(By.TAG_NAME, "a").get_attribute("href")

        talent_name = name_cell.text.strip()
        # Trim the description, then flatten it onto a single line.
        flat_description = description_cell.text.strip().replace("\n", " ")

        all_talents.append(
            dict(
                group=group_name,
                name=talent_name,
                description=flat_description,
                href=talent_href,
            )
        )

# Display the collected records as the cell output.
all_talents

In [None]:
# Open each talent's page (walking the list from last to first) and attach its
# per-level descriptions to the record in place.
for talent in reversed(tqdm(all_talents)):
    driver.get(talent["href"])
    talent_table = get_element(driver, (By.CSS_SELECTOR, ".my-8 tbody"))

    all_levels = []
    for level_row in talent_table.find_elements(By.TAG_NAME, "tr"):
        # Exactly three cells are expected per row; the middle one is not used.
        lvl_cell, _unused_cell, description_cell = level_row.find_elements(
            By.TAG_NAME, "td"
        )
        level_number = int(lvl_cell.text.replace("Lv", ""))
        level_description = description_cell.text.strip()
        all_levels.append(dict(lvl=level_number, description=level_description))
    talent["levels"] = all_levels
all_talents

100%|██████████| 148/148 [02:34<00:00,  1.05s/it]


[{'group': 'Weapon',
  'name': 'Machine de guerre',
  'description': "Augmente l'attaque.",
  'href': 'https://mhwilds.kiranico.com/fr/data/skills/machine-de-guerre',
  'levels': [{'lvl': 1, 'description': 'Attaque +3'},
   {'lvl': 2, 'description': 'Attaque +5'},
   {'lvl': 3, 'description': 'Attaque +7'},
   {'lvl': 4, 'description': 'Attaque +2 % Attaque +8'},
   {'lvl': 5, 'description': 'Attaque +4 % Attaque +9'}]},
 {'group': 'Weapon',
  'name': 'Garde offensive',
  'description': "Accroît temporairement la puissance d'attaque après une garde parfaitement synchronisée.",
  'href': 'https://mhwilds.kiranico.com/fr/data/skills/garde-offensive',
  'levels': [{'lvl': 1, 'description': "Attaque +5 % lorsqu'il est actif."},
   {'lvl': 2, 'description': "Attaque +10 % lorsqu'il est actif."},
   {'lvl': 3, 'description': "Attaque +15 % lorsqu'il est actif."}]},
 {'group': 'Weapon',
  'name': "Maître d'armes",
  'description': "Augmente l'affinité.",
  'href': 'https://mhwilds.kiranico.co

In [None]:
# Persist the scraped talent records as a parquet file under <repo_path>/data.
talents_parquet_path = os.path.join(repo_path, "data", "talents.parquet")
pl.DataFrame(all_talents).write_parquet(talents_parquet_path)

piece,name,talent_name,talent_level,jewel_0,jewel_1,jewel_2,jewel_3,jewel_4
str,str,str,i64,i64,i64,i64,i64,i64
"""Tête""","""Heaume Arkveld Gardien""","""Vitalité de l'Arkveld Gardien""",1,2,1,0,0,0
"""Torse""","""Cotte Arkveld Gardien""","""Vitalité de l'Arkveld Gardien""",1,3,0,0,0,0
"""Bras""","""Avant-bras Arkveld Gardien""","""Vitalité de l'Arkveld Gardien""",1,2,1,0,0,0
"""Taille""","""Tassette Arkveld Gardien""","""Vitalité de l'Arkveld Gardien""",1,1,2,0,0,0
"""Jambes""","""Grèves Arkveld Gardien""","""Vitalité de l'Arkveld Gardien""",1,2,1,0,0,0
…,…,…,…,…,…,…,…,…
"""Tête""","""Heaume Arkveld Gardien β""","""Vitalité de l'Arkveld Gardien""",1,1,1,0,1,0
"""Torse""","""Cotte Arkveld Gardien β""","""Vitalité de l'Arkveld Gardien""",1,2,0,0,1,0
"""Bras""","""Avant-bras Arkveld Gardien β""","""Vitalité de l'Arkveld Gardien""",1,0,3,0,0,0
"""Taille""","""Tassette Arkveld Gardien β""","""Vitalité de l'Arkveld Gardien""",1,1,1,1,0,0
