In [6]:
import re
from bs4 import BeautifulSoup
import pandas as pd

# Data provenance: skill_list.html is a locally saved copy of
# https://aqiu384.github.io/megaten-fusion-tool/p3fes/skills

# Load the HTML content. The encoding is passed explicitly so decoding does
# not depend on the platform's locale default.
with open('skill_list.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Each skill is one <tr class="app-smt-skill-list-row">
rows = soup.find_all('tr', class_='app-smt-skill-list-row')

# One record per skill: {'skill': <name>, 'learns': '<learner>, <learner>, ...'}
data = []

for row in rows:
    # The skill name is read from the second <td> of the row
    skill = row.find_all('td')[1].text.strip()

    # Every link in the row is treated as a learner
    learners = [a.text for a in row.find_all('a')]

    # Drop parenthesised annotations and newlines, then trim: removing a
    # "(...)" suffix would otherwise leave the separating space behind and
    # yield "Name , Next" once the names are joined.
    learners = [re.sub(r'\(.*?\)|\n', '', learner).strip() for learner in learners]

    data.append({'skill': skill, 'learns': ', '.join(learners)})

# Build the DataFrame once from the list of records
df = pd.DataFrame(data)

# Print the DataFrame
print(df)

             skill                                             learns
0           Cleave     Hermes, Nekomata, Chimera, Archangel, Valkyrie
1      Power Slash                           Valkyrie, Gurr, Rakshasa
2         Getsu-ei  Rakshasa, Ares, Take-Mikazuchi, Orthrus, Hanum...
3           Zan-ei  Oberon, Ares, Titan, Pale Rider, Power, Jikoku...
4       Gale Slash             Rakshasa, Ghoul, Shiisaa, Flauros, Ose
..             ...                                                ...
275  Full Analysis                                              Lucia
276   Healing Wave                                              Lucia
277         Oracle                                              Lucia
278   Support Scan                                              Lucia
279      Third Eye                                              Lucia

[280 rows x 2 columns]


In [7]:
# Select the row(s) whose skill name is exactly "Panta Rhei" via a boolean mask
panta_rhei = df[df['skill'].eq('Panta Rhei')]

# Show the matching row(s)
print(panta_rhei)

         skill learns
70  Panta Rhei   Norn


In [6]:
# Write the skill table to skills.csv without the DataFrame index column
df.to_csv(path_or_buf='skills.csv', index=False)

In [17]:
import re
from bs4 import BeautifulSoup
import pandas as pd

# Load the HTML content. The encoding is passed explicitly so decoding does
# not depend on the platform's locale default.
with open('skill_list.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Each skill is one <tr class="app-smt-skill-list-row">
rows = soup.find_all('tr', class_='app-smt-skill-list-row')

# One record per kept skill:
# {'skill': <name>, 'learners': '<learner> (<level>), <learner>, ...'}
data = []

for row in rows:
    # The skill name is read from the second <td> of the row
    skill = row.find_all('td')[1].text.strip()

    learners_with_levels = []
    skip_row = False  # Set when a learner in this row is tagged "(Fs)"

    # Each <li> is expected to hold one learner link, optionally followed by
    # "(<digits>)" or "(Fs)"
    for li in row.find_all('li'):
        anchor = li.find('a')
        if anchor is None:
            # No learner link in this item. Appending None here would make
            # the ', '.join(...) below raise TypeError, so skip the item.
            continue

        # Strip the name the same way the full item text is stripped, so the
        # slice offset below lines up even if the link text carries whitespace.
        learner_name = anchor.get_text(strip=True)

        # Whatever follows the learner's name inside the item
        text_after_a = li.get_text(strip=True)[len(learner_name):]
        level_match = re.search(r'\((\d+|Fs)\)', text_after_a)
        level = level_match.group(1) if level_match else None

        # A single "(Fs)" learner drops the whole skill row from the dataset
        if level == 'Fs':
            skip_row = True
            break

        # Append the learner name, with its level when one was found
        learner_with_level = f"{learner_name} ({level})" if level else learner_name
        learners_with_levels.append(learner_with_level)

    if skip_row:
        continue

    data.append({'skill': skill, 'learners': ', '.join(learners_with_levels)})

# Build the DataFrame once from the list of records
df = pd.DataFrame(data)

# Print the DataFrame
print(df)


                skill  \
0              Cleave   
1         Power Slash   
2            Getsu-ei   
3              Zan-ei   
4          Gale Slash   
5        Mighty Swing   
6           Fatal End   
7       Blade of Fury   
8          Deathbound   
9      Heaven's Blade   
10      Tempest Slash   
11        Brave Blade   
12       Vorpal Blade   
13               Bash   
14       Assault Dive   
15        Sonic Punch   
16       Swift Strike   
17          Kill Rush   
18   Herculean Strike   
19      Gigantic Fist   
20          Heat Wave   
21     Vicious Strike   
22        Akasha Arts   
23         God's Hand   
24       Weary Thrust   
25        Single Shot   
26       Double Fangs   
27         Holy Arrow   
28          Twin Shot   
29       Cruel Attack   
30       Torrent Shot   
31       Vile Assault   
32       Poison Arrow   
33         Arrow Rain   
34      Myriad Arrows   
35            Pralaya   
36       Primal Force   
37                Agi   
38             Maragi   


In [18]:
# Select the row(s) whose skill name is exactly "Panta Rhei" via a boolean mask
panta_rhei = df[df['skill'].eq('Panta Rhei')]

# Show the matching row(s)
print(panta_rhei)

         skill learners
65  Panta Rhei     Norn


In [19]:
# Write the skill table to skills_with_levels.csv without the DataFrame index column
df.to_csv(path_or_buf='skills_with_levels.csv', index=False)