In [1]:
import re
from bs4 import BeautifulSoup
import pandas as pd

# Matches a parenthesised learn level such as "(15)", or the literal marker "(Fs)".
_LEVEL_PATTERN = re.compile(r'\((\d+|Fs)\)')


def _learners_for_row(row):
    """Collect the learner entries listed in one skill row.

    Each ``<li>`` in the row is expected to hold an ``<a>`` with the learner's
    name, optionally followed by a parenthesised level, e.g. ``Valkyrie (15)``.

    Args:
        row: A BeautifulSoup ``<tr>`` tag for one skill.

    Returns:
        A list of strings, each either ``"Name (level)"`` or just ``"Name"``
        when no level follows the name. Returns ``None`` as soon as any
        learner is marked ``(Fs)``, signalling that the whole row must be
        left out of the dataset.
    """
    learners = []
    for li in row.find_all('li'):
        link = li.find('a')
        # Strip the name the same way the full item text is stripped below,
        # otherwise the slice offset would be wrong when the anchor text
        # carries surrounding whitespace.
        learner_name = link.get_text(strip=True) if link else ''
        if not learner_name:
            # No usable name: skip the item instead of appending None, which
            # would make the ', '.join(...) of the row raise TypeError.
            continue

        # Only look at the text that follows the name, so parentheses inside
        # the name itself are never mistaken for a level.
        text_after_name = li.get_text(strip=True)[len(learner_name):]
        level_match = _LEVEL_PATTERN.search(text_after_name)
        level = level_match.group(1) if level_match else None

        if level == 'Fs':
            return None

        learners.append(f"{learner_name} ({level})" if level else learner_name)
    return learners


# Load the HTML content
# NOTE(review): no explicit encoding is passed, so the platform default is
# used when decoding the file — confirm the file's actual encoding.
with open('skill_list.html', 'r') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Find all "tr" elements with class="app-smt-skill-list-row"
rows = soup.find_all('tr', class_='app-smt-skill-list-row')

# One record per kept skill row: {'skill': ..., 'learners': ...}
data = []

for row in rows:
    # The skill name is taken from the second cell of the row
    skill = row.find_all('td')[1].text

    learners_with_levels = _learners_for_row(row)
    if learners_with_levels is None:
        # At least one learner was marked "(Fs)": drop the whole row
        continue

    data.append({'skill': skill, 'learners': ', '.join(learners_with_levels)})

# Create a pandas DataFrame; naming the columns explicitly keeps 'skill' and
# 'learners' present even when no row was collected.
df = pd.DataFrame(data, columns=['skill', 'learners'])

# Print the DataFrame
print(df)


             skill                                           learners
0           Cleave     Hermes, Nekomata, Chimera, Archangel, Valkyrie
1      Power Slash                      Valkyrie (15), Gurr, Rakshasa
2         Getsu-ei  Rakshasa (18), Ares (24), Take-Mikazuchi (27),...
3           Zan-ei  Oberon (18), Ares, Titan, Pale Rider, Power (2...
4       Gale Slash   Rakshasa (22), Ghoul (23), Shiisaa, Flauros, Ose
..             ...                                                ...
257  Full Analysis                                              Lucia
258   Healing Wave                                         Lucia (41)
259         Oracle                                         Lucia (50)
260   Support Scan                                         Lucia (23)
261      Third Eye                                         Lucia (32)

[262 rows x 2 columns]


In [3]:
# Select the row(s) whose skill is "Power Slash".
# The result variable is still called `panta_rhei`, but the filter below
# targets "Power Slash".
panta_rhei = df[df['skill'].eq('Power Slash')]

# Show the matching row(s)
print(panta_rhei)

         skill                       learners
1  Power Slash  Valkyrie (15), Gurr, Rakshasa


In [None]:
# Write the skill/learner table to a CSV file, omitting the DataFrame index.
df.to_csv(path_or_buf='skills_with_levels.csv', index=False)