In [1]:
import re
from bs4 import BeautifulSoup
import pandas as pd

# Load the HTML content.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default (which is what open() falls back to otherwise).
with open('skill_list.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Each skill is one <tr class="app-smt-skill-list-row"> element
rows = soup.find_all('tr', class_='app-smt-skill-list-row')

# One dict per skill row; turned into a DataFrame in a single step below
data = []

for row in rows:
    # Collect the row's cells once instead of re-scanning the row per column.
    # A row with fewer than six <td> cells raises IndexError below.
    cells = row.find_all('td')

    # The skill name is read from the 2nd <td>
    skill = cells[1].text

    # The skill type is the text of the "element-icon" div; rows without
    # such a div get the placeholder 'Unknown'
    type_div = row.find('div', class_='element-icon')
    skill_type = type_div.text if type_div else 'Unknown'

    # The rank is read from the 6th <td> (assumed to be a fixed position)
    rank = cells[5].text.strip()

    # Learner names are the texts of every <a> in the row, with newlines and
    # any parenthesised text removed
    learners = [re.sub(r'\(.*?\)|\n', '', a.text) for a in row.find_all('a')]

    # Add the data to the list, including the skill type and rank
    data.append({'skill': skill, 'type': skill_type, 'rank': rank, 'learns': ', '.join(learners)})

# Create a pandas DataFrame
df = pd.DataFrame(data)

# Print the entire dataframe.
# NOTE: set_option is global, so every later print in this kernel session
# also shows all rows.
pd.set_option('display.max_rows', None)
print(df)



                skill type rank  \
0              Cleave  sla  1.5   
1         Power Slash  sla    2   
2            Getsu-ei  sla    3   
3              Zan-ei  sla    3   
4          Gale Slash  sla    3   
5        Mighty Swing  sla    4   
6           Fatal End  sla  4.5   
7       Blade of Fury  sla  5.5   
8          Deathbound  sla  6.5   
9      Heaven's Blade  sla    7   
10      Tempest Slash  sla    7   
11        Brave Blade  sla    8   
12      Scarlet Havoc  sla   99   
13       Vorpal Blade  sla   99   
14               Bash  str  1.5   
15       Assault Dive  str  2.5   
16        Sonic Punch  str  2.5   
17       Swift Strike  str    3   
18          Kill Rush  str  3.5   
19   Herculean Strike  str    4   
20      Gigantic Fist  str  5.5   
21          Heat Wave  str  5.5   
22     Vicious Strike  str    6   
23        Akasha Arts  str    7   
24         God's Hand  str    8   
25       Shadow Hound  str   99   
26       Weary Thrust  str   99   
27        Single Sho

In [2]:
# Select the row(s) whose 'skill' column is exactly "Panta Rhei"
panta_rhei = df[df['skill'].eq('Panta Rhei')]

# Show the matching row(s)
print(panta_rhei)

         skill type rank learns
70  Panta Rhei  win    9   Norn


In [3]:
# Maps the three-letter type codes stored in the 'type' column to their
# display names.
conversion_of_types = {'sla': 'Slash',
 'str': 'Strike',
 'pie': 'Pierce',
 'fir': 'Fire',
 'ice': 'Ice',
 'ele': 'Elec',
 'win': 'Wind',
 'lig': 'Light',
 'dar': 'Dark',
 'rec': 'Recovery',
 'sup': 'Enhance',
 'ail': 'Bad Status',
 'spe': 'Special',
 'alm': 'Almighty',
 'pas': 'Passive'}

# Replace every code in the 'type' column with its display name in one
# vectorised pass. Values that are not keys of the mapping (for example the
# 'Unknown' placeholder, or names that were already converted by an earlier
# run of this cell) are kept unchanged instead of raising KeyError, so the
# cell can safely be re-executed.
df['type'] = df['type'].map(lambda code: conversion_of_types.get(code, code))

# Print the DataFrame
print(df)


                skill        type rank  \
0              Cleave       Slash  1.5   
1         Power Slash       Slash    2   
2            Getsu-ei       Slash    3   
3              Zan-ei       Slash    3   
4          Gale Slash       Slash    3   
5        Mighty Swing       Slash    4   
6           Fatal End       Slash  4.5   
7       Blade of Fury       Slash  5.5   
8          Deathbound       Slash  6.5   
9      Heaven's Blade       Slash    7   
10      Tempest Slash       Slash    7   
11        Brave Blade       Slash    8   
12      Scarlet Havoc       Slash   99   
13       Vorpal Blade       Slash   99   
14               Bash      Strike  1.5   
15       Assault Dive      Strike  2.5   
16        Sonic Punch      Strike  2.5   
17       Swift Strike      Strike    3   
18          Kill Rush      Strike  3.5   
19   Herculean Strike      Strike    4   
20      Gigantic Fist      Strike  5.5   
21          Heat Wave      Strike  5.5   
22     Vicious Strike      Strike 

In [4]:
# Write the frame to skills.csv, leaving the row index out of the file
df.to_csv(path_or_buf='skills.csv', index=False)

In [17]:
import re
from bs4 import BeautifulSoup
import pandas as pd

# Load the HTML content.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default (which is what open() falls back to otherwise).
with open('skill_list.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Each skill is one <tr class="app-smt-skill-list-row"> element
rows = soup.find_all('tr', class_='app-smt-skill-list-row')

# One dict per kept skill row; turned into a DataFrame in a single step below
data = []

for row in rows:
    # The skill name is read from the 2nd <td>
    skill = row.find_all('td')[1].text

    # "Name" or "Name (level)" entries for this skill
    learners_with_levels = []
    skip_row = False  # Set when the whole skill row must be left out of the dataset

    # Each <li> in the row describes one learner
    for li in row.find_all('li'):
        anchor = li.find('a')
        if anchor is None:
            # No link means no learner name. Appending None here would make
            # the ', '.join(...) below raise TypeError, so skip the entry.
            continue

        learner_name = anchor.text.strip()

        # get_text(strip=True) strips every text node before joining them, so
        # the link text is measured the same way to find where the text that
        # follows the name starts (the link is assumed to come first in the <li>).
        name_length = len(anchor.get_text(strip=True))
        text_after_a = li.get_text(strip=True)[name_length:]

        # Only "(<digits>)" or "(Fs)" are recognised; anything else is
        # treated as "no level"
        level_match = re.search(r'\((\d+|Fs)\)', text_after_a)
        level = level_match.group(1) if level_match else None

        if level == 'Fs':
            # One 'Fs' learner excludes the whole skill row
            skip_row = True
            break

        # Append the learner name, with its level when one was found
        learners_with_levels.append(f"{learner_name} ({level})" if level else learner_name)

    # Rows flagged above are not added to the dataset
    if skip_row:
        continue

    # Add the data to the list
    data.append({'skill': skill, 'learners': ', '.join(learners_with_levels)})

# Create a pandas DataFrame.
# NOTE: this rebinds `df`; any frame previously stored under that name in the
# kernel is replaced by this skill/learners table.
df = pd.DataFrame(data)

# Print the DataFrame
print(df)


                skill  \
0              Cleave   
1         Power Slash   
2            Getsu-ei   
3              Zan-ei   
4          Gale Slash   
5        Mighty Swing   
6           Fatal End   
7       Blade of Fury   
8          Deathbound   
9      Heaven's Blade   
10      Tempest Slash   
11        Brave Blade   
12       Vorpal Blade   
13               Bash   
14       Assault Dive   
15        Sonic Punch   
16       Swift Strike   
17          Kill Rush   
18   Herculean Strike   
19      Gigantic Fist   
20          Heat Wave   
21     Vicious Strike   
22        Akasha Arts   
23         God's Hand   
24       Weary Thrust   
25        Single Shot   
26       Double Fangs   
27         Holy Arrow   
28          Twin Shot   
29       Cruel Attack   
30       Torrent Shot   
31       Vile Assault   
32       Poison Arrow   
33         Arrow Rain   
34      Myriad Arrows   
35            Pralaya   
36       Primal Force   
37                Agi   
38             Maragi   


In [18]:
# Select the row(s) whose 'skill' column is exactly "Panta Rhei"
panta_rhei = df[df['skill'].eq('Panta Rhei')]

# Show the matching row(s)
print(panta_rhei)

         skill learners
65  Panta Rhei     Norn


In [19]:
# Write the frame to skills_with_levels.csv, leaving the row index out of the file
df.to_csv(path_or_buf='skills_with_levels.csv', index=False)