In [2]:
from bs4 import BeautifulSoup
import requests
import json

# Base URL of the site being scraped. Listing and detail-page hrefs are
# concatenated directly onto it, so the trailing slash is significant.
url = 'https://www.aonprd.com/'

class ArchetypeScraper:
    """Scrape one category listing page and the detail page linked from each row.

    On construction the listing page ``<url><listing_page>?Category=<class_name>``
    is downloaded and parsed. ``table_rows()`` then yields the listing rows and
    ``get_archetype_info()`` downloads the page linked from a row and turns its
    ``<b>`` labels into a ``{label: text}`` dictionary.

    Args:
        url: Base URL that every href is appended to (expected to end in '/').
        class_name: Category name placed in the ``Category`` query parameter.
        listing_page: Listing page path; defaults to the magic-weapons listing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the listing page responds with an error status.
    """

    def __init__(self, url, class_name, listing_page='MagicWeapons.aspx', timeout=30):
        self.url = url
        # Kept on the instance so archetype_name() does not rely on a
        # module-level variable called ``class_name`` happening to exist.
        self.class_name = class_name
        self.timeout = timeout
        self.href = f'{listing_page}?Category={class_name}'
        # A timeout prevents the notebook from hanging forever on a stalled
        # connection; raise_for_status avoids silently parsing an error page.
        self.page = requests.get(url + self.href, timeout=timeout)
        self.page.raise_for_status()
        self.soup = BeautifulSoup(self.page.content, 'html.parser')
        self.table = self.soup.find("table")

    def table_rows(self):
        """Yield every listing ``<tr>`` whose first cell contains a link.

        The first ``<tr>`` in the document is skipped (presumably a header
        row - TODO confirm against the page). Rows without a ``<td>`` or
        without an ``<a href=...>`` in it are skipped as well, because
        ``get_archetype_info`` cannot process them.
        """
        for tr in self.soup.find_all('tr')[1:]:
            first_cell = tr.td
            if first_cell is None or first_cell.a is None:
                continue
            if first_cell.a.get('href') is None:
                continue
            yield tr

    def archetype_name(self, href2_list):
        """Return the entry name encoded in the first href of ``href2_list``.

        The name is the text between the first and second ``=`` of the href,
        with a leading ``"<class_name> "`` prefix removed when present.
        Returns ``None`` for an empty list.
        """
        for href in href2_list:
            values = href.split('=')[1]
            # The href may carry the class name + the entry name; strip the class name.
            return values.replace(f'{self.class_name} ', '')
        return None

    def clean_text(self, text):
        """Join text fragments into one space-separated string.

        Fragments that are blank or consist only of "." are dropped; the
        Unicode right single quotation mark is replaced with an apostrophe.
        """
        cleaned_text = []
        for line in text:
            stripped = line.strip()
            if not stripped or stripped == ".":
                continue
            cleaned_text.append(line.replace('\u2019', "'").strip())
        return ' '.join(cleaned_text)

    def get_archetype_info(self, table_row):
        """Download the detail page linked from ``table_row`` and parse it.

        Returns:
            A ``(name, fields)`` tuple. ``name`` comes from ``archetype_name``;
            ``fields`` maps each lower-cased ``<b>`` label on the detail page
            to the text of the siblings that follow it up to the next ``<b>``.

        Raises:
            requests.HTTPError: If the detail page responds with an error status.

        NOTE: only ``<b>`` tags delimit values, so text that sits under a
        non-bold heading is attached to the preceding label (for example a
        "weight" value that also contains the description text).
        """
        href2 = table_row.td.a.attrs['href']
        value = self.archetype_name([href2])

        detail_page = requests.get(self.url + href2, timeout=self.timeout)
        detail_page.raise_for_status()
        detail_soup = BeautifulSoup(detail_page.content, 'html.parser')

        output = {}
        for b_tag in detail_soup.find_all('b'):
            key = b_tag.text.lower()

            # Collect the text of every sibling until the next <b> tag
            value_tags = []
            next_sibling = b_tag.next_sibling
            while next_sibling and next_sibling.name != 'b':
                if hasattr(next_sibling, 'strings'):
                    # Join all strings to handle nested tags / multiple lines
                    value_tags.append(' '.join(next_sibling.strings).strip())
                next_sibling = next_sibling.next_sibling

            # A repeated label on the same page overwrites the earlier value
            output[key] = self.clean_text(value_tags)

        return value, output

# Listing categories to scrape; each becomes a top-level key of the output
class_names = ["MeleeWeaponQuality", "RangedWeaponQuality"]

# Maps category name -> {entry name -> scraped fields}
json_output = {}

# Scrape every category in turn
for class_name in class_names:
    arch_scraper = ArchetypeScraper(url, class_name)

    # Dictionary of scraped fields for every entry of the current category
    archetypes_info = {}

    # One detail-page download per listing row
    for table_row in arch_scraper.table_rows():
        archetype_name, archetype_info = arch_scraper.get_archetype_info(table_row)
        archetypes_info[archetype_name] = archetype_info

    json_output[class_name] = archetypes_info

# Serialize once and reuse the string for both the file and the printout.
# ensure_ascii=False keeps characters such as the em dash readable instead of
# emitting \uXXXX escapes, so the printed text matches the saved file.
json_string = json.dumps(json_output, indent=2, ensure_ascii=False)

with open('weapon_and_armor_qualities.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_string)

# Print the JSON string
print(json_string)

{
  "MeleeWeaponQuality": {
    "Shrinking": {
      "source": "Melee Tactics Toolbox pg. 27",
      "aura": "faint transmutation",
      "cl": "5th",
      "slot": "none;",
      "price": "+500 gp",
      "weight": "\u2014 Description This special ability can be placed only on melee weapons. When the wielder issues the command word, a shrinking weapon shrinks to the size of a standard dagger. While in this state, the weapon deals 1d4 points of damage but is more easily concealable. When the command word is spoken again, the weapon returns to its original state. Construction",
      "requirements": "Craft Magic Arms and Armor, shrink item ;"
    },
    "Agile": {
      "source": "Pathfinder #100: A Song of Silver pg. 21 , Pathfinder Society Field Guide pg. 50",
      "aura": "moderate transmutation",
      "cl": "7th",
      "slot": "none;",
      "price": "\u2014",
      "weight": "\u2014 Description A character with Weapon Finesse can apply her Dexterity modifier to damage rolls with

In [1]:
from bs4 import BeautifulSoup
import requests
import json

# Base URL of the site being scraped. Listing and detail-page hrefs are
# concatenated directly onto it, so the trailing slash is significant.
url = 'https://www.aonprd.com/'

class ArchetypeScraper:
    """Scrape one category listing page and the detail page linked from each row.

    On construction the listing page ``<url><listing_page>?Category=<class_name>``
    is downloaded and parsed. ``table_rows()`` then yields the listing rows and
    ``get_archetype_info()`` downloads the page linked from a row and turns its
    ``<b>`` labels into a ``{label: text}`` dictionary.

    Args:
        url: Base URL that every href is appended to (expected to end in '/').
        class_name: Category name placed in the ``Category`` query parameter.
        listing_page: Listing page path; defaults to the magic-armor listing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the listing page responds with an error status.
    """

    def __init__(self, url, class_name, listing_page='MagicArmor.aspx', timeout=30):
        self.url = url
        # Kept on the instance so archetype_name() does not rely on a
        # module-level variable called ``class_name`` happening to exist.
        self.class_name = class_name
        self.timeout = timeout
        self.href = f'{listing_page}?Category={class_name}'
        # A timeout prevents the notebook from hanging forever on a stalled
        # connection; raise_for_status avoids silently parsing an error page.
        self.page = requests.get(url + self.href, timeout=timeout)
        self.page.raise_for_status()
        self.soup = BeautifulSoup(self.page.content, 'html.parser')
        self.table = self.soup.find("table")

    def table_rows(self):
        """Yield every listing ``<tr>`` whose first cell contains a link.

        The first ``<tr>`` in the document is skipped (presumably a header
        row - TODO confirm against the page). Rows without a ``<td>`` or
        without an ``<a href=...>`` in it are skipped as well, because
        ``get_archetype_info`` cannot process them.
        """
        for tr in self.soup.find_all('tr')[1:]:
            first_cell = tr.td
            if first_cell is None or first_cell.a is None:
                continue
            if first_cell.a.get('href') is None:
                continue
            yield tr

    def archetype_name(self, href2_list):
        """Return the entry name encoded in the first href of ``href2_list``.

        The name is the text between the first and second ``=`` of the href,
        with a leading ``"<class_name> "`` prefix removed when present.
        Returns ``None`` for an empty list.
        """
        for href in href2_list:
            values = href.split('=')[1]
            # The href may carry the class name + the entry name; strip the class name.
            return values.replace(f'{self.class_name} ', '')
        return None

    def clean_text(self, text):
        """Join text fragments into one space-separated string.

        Fragments that are blank or consist only of "." are dropped; the
        Unicode right single quotation mark is replaced with an apostrophe.
        """
        cleaned_text = []
        for line in text:
            stripped = line.strip()
            if not stripped or stripped == ".":
                continue
            cleaned_text.append(line.replace('\u2019', "'").strip())
        return ' '.join(cleaned_text)

    def get_archetype_info(self, table_row):
        """Download the detail page linked from ``table_row`` and parse it.

        Returns:
            A ``(name, fields)`` tuple. ``name`` comes from ``archetype_name``;
            ``fields`` maps each lower-cased ``<b>`` label on the detail page
            to the text of the siblings that follow it up to the next ``<b>``.

        Raises:
            requests.HTTPError: If the detail page responds with an error status.

        NOTE: only ``<b>`` tags delimit values, so text that sits under a
        non-bold heading is attached to the preceding label (for example a
        "weight" value that also contains the description text).
        """
        href2 = table_row.td.a.attrs['href']
        value = self.archetype_name([href2])

        detail_page = requests.get(self.url + href2, timeout=self.timeout)
        detail_page.raise_for_status()
        detail_soup = BeautifulSoup(detail_page.content, 'html.parser')

        output = {}
        for b_tag in detail_soup.find_all('b'):
            key = b_tag.text.lower()

            # Collect the text of every sibling until the next <b> tag
            value_tags = []
            next_sibling = b_tag.next_sibling
            while next_sibling and next_sibling.name != 'b':
                if hasattr(next_sibling, 'strings'):
                    # Join all strings to handle nested tags / multiple lines
                    value_tags.append(' '.join(next_sibling.strings).strip())
                next_sibling = next_sibling.next_sibling

            # A repeated label on the same page overwrites the earlier value
            output[key] = self.clean_text(value_tags)

        return value, output

# Listing categories to scrape; each becomes a top-level key of the output
class_names = ["ArmorQuality", "ShieldQuality"]

# Maps category name -> {entry name -> scraped fields}
json_output = {}

# Scrape every category in turn
for class_name in class_names:
    arch_scraper = ArchetypeScraper(url, class_name)

    # Dictionary of scraped fields for every entry of the current category
    archetypes_info = {}

    # One detail-page download per listing row
    for table_row in arch_scraper.table_rows():
        archetype_name, archetype_info = arch_scraper.get_archetype_info(table_row)
        archetypes_info[archetype_name] = archetype_info

    json_output[class_name] = archetypes_info

# Serialize once and reuse the string for both the file and the printout.
# ensure_ascii=False keeps characters such as the em dash readable instead of
# emitting \uXXXX escapes, so the printed text matches the saved file.
json_string = json.dumps(json_output, indent=2, ensure_ascii=False)

with open('armor_qualities.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_string)

# Print the JSON string
print(json_string)

{
  "ArmorQuality": {
    "Advancing": {
      "source": "Armor Master's Handbook pg. 28",
      "aura": "faint necromancy",
      "cl": "5th",
      "slot": "armor quality;",
      "price": "+1 bonus",
      "weight": "\u2014 Description A suit of armor with the advancing special ability allows its wearer to move through fallen enemies to the forefront of any conflict. Once per round when the wearer of a suit of advancing armor reduces an opponent to 0 hit points or fewer with a melee attack, she can immediately move up to 10 feet as a free action. This movement does not provoke attacks of opportunity. If this armor is created as barding, the effect is triggered if either the mount or its rider reduces an opponent to 0 hit points or fewer, and allows the mount to move up to 10 feet. The advancing special ability can be applied only to heavy armor. Construction",
      "requirements": "Craft Magic Arms and Armor, deadly juggernaut UC ;"
    },
    "Balanced": {
      "source": "Armor M