In [1]:
import numpy as np
import pandas as pd

In [2]:
armor_df = pd.read_html('https://finalfantasy.fandom.com/wiki/Final_Fantasy_armor')

In [3]:
shields = armor_df[1]
helmet = armor_df[2]
body_armor = armor_df[3]
gloves = armor_df[4]

In [4]:
all_armor_df = pd.DataFrame()

armor_dfs = [
    shields, helmet, body_armor, gloves
]
armor_types = [
    'Sheilds', 'Helmet', 'Body Armor', 'Gloves'
]

for armor_df, armor_type in zip(armor_dfs, armor_types):
    armor_df['Armor Type'] = armor_type
    all_armor_df = pd.concat([all_armor_df, armor_df], ignore_index=True)

In [5]:
reshaped = all_armor_df.copy()

reshaped['Count'] = reshaped.groupby('Name').cumcount()

reshaped = reshaped.pivot(index='Name', columns='Count', values=['Jobs'])

PIVOT_RENAME = {
    '0': 'Jobs',
    '1': 'Effect',
    '2': 'Description',
}

reshaped.columns = [f'{i}' for col, i in reshaped.columns]
reshaped.reset_index(inplace=True)
reshaped = reshaped.rename(columns = PIVOT_RENAME)

reshaped['Effect'], reshaped['Description'] = np.where(reshaped['Description'].isna(), (reshaped['Description'], reshaped['Effect']), (reshaped['Effect'], reshaped['Description']))

reshaped['Effect'] = np.where(reshaped['Effect'].isna(), ('No Effect'), (reshaped['Effect']))

In [6]:
grouped = all_armor_df.groupby('Name').first()

grouped.reset_index(inplace=True)

all_armor_df = grouped.merge(reshaped)

In [7]:
all_armor_df.columns.values.tolist()

['Name',
 'PS/GBA Image',
 'PSP Image',
 'Jobs',
 'Defense',
 'Evasion',
 'Weight',
 'Acquirement',
 'Cost',
 'Unnamed: 9',
 'Armor Type',
 'Acquisition',
 'Effect',
 'Description']

In [8]:
all_armor_df['Cost'] = all_armor_df['Cost'].str.replace('(^[^0-9]+?(.*))', 'N/A', regex=True)
all_armor_df['Cost'] = all_armor_df['Cost'].str.replace('Sells(.*)', '', regex=True)
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('Black Wizard', 'BW')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('White Wizard', 'WW')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('Red Wizard', 'RW')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('Knight', 'Kn')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('Ninja', 'Ni')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('All jobs but Thief', 'Wa, Kn, Ni, RM, RW, BM, BW, Mo, Ma')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('All jobs but Mo & Ma', 'Wa, Kn, Th, Ni, RM, RW, BM, BW')
all_armor_df['Jobs'] = all_armor_df['Jobs'].str.replace('All jobs', 'Wa, Kn, Th, Ni, RM, RW, BM, BW, Mo, Ma')

In [9]:
all_armor_df['Alternative Name'] = all_armor_df['Name'].str.extract(
    '(\\(.*)'
)

all_armor_df['Alternative Name'] = all_armor_df['Alternative Name'].str.replace(') (', ', ')
all_armor_df['Alternative Name'] = all_armor_df['Alternative Name'].str.replace('(', '')
all_armor_df['Alternative Name'] = all_armor_df['Alternative Name'].str.replace(')', '')

all_armor_df['Name'] = all_armor_df['Name'].str.replace('(\\(.*)', '', regex=True)

In [10]:
all_armor_df['Acquirement'] = np.where(all_armor_df['Acquirement'].notna(), all_armor_df['Acquirement'], all_armor_df['Acquisition'])

b_df = all_armor_df['Acquirement'].str.extract('(?=Buy: (.*?) (?=[A-Z][a-z]+:))|(?=Buy: (.*))', expand=False)
all_armor_df['Buy'] = np.where(b_df[0].notna(), b_df[0], b_df[1])

f_df = all_armor_df['Acquirement'].str.extract('(?=Find: (.*?) (?=[A-Z][a-z]+:))|(?=Find: (.*))', expand=False)
all_armor_df['Find'] = np.where(f_df[0].notna(), f_df[0], f_df[1])

d_df = all_armor_df['Acquirement'].str.extract('(?=Drop: (.*?) (?=[A-Z][a-z]+:))|(?=Drop: (.*))', expand=False)
all_armor_df['Drop'] = np.where(d_df[0].notna(), d_df[0], d_df[1])

In [11]:
BONUS_AREAS = [
    'Earthgift Shrine', 
    'Hellfire Chasm', 
    'Lifespring Grotto', 
    'Whisperwind Cove',
    'Labyrinth of Time',
]

BONUS_ENCOUNTERS = [
    'Black Goblin',
    'Wild Nakk',
    'Gloom Widow',
    'Desertpede',
    'Catoblepas',
    'Abyss Warm',
    'Skudier',
    'Silver Dragon',
    'Dark Elemental',
    'Devil Hound',
    'Hundlegs'
    'Python',
    'Yellow Ogre',
    'Elm Gigas',
    'Death Elemental',
    'Blue Troll',
    'Poison Naga',
    'Sahagin Queen',
    'Revenant',
    'Black Dragon',
    'Blue Dragon',
    'Dragon Zombie',
    'Green Dragon',
    'Holy Dragon',
    'Red Dragon',
    'White Dragon',
    'Yellow Dragon',
    'Death Manticore',
    'Killer Shark',
    'Dark Eye',
    'Dark Wolf',
    'Knocker',
    'Red Flan',
    'Squidraken'
    'Earth Troll',
    'Sekhret',
    'Rock Gargoyle',
    'Poison Eagle',
    'Reaper',
    'Undergrounder',
    'Bonesnatch',
    'Flood Gigas',
    'Mad Ogre',
    'Devil Wizard',
    'Duel Knight',
    'Unicorn',
    'Prototype',
    'Earth Plant',
    'Bloody Eye',
    'Mage Chimera',
    'Mythril Golem',
    'Yamatano Orochi',
    'Blood Tiger',
    'Pharaoh',
    'Hundlegs',
    'Flare Gigas',
]

BONUS_BOSSES = [
    'Ahriman',
    'Cerberus',
    'Echidna',
    'Two-Headed Dragon',
    'Cognazzo',
    'Scarmiglione',
    'Barbariccia',
    'Rubicante',
    'Gilgamesh',
    'Atomos',
    'Omega',
    'Shinrya',
    'Astos',
    'Vampire',
    'Typhon',
    'Orthros',
    'Phatom Train',
    'Death Gaze',
    'Chronodia',
]

BONUS_CONTENT = BONUS_AREAS + BONUS_ENCOUNTERS + BONUS_BOSSES

all_armor_df['Bonus Content'] = all_armor_df['Acquirement'].apply(lambda x: True if any(bonus in x for bonus in BONUS_CONTENT) else False)

In [12]:
split_costs = all_armor_df.copy()

split_costs['Cost (Easy)'] = split_costs['Cost'].str.extract(
    '(.*) \\(Easy\\) ')
split_costs['Cost (Normal)'] = split_costs['Cost'].str.extract(
    '\\(Easy\\) (.*) \\(Normal\\)')

split_costs['Cost (Easy)'] = np.where(split_costs['Cost (Easy)'].isna(),
    split_costs['Cost'], split_costs['Cost (Easy)'])
split_costs['Cost (Normal)'] = np.where(split_costs['Cost (Normal)'].isna(),
    split_costs['Cost'], split_costs['Cost (Normal)'])

all_armor_df = split_costs

In [13]:
pre_drop = all_armor_df.copy()

drop_candidates = [
    'Acquirement', 'Acquisition', 'Count', 'NES Image', 'Unnamed: 9',
    'PS/GBA Image', 'PSP Image', 'PSP/Mobile Image',
]
drop_columns = [x for x in drop_candidates if x in all_armor_df.columns]

pre_drop = pre_drop.drop(columns=drop_columns)

pre_drop.reset_index()

all_armor_df = pre_drop

In [14]:
cols_order = [
    'Name',
    'Alternative Name',
    'Armor Type',
    'Jobs',
    'Defense',
    'Evasion',
    'Weight'
    'Description',
    'Effect',
    'Buy',
    'Find',
    'Drop',
    'Cost',
    'Cost (Easy)',
    'Cost (Normal)',
    'Bonus Content',
]

cols = all_armor_df.columns.values.tolist()

cols

['Name',
 'Jobs',
 'Defense',
 'Evasion',
 'Weight',
 'Cost',
 'Armor Type',
 'Acquisition',
 'Effect',
 'Description',
 'Alternative Name',
 'Buy',
 'Find',
 'Drop',
 'Bonus Content',
 'Cost (Easy)',
 'Cost (Normal)']

In [15]:
all_armor_df.to_csv('ff1_armor.csv', index=False)