In [58]:
'''
Scrape Data from tables on many different pages from the Twilight Imperium Wiki.
Assemble into two data frames for Space and Ground forces. 
'''

'\nScrape Data from tables on many different pages from the Twilight Imperium Wiki.\nAssemble into two data frames for Space and Ground forces. \n'

In [59]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
import os

In [60]:
# FLAGSHIPS
# Scrape one stat table per faction from the Flagship page and assemble them
# into `flagships_df` (one row per flagship, with a trailing 'Faction' column).

URL = 'https://twilight-imperium.fandom.com/wiki/Flagship'

# Fetch the webpage. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() stops us from parsing an HTTP error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# Keep only the <h3> headings that carry a 'mw-headline' span
headings = [
    heading for heading in soup.find_all('h3')
    if heading.find('span', class_='mw-headline') is not None
]

# The span text is the faction name; strip('[]') trims any bracket characters
# left at either end of the heading text.
spans = [heading.find('span', class_='mw-headline') for heading in headings]
factions = [span.get_text(strip=True).strip('[]') for span in spans]

# The element immediately following each heading is expected to hold the table
tables = [heading.find_next_sibling() for heading in headings]
# TODO: currently missing The Nomad's flagship v2

# Scrape Data for Each Flagship
all_flagships = []
for faction, table in zip(factions, tables):
    rows = table.find_all('tr') if table is not None else []
    # Rows 0-2 are read below (name, abilities, stats); skip headings that are
    # not followed by such a table instead of aborting the whole scrape.
    if len(rows) < 3:
        print(f'Skipping {faction!r}: no flagship table found after its heading')
        continue

    name = rows[0].get_text(strip=True)

    # The ability row is newline separated: the first line is the main
    # ability, any further lines are treated as bonus abilities.
    abilities = rows[1].get_text().strip('\n').split('\n')
    ability = abilities[0]
    bonus_abilities = ' & '.join(abilities[1:])
    has_sustain_damage = 'Sustain Damage' in bonus_abilities

    row_data = [cell.get_text(strip=True) for cell in rows[2].find_all('td')]
    if len(row_data) < 4:
        print(f'Skipping {faction!r}: expected 4 stat cells, found {len(row_data)}')
        continue
    cost, combat, move, capacity = row_data[:4]
    # 'Combat' keeps the raw cell text; 'Combat_Value' drops any '^' marker
    combat_value = combat.strip('^')

    # 'Faction' goes last so the column order matches the previous layout
    all_flagships.append({
        'Name': name,
        'Ability': ability,
        'Bonus_Abilities': bonus_abilities,
        'Has_Sustain_Damage': has_sustain_damage,
        'Cost': cost,
        'Combat': combat,
        'Combat_Value': combat_value,
        'Move': move,
        'Capacity': capacity,
        'Unit Type': 'Flagship',
        'Faction': faction,
    })

flagships_df = pd.DataFrame(all_flagships)

In [61]:
# OTHER SHIPS
# Scrape every 'article-table' on each ship-type page into `ships_df`
# (one row per unit table found).

ships_list = ['Carrier', 'Cruiser', 'Destroyer', 'Dreadnought', 'Fighter', 'War_Sun']
ships = []

for ship in ships_list:
    URL = f'https://twilight-imperium.fandom.com/wiki/{ship}'

    # Fetch the webpage. The timeout keeps a stalled connection from hanging
    # the kernel, and raise_for_status() stops us parsing an HTTP error page.
    page = requests.get(URL, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # TODO: jumping straight to the tables means the faction name for
    # faction-specific units is currently not captured.
    tables = soup.find_all('table', class_="article-table")

    for table in tables:
        # The first header cell of the table is used as the unit name
        name = table.find('th').get_text(strip=True)
        # Separate name for the parsed frame so `table` stays the bs4 element
        table_df = pd.read_html(StringIO(str(table)))[0]

        # Non-string values (e.g. NaN for an empty cell) count as "no ability"
        ability = table_df.iloc[0, 1]
        has_sustain_damage = isinstance(ability, str) and 'Sustain Damage' in ability

        # Stats are read from the second-to-last row with empty cells dropped
        row_data = table_df.iloc[-2].dropna()
        cost = row_data.iloc[0]
        combat = row_data.iloc[1]
        # 'Combat' keeps the raw cell text; 'Combat_Value' drops any '^' marker
        combat_value = combat.strip('^')
        move = row_data.iloc[2]
        # Rows with no fourth value get a capacity of 0
        capacity = row_data.iloc[3] if len(row_data) > 3 else 0

        ships.append({
            'Name': name,
            'Cost': cost,
            'Combat': combat,
            'Combat_Value': combat_value,
            'Move': move,
            'Capacity': capacity,
            'Ability': ability,
            'Has_Sustain_Damage': has_sustain_damage,
            'Unit Type': ship,
        })

ships_df = pd.DataFrame(ships)
# NOTE(review): hard-coded drop of the row labelled 22 - presumably a table
# that is not a real unit. If the wiki layout changes this will remove a
# different row; TODO replace with a filter on the row's content.
ships_df = ships_df.drop(index=22)

In [62]:
# Preview the first rows only rather than dumping the whole frame
ships_df.head(10)

Unnamed: 0,Name,Cost,Combat,Combat_Value,Move,Capacity,Ability,Has_Sustain_Damage,Unit Type
0,Carrier,3,9,9,1^,4^,,False,Carrier
1,Carrier II,3,9,9,2,6,,False,Carrier
2,Advanced Carrier,3,9,9,1^,6^,,False,Carrier
3,Advanced Carrier II,3,9,9,2,8,Sustain Damage,True,Carrier
4,Cruiser,2,7^,7,2^,-^,,False,Cruiser
5,Cruiser II,2,6,6,3,1,,False,Cruiser
6,Saturn Engine I,2,7^,7,2^,1^,,False,Cruiser
7,Saturn Engine II,2,6,6,3,2,Sustain Damage,True,Cruiser
8,Destroyer,1,9^,9,2,0,Anti-Fighter Barrage 9 (x2),False,Destroyer
9,Destroyer II,1,8,8,2,0,Anti-Fighter Barrage 6 (x3),False,Destroyer


In [63]:
# GROUND FORCES
# Scrape every 'article-table' on the Infantry and Mechs pages into
# `ground_forces_df` (one row per unit table found).

ground_forces_list = ['Infantry', 'Mechs']
ground_forces = []

for ground_force in ground_forces_list:
    URL = f'https://twilight-imperium.fandom.com/wiki/{ground_force}'

    # Fetch the webpage. The timeout keeps a stalled connection from hanging
    # the kernel, and raise_for_status() stops us parsing an HTTP error page.
    page = requests.get(URL, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # TODO: jumping straight to the tables means the faction name for
    # faction-specific units is currently not captured.
    tables = soup.find_all('table', class_="article-table")

    for table in tables:
        # The first header cell of the table is used as the unit name
        name = table.find('th').get_text(strip=True)
        # Separate name for the parsed frame so `table` stays the bs4 element
        table_df = pd.read_html(StringIO(str(table)))[0]

        # Non-string values (e.g. NaN for an empty cell) count as "no ability"
        ability = table_df.iloc[0, 1]
        has_sustain_damage = isinstance(ability, str) and 'Sustain Damage' in ability

        # Stats are read from the second-to-last row with empty cells dropped
        row_data = table_df.iloc[-2].dropna()
        cost = row_data.iloc[0]
        combat = row_data.iloc[1]
        # 'Combat' keeps the raw cell text; 'Combat_Value' drops any '^' marker
        combat_value = combat.strip('^')

        ground_forces.append({
            'Name': name,
            'Cost': cost,
            'Combat': combat,
            'Combat_Value': combat_value,
            'Ability': ability,
            'Has_Sustain_Damage': has_sustain_damage,
            'Unit Type': ground_force,
        })

ground_forces_df = pd.DataFrame(ground_forces)
# Preview the first rows only rather than dumping the whole frame
ground_forces_df.head(10)

Unnamed: 0,Name,Cost,Combat,Combat_Value,Ability,Has_Sustain_Damage,Unit Type
0,Infantry,1 (x2),8^,8,,False,Infantry
1,Infantry II,1 (x2),7,7,"After this unit is destroyed, roll 1 die. If t...",False,Infantry
2,Spec Ops I,1 (x2),7^,7,,False,Infantry
3,Spec Ops II,1 (x2),6,6,"After this unit is destroyed, roll 1 die. If t...",False,Infantry
4,Letani Warrior I,1 (x2),8^,8,Production 1,False,Infantry
5,Letani Warrior II,1 (x2),7,7,"After this unit is destroyed, roll 1 die. If t...",False,Infantry
6,Crimson Legionnaire I,1 (x2),8^,8,"After this unit is destroyed, gain 1 commodity...",False,Infantry
7,Crimson Legionnaire II,1 (x2),7,7,"After this unit is destroyed, gain 1 commodity...",False,Infantry
8,Letani Behemoth,2,6,6,DEPLOY: When you use MITOSIS faction ability y...,True,Mechs
9,Aerie Sentinel,2,6,6,This unit does not count against capacity if i...,True,Mechs


In [64]:
# Combine flagships with the other ships. No `on=` is given, so the merge keys
# are the columns both frames share; how='outer' keeps the rows of both sides.
space_units_df = pd.merge(flagships_df, ships_df, how='outer')
# Ground forces need no combining; expose them under the matching name
ground_units_df = ground_forces_df

In [65]:
# Create the output directory if needed. exist_ok=True replaces the separate
# exists() check, so there is no gap between checking and creating.
os.makedirs('../Data', exist_ok=True)

# Write both tables without the index column
space_units_df.to_csv('../Data/space_units_df.csv', index=False)
ground_units_df.to_csv('../Data/ground_units_df.csv', index=False)