In [40]:
import pandas as pd
import numpy as np
import os

# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [41]:
# Read the two scraped unit tables (space units first, then ground units).
space_units_df, ground_units_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/scraped/space_units_df.csv',
        '../data/scraped/ground_units_df.csv',
    )
)

In [42]:
# Tag each table with its domain so the rows stay distinguishable once the
# two tables are combined.
space_units_df = space_units_df.assign(Is_Ship=True, Is_Ground_Force=False)
ground_units_df = ground_units_df.assign(Is_Ship=False, Is_Ground_Force=True)

In [43]:
# Stack space units on top of ground units and renumber the rows 0..n-1.
all_units_df = pd.concat([space_units_df, ground_units_df], ignore_index=True)
all_units_df

Unnamed: 0,Unit_Name,Faction_Name,Unit_Abilities,Standard_Abilities,Has_Sustain_Damage,Cost,Combat,Combat_Value,Shots,Move,Capacity,Unit_Type,Is_Ship,Is_Ground_Force
0,Advanced Carrier,The Federation of Sol,,,False,3,9,9,1,1^,6^,Carrier,True,False
1,Advanced Carrier II,The Federation of Sol,,,True,3,9,9,1,2,8,Carrier,True,False
2,Airo Shir Rex,Radiant Aur,"At the end of the edict phase, if this unit is...",Anti-Fighter Barrage 5 (x3),True,8,7 (x2),7,2,1,6,Flagship,True,False
3,Arc Secundus,The Barony of Letnev,Other players' units in this system lose PLANE...,Bombardment 5 (x3),True,8,5 (x2),5,2,1,3,Flagship,True,False
4,Artemiris,The Council Keleres,Other players must spend 2 influence to activa...,,True,8,7 (x2),7,2,1,6,Flagship,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Reanimator,The Vuil'Raith Cabal,When your infantry on this planet are destroye...,,True,2,6,6,1,,,Mechs,False,True
100,Reclaimer,The Winnu,After you resolve a tactical action where you ...,,True,2,6,6,1,,,Mechs,False,True
101,Indomitus,The Xxcha Kingdom,You may use this unit's SPACE CANNON against s...,Space Cannon 8,True,2,6,6,1,,,Mechs,False,True
102,Moyin's ashes,The Yin Brotherhood,DEPLOY: When you use your INDOCTRINATION facti...,,True,2,6,6,1,,,Mechs,False,True


In [44]:
# Account for Upgrade Symbols
# The raw stat cells can carry non-numeric decoration (e.g. '1^', '6^', '-^'),
# so pull out the leading integer into a separate <stat>_Value column.
# Cells with no digits at all (missing values, '-^') become 0.
for stat in ('Cost', 'Move', 'Capacity'):
    leading_number = (
        all_units_df[stat]
        .astype(str)  # keeps .str usable even if read_csv inferred a numeric dtype
        # r'(\d+)' rather than r'(\d)': take the whole first number, so a
        # multi-digit stat (10 or more) is not truncated to its first digit.
        .str.extract(r'(\d+)', expand=False)
    )
    all_units_df[f'{stat}_Value'] = pd.to_numeric(leading_number).fillna(0).astype(int)

In [45]:
# Split 'Standard Abilities'
#
# For each standard ability, derive three columns from 'Standard_Abilities':
#   Has_<prefix>     - True when the ability name appears in the text
#   <prefix>_Value   - the roll value that follows the name (0 when absent)
#   <prefix>_Shots   - the N from a trailing '(xN)'; 1 when the ability is
#                      present without a multiplier; 0 when the ability is absent
#
# Ability label as written in the text -> prefix used for the new columns.
# The labels contain no regex metacharacters, so they are safe to embed in
# the patterns below as-is.
ability_columns = {
    'Anti-Fighter Barrage': 'Anti_Fighter',
    'Bombardment': 'Bombardment',
    'Space Cannon': 'Space_Cannon',
}

for ability_label, prefix in ability_columns.items():
    has_col = f'Has_{prefix}'
    value_col = f'{prefix}_Value'
    shots_col = f'{prefix}_Shots'

    all_units_df[has_col] = all_units_df['Standard_Abilities'].str.contains(ability_label, na=False)

    # The whole '(xN)' suffix is optional. Previously only the closing paren
    # was optional, so an entry such as 'Space Cannon 8' did not match at all
    # and its value was lost (filled with 0).
    parsed = all_units_df['Standard_Abilities'].str.extract(
        rf'\b{ability_label}\s*(\d+)?(?:\s*\(x(\d+)\))?')

    # str.extract yields strings; convert so the columns are uniformly integer
    # instead of a mix of str digits and int fill values.
    all_units_df[value_col] = pd.to_numeric(parsed[0]).fillna(0).astype(int)

    # Missing shot count: 1 if the ability is present, otherwise 0.
    default_shots = all_units_df[has_col].astype(int)
    all_units_df[shots_col] = pd.to_numeric(parsed[1]).fillna(default_shots).astype(int)

all_units_df.head(30)

Unnamed: 0,Unit_Name,Faction_Name,Unit_Abilities,Standard_Abilities,Has_Sustain_Damage,Cost,Combat,Combat_Value,Shots,Move,Capacity,Unit_Type,Is_Ship,Is_Ground_Force,Cost_Value,Move_Value,Capacity_Value,Has_Anti_Fighter,Anti_Fighter_Value,Anti_Fighter_Shots,Has_Bombardment,Bombardment_Value,Bombardment_Shots,Has_Space_Cannon,Space_Cannon_Value,Space_Cannon_Shots
0,Advanced Carrier,The Federation of Sol,,,False,3,9,9,1,1^,6^,Carrier,True,False,3,1,6,False,0,0,False,0,0,False,0,0
1,Advanced Carrier II,The Federation of Sol,,,True,3,9,9,1,2,8,Carrier,True,False,3,2,8,False,0,0,False,0,0,False,0,0
2,Airo Shir Rex,Radiant Aur,"At the end of the edict phase, if this unit is...",Anti-Fighter Barrage 5 (x3),True,8,7 (x2),7,2,1,6,Flagship,True,False,8,1,6,True,5,3,False,0,0,False,0,0
3,Arc Secundus,The Barony of Letnev,Other players' units in this system lose PLANE...,Bombardment 5 (x3),True,8,5 (x2),5,2,1,3,Flagship,True,False,8,1,3,False,0,0,True,5,3,False,0,0
4,Artemiris,The Council Keleres,Other players must spend 2 influence to activa...,,True,8,7 (x2),7,2,1,6,Flagship,True,False,8,1,6,False,0,0,False,0,0,False,0,0
5,Arvicon Rex,The Mahact Gene Sorcerers,During combat against an opponent whose comman...,,True,8,5 (x2),5,2,1,3,Flagship,True,False,8,1,3,False,0,0,False,0,0,False,0,0
6,C'Morran N'orr,Sardakk N'orr,Apply +1 to the result of each of your other s...,,True,8,6 (x2),6,2,1,3,Flagship,True,False,8,1,3,False,0,0,False,0,0,False,0,0
7,Carrier,Common Unit,,,False,3,9,9,1,1^,4^,Carrier,True,False,3,1,4,False,0,0,False,0,0,False,0,0
8,Carrier II,Common Unit,,,False,3,9,9,1,2,6,Carrier,True,False,3,2,6,False,0,0,False,0,0,False,0,0
9,Cruiser,Common Unit,,,False,2,7^,7,1,2^,-^,Cruiser,True,False,2,2,0,False,0,0,False,0,0,False,0,0


In [46]:
# Persist the cleaned table. exist_ok=True creates the output directory when
# needed without a separate existence check (no check-then-create race).
os.makedirs('../data/clean', exist_ok=True)

all_units_df.to_csv('../data/clean/all_units_df.csv', index=False)