# Data Cleaning

## Imports

In [1]:
import json
from pprint import pp
import re

### Read-in

In [2]:
# Decode explicitly as UTF-8 so the parsed text does not depend on the
# platform's default locale encoding (e.g. cp1252 on Windows).
# errors='ignore' is kept on purpose: undecodable bytes are dropped instead of
# raising, matching the original best-effort read.
with open('../Data/character_table.json', encoding='utf-8', errors='ignore') as f:
    data = json.load(f)

## Transform

In [3]:
# Accumulator for the cleaned records (re-initialised before the main loop).
cleaned_data = {}

# Raw "profession" codes -> class names written to the cleaned output.
mapper = dict(
    CASTER="Caster",
    MEDIC="Medic",
    PIONEER="Vanguard",
    SNIPER="Sniper",
    SPECIAL="Specialist",
    SUPPORT="Supporter",
    TANK="Defender",
    WARRIOR="Guard",
    TOKEN="Summoned Unit",
    TRAP="Obstacle",
)

# Matches any "<...>" span so markup tags can be stripped from text.
pattern = re.compile(r'<[^>]*>')

In [4]:
def get_name(unit):
    """Return the value stored under the unit's ``"name"`` key.

    Raises ``KeyError`` if the key is absent.
    """
    name = unit["name"]
    return name

In [5]:
def get_class(unit):
    """Translate the unit's ``"profession"`` code into its class name.

    The lookup goes through the module-level ``mapper`` table; a code that is
    not listed there raises ``KeyError``.
    """
    profession_code = unit["profession"]
    return mapper[profession_code]

In [6]:
def get_tags(unit):
    """Return the unit's ``"tagList"`` entry unchanged.

    Raises ``KeyError`` if the key is absent.
    """
    tags = unit["tagList"]
    return tags

In [26]:
def get_trait(unit):
    """Return the unit's ``"description"`` with ``<...>`` tags removed.

    Every span matched by the module-level ``pattern`` is replaced with the
    empty string. A falsy description (``None`` or ``""``) yields ``None``.
    """
    description = unit["description"]
    if not description:
        return None
    return pattern.sub("", description)

In [13]:
def get_rarity(unit):
    """Return the unit's ``"rarity"`` value increased by one.

    NOTE(review): presumably the raw value is zero-based and this converts it
    to a 1-based rarity -- confirm against the source data.
    """
    raw_rarity = unit["rarity"]
    return raw_rarity + 1

In [35]:
def get_talents(unit):
    """Collect the talents of a unit that require no potential upgrade.

    For every entry of ``unit["talents"]`` a dict is built that maps talent
    name to talent description, keeping only candidates whose
    ``"requiredPotentialRank"`` equals 0. When several kept candidates share
    a name, the last one wins.

    Returns the list of those dicts (one per talent entry), or ``None`` if a
    ``TypeError`` occurs while building it -- for instance when
    ``unit["talents"]`` is ``None``.
    """
    talent_list = unit["talents"]
    try:
        collected = []
        for slot in talent_list:
            by_name = {}
            for candidate in slot["candidates"]:
                if candidate["requiredPotentialRank"] != 0:
                    continue
                # Read the name before the description, as the original did.
                talent_name = candidate["name"]
                by_name[talent_name] = candidate["description"]
            collected.append(by_name)
        return collected
    except TypeError:
        return None

In [36]:
# Rebuild the output mapping from scratch: one record per raw unit, keyed by
# the unit's name (a later unit with the same name replaces the earlier one).
cleaned_data = {}
for unit in data.values():
    record = {
        "rarity" : get_rarity(unit),
        "class" : get_class(unit),
        "tags" : get_tags(unit),
        "trait" : get_trait(unit),
        "talents" : get_talents(unit),
    }
    cleaned_data[get_name(unit)] = record

In [37]:
# Display the keys of cleaned_data (the unit names) as the cell output.
cleaned_data.keys()

dict_keys(['Lancet-2', 'Castle-3', 'Yato', 'Noir Corne', 'Rangers', 'Durin', '12F', 'Fang', 'Vanilla', 'Plume', 'Melantha', 'Popukar', 'Cardigan', 'Beagle', 'Spot', 'Kroos', 'Adnachiel', 'Lava', 'Hibiscus', 'Ansel', 'Steward', 'Orchid', 'Haze', 'Gitano', 'Greyy', 'Jessica', 'Meteor', 'Shirayuki', 'Courier', 'Scavenger', 'Vigna', 'Dobermann', 'Matoimaru', 'Frostleaf', 'Estelle', 'Mousse', 'Gravel', 'Rope', 'Myrrh', 'Gavial', 'Perfumer', 'Matterhorn', 'Cuora', 'Gummy', 'Deepcolor', 'Earthspirit', 'Shaw', 'Ptilopsis', 'Zima', 'Texas', 'Swire', 'Franka', 'Indra', 'Lappland', 'Specter', 'Blue Poison', 'Platinum', 'Meteorite', 'Amiya', 'Skyfire', 'Mayer', 'Silence', 'Warfarin', 'Nearl', 'Projekt Red', 'Liskarm', 'Croissant', 'Vulcan', 'Provence', 'Firewatch', 'Cliffheart', 'Pramanix', 'Istina', 'Sora', 'Manticore', 'FEater', 'Exusiai', 'Siege', 'Ifrit', 'Eyjafjalla', 'Angelina', 'Shining', 'Nightingale', 'Hoshiguma', 'Saria', 'SilverAsh', "Ch'en", 'Medic Drone', 'Tentacle', 'Mirage', 'Robott

## Load

In [38]:
# Serialise the cleaned mapping to JSON, overwriting any existing file.
output_path = '../Data/cleaned_characters.json'
with open(output_path, 'w') as out_file:
    json.dump(cleaned_data, out_file)