# Data Cleaning

## Imports

In [90]:
import json
from pprint import pp
from functools import reduce
import re

### Read-in

In [91]:
with open('../Data/character_table.json', errors='ignore') as f:
    data = json.load(f)
    
with open('../Data/skill_table.json', encoding="utf-8", errors='ignore') as f:
    skill_data = json.load(f)

## Transform

In [92]:
cleaned_data = {}
mapper = {"CASTER": "Caster",
          "MEDIC" : "Medic",
          "PIONEER" : "Vanguard",
          "SNIPER": "Sniper",
          "SPECIAL" : "Specialist",
          "SUPPORT" : "Supporter",
          "TANK" : "Defender",
          "WARRIOR" : "Guard",
          "TOKEN" : "Summoned Unit",
          "TRAP" : "Obstacle"}

replace_dict = {
    "atk" : "ATK",
    "def" : "DEF",
    "max hp" : "Max HP",
    "hp" : "HP",
    "block" : "Block",
    "range" : "Range"
}

pattern = re.compile(r'<[^>]*>')

In [93]:
def get_name(unit):
    return unit["name"]

In [94]:
def get_class(unit):
    return mapper[unit["profession"]]

In [95]:
def get_tags(unit):
    return unit["tagList"]

In [96]:
def get_trait(unit):
    if unit["description"]:
        return pattern.sub("",unit["description"])
    else:
        return None

In [97]:
def get_rarity(unit):
    return unit["rarity"] + 1

In [98]:
def get_talents(unit):
    talent_list = unit["talents"]
    try:
        return [{talent["name"] : talent["description"] for talent in talents["candidates"] if talent["requiredPotentialRank"] == 0} for talents in talent_list]
    except TypeError:
        return None

In [99]:
cleaned_data = {}
for key, unit in data.items():
    cleaned_data[get_name(unit)] = {
        "rarity" : get_rarity(unit),
        "class" : get_class(unit),
        "tags" : get_tags(unit),
        "trait" : get_trait(unit),
        "talents" : get_talents(unit),
        "internal_id" : key
    }

In [100]:
skills = [skill["skillId"] for skill in data[key]["skills"]]

In [113]:
for skill in skills:
    name = skill_data[skill]["levels"][0]["name"]
    bb = { row["key"] : row["value"] for row in skill_data[skill]["levels"][0]["blackboard"]}
    skill_text = skill_data[skill]["levels"][0]["description"]
    skill_text = reduce(lambda x,y: x.replace(y, replace_dict[y]), replace_dict, pattern.sub("",skill_text.lower().replace(":0%",":.0%").format(**bb)).capitalize())
    unit_name = get_name(unit)
    print({name : skill_text.replace(unit_name.lower(),unit_name)})

{'Swift Strike γ': 'Atk +20%; aspd +20.0'}
{'Wave Strike': 'Atk +80% for 15.0 seconds after Skadi is deployed'}
{'Tidal Elegy': 'Atk, DEF and Max HP +70%'}


In [102]:
skill_data["skchr_skadi_3"]["levels"][0]

{'name': 'Tidal Elegy',
 'rangeId': None,
 'description': 'ATK, DEF and Max HP <@ba.vup>+{atk:0%}</>',
 'skillType': 1,
 'spData': {'spType': 1,
  'levelUpCost': None,
  'maxChargeTime': 1,
  'spCost': 90,
  'initSp': 50,
  'increment': 1.0},
 'prefabId': 'skchr_skadi_3',
 'duration': 35.0,
 'blackboard': [{'key': 'atk', 'value': 0.7},
  {'key': 'def', 'value': 0.7},
  {'key': 'max_hp', 'value': 0.7}]}

In [103]:
skill_data["skchr_svrash_2"]["levels"][0]

{'name': 'Rules of Survival',
 'rangeId': '1-2',
 'description': '<@ba.rem>Can switch between the original state and the following state:</>\nRange <@ba.vdown>reduces</>; DEF <@ba.vup>+{def:0%}</>; Restores HP by <@ba.vup>{HP_RECOVERY_PER_SEC_BY_MAX_HP_RATIO:0.0%}</> of Max HP per second',
 'skillType': 1,
 'spData': {'spType': 1,
  'levelUpCost': None,
  'maxChargeTime': 1,
  'spCost': 5,
  'initSp': 0,
  'increment': 1.0},
 'prefabId': 'skchr_svrash_2',
 'duration': 0.0,
 'blackboard': [{'key': 'def', 'value': 0.35},
  {'key': 'hp_recovery_per_sec_by_max_hp_ratio', 'value': 0.03}]}

In [104]:
skill_text = skill_data["skchr_svrash_2"]["levels"][0]["description"]

In [105]:
skill_data["skchr_svrash_2"]["levels"][0]["blackboard"]

[{'key': 'def', 'value': 0.35},
 {'key': 'hp_recovery_per_sec_by_max_hp_ratio', 'value': 0.03}]

In [106]:
bb = { row["key"] : row["value"] for row in skill_data["skchr_svrash_2"]["levels"][0]["blackboard"]}

In [107]:
reduce(lambda x,y: x.replace(y, replace_dict[y]), replace_dict, pattern.sub("",skill_text.lower().replace(":0%",":.0%").format(**bb)).capitalize())

'Can switch between the original state and the following state:\nRange reduces; DEF +35%; restores HP by 3% of Max HP per second'

## Load

In [108]:
with open('../Data/cleaned_characters.json', 'w') as f:
    json.dump(cleaned_data, f)