# Parsing and processing IDEA files

In [16]:
import ClauseWizard
import yaml

In [17]:
# Input file parsed by this notebook. Exactly one TEST_PATH assignment is active;
# the commented-out lines are alternative idea files from the same directory that
# can be swapped in by moving the comment marker.
TEST_PATH = '../data/raw/gypsy/common/ideas/00_basic_ideas.txt'
# TEST_PATH = '../data/raw/gypsy/common/ideas/00_country_ideas.txt'
# TEST_PATH = '../data/raw/gypsy/common/ideas/00_flogi_ideas.txt'
# TEST_PATH = '../data/raw/gypsy/common/ideas/zz_group_ideas.txt'

In [18]:
# Read the whole file first so the handle is released before parsing starts.
with open(TEST_PATH, 'r') as idea_file:
    raw_text = idea_file.read()

# cwparse produces the tokens; cwformat turns them into the structure kept in obj.
tokens = ClauseWizard.cwparse(raw_text)
obj = ClauseWizard.cwformat(tokens)

In [19]:
# Replace defaultdicts with dicts

def dictify(d):
    """Return a copy of ``d`` rebuilt from plain built-in containers.

    The structure is walked recursively:

    * any ``dict`` (including subclasses) becomes a plain ``dict`` with
      converted values; keys are kept as they are,
    * any ``list`` becomes a plain ``list`` of converted items,
    * any ``set`` becomes a plain ``set`` of converted items,
    * every other value (numbers, strings, tuples, ...) is returned unchanged.
    """
    if isinstance(d, dict):
        plain = {}
        for key, value in d.items():
            plain[key] = dictify(value)
        return plain
    if isinstance(d, list):
        return list(map(dictify, d))
    if isinstance(d, set):
        return set(map(dictify, d))
    # Leaf value: nothing to convert.
    return d

# Rebind obj to the converted copy returned by dictify (defined above in this cell).
obj = dictify(obj)

In [20]:
# Drop every entry whose trigger block contains always = no.

# Names are collected first and deleted afterwards, because a dict must not
# change size while it is being iterated.
ideas_to_remove = [
    name
    for name, idea in obj.items()
    if 'trigger' in idea
    and 'always' in idea['trigger']
    # Equality (not identity) is kept on purpose so the match is unchanged.
    and idea['trigger']['always'] == False
]
print('Ideas to remove:', ideas_to_remove)

for name in ideas_to_remove:
    del obj[name]

Ideas to remove: ['aristocracy_ideas', 'plutocracy_ideas', 'horde_gov_ideas', 'theocracy_gov_ideas', 'indigenous_ideas', 'naval_ideas', 'court_ideas', 'mercenary_ideas', 'infrastructure_ideas']


In [21]:
# Display obj as it stands after the removal step in the previous cell.
obj

{'innovativeness_ideas': {'category': 'ADM',
  'bonus': {'advisor_cost': -0.2, 'free_policy': 1},
  'patron_of_art': {'prestige_decay': -0.01},
  'pragmatism': {'manpower_recovery_speed': 0.25},
  'scientific_revolution': {'innovativeness_gain': 0.2,
   'technology_cost': -0.075},
  'dynamic_court': {'advisor_pool': 1},
  'resilient_state': {'reform_progress_growth': 0.33, 'idea_cost': -0.075},
  'optimism': {'war_exhaustion': -0.03},
  'formalized_officer_corps': {'free_leader_pool': 1},
  'ai_will_do': {'factor': 2,
   'modifier': [{'factor': 10, 'is_colonial_nation': True},
    {'factor': 5, 'has_reform': 'merchant_republic'},
    {'factor': 1.25, 'innovativeness': 40, 'NOT': {'is_year': 1650}},
    {'factor': 1.25,
     'OR': {'ruler_has_personality': ['scholar_personality',
       'free_thinker_personality'],
      'in_golden_age': True}},
    {'factor': 0, 'OR': {'is_year': 1700, 'innovativeness': 80}}]}},
 'religious_ideas': {'category': 'ADM',
  'bonus': {'culture_conversion_co

In [22]:
# Strip the keys ai_will_do, trigger, important and free from every entry.

unwanted_keys = ('ai_will_do', 'trigger', 'important', 'free')
for idea in obj.values():
    for unwanted in unwanted_keys:
        # Entries only carry some of these keys, so test before deleting.
        if unwanted in idea:
            del idea[unwanted]


In [23]:
# Quality check
# - every object has exactly 9 keys
# - every object has to have keys: category or start, bonus
# - category value is a str: ADM, DIP, MIL
# - eight key values are of type dict
#
# Violations are reported with print(). In addition to reporting, this cell
# modifies obj in place: a str category is upper-cased, and every value other
# than category that is not a dict is deleted from its entry.

IDEA_CATEGORY_VALUES = ['ADM', 'DIP', 'MIL']
for k, v in obj.items():
    # The key count is taken before any non-dict value is deleted below.
    if len(v) != 9:
        print(f'Object {k} has {len(v)} keys')
    # The bonus key is required by the rules listed above.
    if 'bonus' not in v:
        print(f'Object {k} has no bonus key')
    if 'category' not in v:
        if 'start' not in v:
            print(f'Object {k} has no category or start key')
    elif not isinstance(v['category'], str):
        # Report instead of failing on .upper(), so the remaining entries are still checked.
        print(f'Object {k} has non-str category {v["category"]!r}')
    else:
        v['category'] = v['category'].upper()
        if v['category'] not in IDEA_CATEGORY_VALUES:
            print(f'Object {k} has category {v["category"]}')
    # Snapshot the keys as a list so entries can be deleted while looping.
    # A distinct variable name avoids shadowing the outer loop's k.
    value_keys = [key for key in v if key != 'category']
    for key in value_keys:
        if not isinstance(v[key], dict):
            print(f'Object {k} has no dict value for key {key}')
            del v[key]
    
    

In [24]:
# Write obj to the interim YAML file in block style (default_flow_style=False).
# NOTE(review): yaml.dump sorts mapping keys by default, so the order in the
# file can differ from obj's insertion order - confirm that this is intended.
with open('../data/interim/dev_ideas_processing_parser.yaml', 'w') as yaml_file:
    yaml.dump(obj, stream=yaml_file, default_flow_style=False)