# Parsing and processing IDEA files

In [2]:
import ClauseWizard
import yaml

In [3]:
# Input file to parse. The sibling files 00_adm.txt, 00_dip.txt and 00_mil.txt
# in the same directory were used as alternative inputs; swap the file name to try them.
POLICIES_DIR = '../data/raw/gypsy/common/policies'
TEST_PATH = POLICIES_DIR + '/Idea_Variation_policies.txt'

In [4]:
# Read the whole file first, then tokenise and format it with ClauseWizard.
# NOTE(review): no explicit encoding is passed to open(), so the platform default
# is used — confirm it matches the encoding of the data files.
with open(TEST_PATH, 'r') as source_file:
    raw_text = source_file.read()

tokens = ClauseWizard.cwparse(raw_text)
obj = ClauseWizard.cwformat(tokens)

In [5]:
# Replace defaultdicts with dicts

def dictify(d):
    """Recursively rebuild `d` from plain built-in containers.

    Any ``dict`` instance (which includes subclasses such as ``defaultdict``)
    is rebuilt as a plain ``dict`` with converted values; lists and sets are
    rebuilt with converted elements. Any other value is returned unchanged.
    """
    if isinstance(d, dict):
        return dict((key, dictify(value)) for key, value in d.items())
    if isinstance(d, list):
        return list(map(dictify, d))
    if isinstance(d, set):
        return set(map(dictify, d))
    return d

# Rebind `obj` to the result of dictify() so later cells work on the converted structure.
obj = dictify(obj)

In [6]:
# Strip the ai_will_do and potential entries from every top-level object.
for policy in obj.values():
    for unwanted_key in ('ai_will_do', 'potential'):
        if unwanted_key in policy:
            del policy[unwanted_key]


In [7]:
# Quality check
# - every object has to have keys: monarch_power, allow
# - monarch_power value is a str: ADM, DIP, MIL (normalised to upper case in place)
# - there are at least 3 keys in the object
# Problems are printed rather than raised, so one run reports all of them.

POLICY_SLOT_VALUES = ['ADM', 'DIP', 'MIL']
for k, v in obj.items():
    if 'monarch_power' not in v:
        print(f'No monarch_power in {k}')
    elif not isinstance(v['monarch_power'], str):
        # A non-string value has no .upper(); report it instead of letting
        # the check itself crash with an AttributeError.
        print(f'Wrong monarch_power value in {k}: {v["monarch_power"]}')
    else:
        v['monarch_power'] = v['monarch_power'].upper()
        if v['monarch_power'] not in POLICY_SLOT_VALUES:
            print(f'Wrong monarch_power value in {k}: {v["monarch_power"]}')
    if 'allow' not in v:
        print(f'No allow in {k}')
    if len(v) < 3:
        print(f'Not enough keys in {k}')
    

In [8]:
from collections import Counter

# Tally which keys occur directly under `allow`, across all objects.
allow_keys = Counter(
    key
    for entry in obj.values()
    for key in entry['allow'].keys()
)

allow_keys.most_common(n=10)

[('full_idea_group', 543),
 ('OR', 116),
 ('has_completed_religious_ideas_group', 29),
 ('has_completed_government_ideas_group', 23)]

In [9]:
# Tally which keys occur inside `allow` -> `OR`. The OR value is handled both
# as a single dict and as a list of dicts; any other type is ignored.
or_keys = Counter()
for entry in obj.values():
    allow_block = entry['allow']
    if 'OR' not in allow_block:
        continue

    or_clause = allow_block['OR']
    if isinstance(or_clause, list):
        branches = or_clause
    elif isinstance(or_clause, dict):
        branches = [or_clause]
    else:
        branches = []

    for branch in branches:
        or_keys.update(branch.keys())

or_keys.most_common(n=10)

[('full_idea_group', 123)]

In [1]:
# Idea-group names substituted when an `allow` block sets
# has_completed_religious_ideas_group.
rel_idea_names = [
    'religious_ideas',
    'anglican0',
    'animist0',
    'buddhism0',
    'cathar0',
    'catholic0',
    'confucian0',
    'coptic0',
    'dreamtime0',
    'fetishist0',
    'hellenic0',
    'hindu0',
    'hussite0',
    'ibadi0',
    'inti0',
    'jewish0',
    'manichean0',
    'mesoamerican0',
    'nahuatl0',
    'norse0',
    'orthodox0',
    'protestant0',
    'reformed0',
    'romuva0',
    'shia0',
    'shinto0',
    'slavic0',
    'sunni0',
    'suomi0',
    'tengri0',
    'totemist0',
    'zoroastrian0',
]

# Idea-group names substituted when an `allow` block sets
# has_completed_government_ideas_group.
gov_idea_names = [
    'dictatorship0',
    'horde0',
    'monarchy0',
    'republic0',
    'theocracy0',
]

def _as_list(value):
    """Return a fresh list: a copy of `value` if it is a list, otherwise `[value]`."""
    return list(value) if isinstance(value, list) else [value]


# Translate each object's `allow` block into v['req'], a tuple of two lists of
# idea-group names. Structures that are not recognised are reported with
# print() and leave the object without a 'req' key.
#
# Every list stored in 'req' is a fresh object:
# - a single name found under OR is wrapped so that both halves are always lists;
# - gov_idea_names / rel_idea_names are copied so entries do not share one
#   mutable list (shared objects are also written as anchors/aliases by yaml.dump).
for k, v in obj.items():
    allow = v['allow']
    # Named `key_names` so the `allow_keys` Counter built earlier in the
    # notebook is not rebound by this cell.
    key_names = allow.keys()

    if len(key_names) == 1:
        if 'full_idea_group' in key_names:
            full_idea_group = allow['full_idea_group']
            if isinstance(full_idea_group, list) and len(full_idea_group) == 2:
                v['req'] = ([full_idea_group[0]], [full_idea_group[1]])
            else:
                print(f'Unknown structure in {k}: {v["allow"]}')
        elif 'OR' in key_names:
            OR_group = allow['OR']
            if (isinstance(OR_group, list) and len(OR_group) == 2
                    and 'full_idea_group' in OR_group[0]
                    and 'full_idea_group' in OR_group[1]):
                v['req'] = (_as_list(OR_group[0]['full_idea_group']),
                            _as_list(OR_group[1]['full_idea_group']))
            else:
                print(f'Unknown structure in {k}: {v["allow"]}')
        else:
            print(f'Need parting for structure in {k}: {v["allow"]}')
    elif len(key_names) == 2:
        full_idea_group, government_ideas_group, religious_ideas_group, OR_group = None, None, None, None
        if 'full_idea_group' in key_names and isinstance(allow['full_idea_group'], str):
            full_idea_group = allow['full_idea_group']
        if 'has_completed_government_ideas_group' in key_names and isinstance(allow['has_completed_government_ideas_group'], bool):
            government_ideas_group = allow['has_completed_government_ideas_group']
        if 'has_completed_religious_ideas_group' in key_names and isinstance(allow['has_completed_religious_ideas_group'], bool):
            religious_ideas_group = allow['has_completed_religious_ideas_group']
        if 'OR' in key_names and isinstance(allow['OR'], dict) and 'full_idea_group' in allow['OR']:
            OR_group = allow['OR']

        # Exactly 2 requirements have to be recognised (not None).
        recognised = sum([
            full_idea_group is not None,
            government_ideas_group is not None,
            religious_ideas_group is not None,
            OR_group is not None,
        ])
        if recognised != 2:
            print(f'Unable to parse 2keys structure in {k}: {v["allow"]}')
        else:
            # The boolean flags are tested for truthiness: a flag that is present
            # but False matches no branch and ends up in the final `else`.
            if full_idea_group is not None and government_ideas_group:
                v['req'] = [full_idea_group], list(gov_idea_names)
            elif full_idea_group is not None and religious_ideas_group:
                v['req'] = [full_idea_group], list(rel_idea_names)
            elif full_idea_group is not None and OR_group is not None:
                v['req'] = [full_idea_group], _as_list(OR_group['full_idea_group'])
            elif government_ideas_group and religious_ideas_group:
                v['req'] = list(gov_idea_names), list(rel_idea_names)
            elif government_ideas_group and OR_group is not None:
                v['req'] = list(gov_idea_names), _as_list(OR_group['full_idea_group'])
            elif religious_ideas_group and OR_group is not None:
                v['req'] = list(rel_idea_names), _as_list(OR_group['full_idea_group'])
            else:
                print(f'Unable to parse 2keys structure in {k}: {v["allow"]}')
    else:
        print(f'Need parting for structure in {k}: {v["allow"]}')

NameError: name 'obj' is not defined

In [10]:
# Second quality check
# - drop the `allow` key from every object
# - every object must carry a `req` key holding a tuple of exactly 2 lists
# Each object reports at most one structural problem; the two element-type
# checks are independent of each other.

for k, v in obj.items():
    if 'allow' in v:
        del v['allow']

    if 'req' not in v:
        print(f'No req in {k}')
        continue
    if not isinstance(v['req'], tuple):
        print(f'Wrong req type in {k}: {v["req"]}')
        continue
    if len(v['req']) != 2:
        print(f'Wrong req length in {k}: {v["req"]}')
        continue

    first_half, second_half = v['req']
    if not isinstance(first_half, list):
        print(f'Wrong req[0] type in {k}: {v["req"]}')
    if not isinstance(second_half, list):
        print(f'Wrong req[1] type in {k}: {v["req"]}')

In [11]:
# Write the processed objects to the interim YAML file (block style, not flow style).
# NOTE(review): the `req` values are tuples; PyYAML's default dumper is expected to
# write them with a Python-specific tuple tag — confirm the consumer of this file
# loads it with a loader that accepts that tag.
OUTPUT_PATH = '../data/interim/dev_policies_processing_parser.yaml'
with open(OUTPUT_PATH, 'w') as out_file:
    yaml.dump(obj, stream=out_file, default_flow_style=False)