In [6]:
import os 
import json 
import re 

In [8]:
ID_RE = re.compile(r'resin:((Events)|(Participants))\/(\d{5})\/')

In [2]:
# making quick changes to old schemas 
schema_dir = 'phase2b/old_scenarios'

In [11]:
with open(os.path.join(schema_dir, 'disease_outbreak.json'),'r') as f:
    schema = json.load(f)

In [12]:
schema.keys() 

dict_keys(['@id', 'sdfVersion', 'version', 'events'])

In [20]:
!mkdir phase2b/old_scenarios_updated

In [21]:
for schemafile in os.listdir(schema_dir):
    with open(os.path.join(schema_dir, schemafile),'r') as f:
        schema = json.load(f)
    for e in schema['events']:
        m = re.match(ID_RE, e['@id'])
        if not m: raise ValueError
        e['@id'] = m.group(0) 
        e['modality'] = []
        if 'children' in e:
            e['children'] = [re.match(ID_RE, cid).group(0) for cid in e['children']]
        if 'participants' in e:
            for p in e['participants']:
                p['@id'] = re.match(ID_RE, p['@id']).group(0)
        if 'outlinks' in e:
             e['outlinks'] = [re.match(ID_RE, cid).group(0) for cid in e['outlinks']]

    with open(os.path.join('phase2b/old_scenarios_updated/', schemafile), 'w') as writer:
        json.dump(schema, writer, indent=2) 
    
        
            

In [16]:
schema['events'][0]['relations'][0]

{'@id': 'resin:Relations/30000/',
 'name': 'proximity',
 'relationSubject': 'resin:Entities/00000/',
 'relationObject': 'resin:Entities/00013/',
 'wd_node': 'wd:Q19267375',
 'wd_description': 'state of being near as in space, time, or relationship',
 'wd_label': 'proximity'}

In [14]:
schema['events'][0]

{'@id': 'resin:Events/10000/',
 'children': ['resin:Events/10001/',
  'resin:Events/10002/',
  'resin:Events/10003/',
  'resin:Events/10005/',
  'resin:Events/10006/',
  'resin:Events/10007/',
  'resin:Events/10004/',
  'resin:Events/10009/'],
 'children_gate': 'or',
 'description': 'Root node for disease outbreak schema.',
 'name': 'Disease Outbreak with Hierarchy',
 'participants': [{'@id': 'resin:Participants/20000/',
   'roleName': 'A2-GOL_location_victims',
   'entity': 'resin:Entities/00000/'}],
 'repeatable': False,
 'wd_node': 'wd:Q3241045',
 'wd_label': 'disease outbreak',
 'wd_description': 'sudden increase in occurrences of a disease in a particular time and place',
 'outlinks': [],
 'entities': [{'@id': 'resin:Entities/00000/',
   'name': 'initial person infected',
   'wd_label': 'victim',
   'wd_node': 'wd:Q1851760',
   'wd_description': 'person who suffers as a result of a crime or other event'},
  {'@id': 'resin:Entities/00001/',
   'name': 'exposed people',
   'wd_label

In [2]:
scenario = 'business_change'

In [3]:
ontology_file = f'scenario-ontology/{scenario}_ontology.json'
schema_file = f'scenario-schemas-verified/{scenario}.json'

In [4]:
with open(ontology_file) as f: ontology = json.load(f)

In [5]:
with open(schema_file) as f: schema = json.load(f)

In [14]:
qnode2name = {} 

In [15]:
ontology_qnodes =  set()
for e in ontology['events']:
    ontology_qnodes.add(e['wd_node'])
    qnode2name[e['wd_node']] = e['name'] 


In [16]:
len(qnode2name) 

17

In [21]:
len(ontology_qnodes)

17

In [23]:
ontology_qnodes

{'',
 'Q11024',
 'Q11398090',
 'Q1145523',
 'Q12887029',
 'Q1527264',
 'Q20888800',
 'Q327000',
 'Q3380760',
 'Q3875186',
 'Q452440',
 'Q50384157',
 'Q656365',
 'Q685744',
 'Q8434',
 'Q891854',
 'Q900406'}

In [24]:
schema_qnodes = set() 
for e in schema['events']:
    if 'qnode' in e:
        qnode = e['qnode']
        if e['qnode'].startswith('wd:'):
            qnode = e['qnode'].split(':')[1]
        qnode2name[qnode] = e['name']
        schema_qnodes.add(qnode)


# for qnode in schema_qnodes:
#     print(f"{qnode2name[qnode]} Qnode {qnode}")


In [25]:
len(schema_qnodes)

51

In [26]:
diff_qnodes = schema_qnodes - ontology_qnodes 

In [27]:
len(diff_qnodes)

50

In [29]:
for qnode in diff_qnodes:
    print(f"{qnode2name[qnode]} Qnode {qnode}")


Fundraising Qnode Q1161355
ExitBankruptcy Qnode Q12769393
Propose Acquisition Qnode Q3604747
Propose Merger Qnode Q1363768
IPO Qnode Q185142
ChargeIndict Qnode Q19357312
Detain Qnode Q1403016
Release Parole Qnode Q5357120
founding Qnode Q3075355
Corporate Criminal Response Qnode Q13370881
Handcuffing Qnode Q15808
Justice Qnode Q5167661
Corporate Criminal Inquiry Qnode Q1964968
exchange_of_goods Qnode Q21651837
Privatize Qnode Q1589009
Investigation Qnode Q21004260
Corruption Qnode Q366
Sentence Qnode Q1763090
Reorganization Qnode Q2503234
Initiate Judicial Process Qnode Q17310301
Trial Qnode Q8016240
Acquit Qnode Q1454723
Inspect Qnode Q1137655
divestment Qnode Q1200733
Transfer Money Qnode Q21130860
Identifycategorize Qnode Q912550
AskRegulators Qnode Q428148
PrivateFunding Qnode Q58900637
Fine Qnode Q1243001
breakup Qnode Q326498
Convict Qnode Q5166547
PoliticalCorruption Qnode Q10858400
Shareholder Response Qnode Q1920566
bankruptcy Qnode Q152074
Shutdown Qnode Q4128962
rejection Qn

In [31]:
# look up these qnodes in the xpo ontology 
xpo_file = 'constant/xpo_v4.json'
with open(xpo_file) as f: xpo= json.load(f)

In [32]:
len(xpo['events']) 

4713