# Converting JSON Mapping

In [1]:
import json
from pprint import pprint
import pandas as pd
import pdb

In [2]:
def _bnode_property_mapping(colname, propname, datatype, dataprefix, bnode_type):
    """Build one property mapping whose value is a blank node of type ``bnode_type``.

    Shared implementation for get_unique_bnode_dict and get_bnode_dict, which
    differ only in the blank-node ``type`` string they emit.

    Args:
        colname: name of the source column holding the literal value.
        propname: constant used as the outer predicate name (under the 'ioa' prefix).
        datatype: constant used as the literal's datatype name.
        dataprefix: prefix expression used for the datatype.
        bnode_type: value stored under the blank node's 'type' key.

    Returns:
        A new dict with the keys 'property' and 'values'.
    """
    # Outer predicate: constant ``propname`` under the 'ioa' prefix.
    prop = {'transformation': {'expression': 'ioa', 'language': 'prefix'},
            'valueSource': {'constant': propname, 'source': 'constant'}}

    # Typed literal read from column ``colname``.
    literal_value = {
        'valueSource': {'columnName': colname, 'source': 'column'},
        'valueType': {'datatype': {'transformation': {'expression': dataprefix, 'language': 'prefix'},
                                   'valueSource': {'constant': datatype, 'source': 'constant'}},
                      'type': 'datatype_literal'}}

    # Inner predicate hanging off the blank node: constant 'value' under the 'pred' prefix.
    inner_mapping = {
        'property': {'transformation': {'expression': 'pred', 'language': 'prefix'},
                     'valueSource': {'constant': 'value', 'source': 'constant'}},
        'values': [literal_value]}

    val_type = {'propertyMappings': [inner_mapping],
                'type': bnode_type}

    # The blank node's own value source is the fixed column name 'Column'.
    # NOTE(review): confirm this is intended rather than ``colname``.
    vals = [{'valueSource': {'columnName': 'Column', 'source': 'column'},
             'valueType': val_type}]
    return {'property': prop, 'values': vals}


def get_unique_bnode_dict(colname, propname, datatype, dataprefix='xsd'):
    """Return a property mapping whose value is a blank node of type 'unique_bnode'.

    The blank node carries a single 'pred:value' property holding the typed
    literal taken from column ``colname``; ``propname`` becomes the outer
    'ioa' predicate. ``dataprefix`` defaults to 'xsd'.
    """
    return _bnode_property_mapping(colname, propname, datatype, dataprefix, 'unique_bnode')


def get_bnode_dict(colname, propname, datatype, dataprefix='xsd'):
    """Return a property mapping whose value is a blank node of type 'value_bnode'.

    Identical in structure to get_unique_bnode_dict except for the
    blank-node ``type`` string.
    """
    return _bnode_property_mapping(colname, propname, datatype, dataprefix, 'value_bnode')

def f_prefix(d):
    """Return the prefix expression of a property mapping's predicate."""
    return d['property']['transformation']['expression']


def f_property(d):
    """Return the constant predicate name of a property mapping."""
    return d['property']['valueSource']['constant']


def f_colname(d):
    """Return the source column name of the mapping's first value."""
    return d['values'][0]['valueSource']['columnName']


def f_dtype(d):
    """Return the datatype constant of the mapping's first value.

    Raises KeyError when that value's 'valueType' has no 'datatype' entry
    (for example a plain ``{'type': 'literal'}`` value type).
    """
    return d['values'][0]['valueType']['datatype']['valueSource']['constant']

In [3]:
def all_blank_nodes(data_properties,prop_transform):
    """Build unique-bnode mappings for the selected properties.

    A mapping from ``data_properties`` is converted when its property name is
    a key of ``prop_transform`` and its prefix is 'ioa'; the new predicate
    name is the corresponding value in ``prop_transform``. Mappings that do
    not qualify are skipped. Returns the list of generated mappings, in
    input order.
    """
    converted = []
    for mapping in data_properties:
        old_name = f_property(mapping)
        mapping_prefix = f_prefix(mapping)
        # Skip anything that is not an 'ioa' property listed for conversion.
        if old_name not in prop_transform or mapping_prefix != 'ioa':
            continue
        column = f_colname(mapping)
        dtype = f_dtype(mapping)
        new_name = prop_transform[old_name]
        converted.append(get_unique_bnode_dict(column, new_name, dtype))
    return converted

## Exploring WDT examples

In [2]:
# Load kb.json into `data`; the inspection cells below show a dict with the
# keys 'concepts' and 'entities'.
with open('kb.json', 'r') as f:
    data = json.load(f)

In [3]:
# Parse statements.rj as JSON into `data_ioa`; the cells below index it by
# resource IRI strings.
with open('statements.rj', 'r') as f:
    data_ioa = json.load(f)

In [4]:
type(data)

dict

In [5]:
data.keys()

dict_keys(['concepts', 'entities'])

In [7]:
len(data['concepts'])

794

In [8]:
len(data['entities'])

16960

In [13]:
# NOTE(review): `k` is not assigned by any cell above this one, so this cell
# raises NameError on Restart & Run All. The displayed value presumably came
# from a since-removed cell — restore that cell or drop this one.
k

'Q786'

In [15]:
# Entity ids used as keys into data['entities'] (key_nasa is looked up below).
# NOTE(review): what each id denotes is inferred from the variable names only —
# confirm against the knowledge base.
key_sat = 'Q26540'
key_nasa = 'Q23548'
key_cosb = 'Q48711'
key_orbit = 'Q4130'

In [17]:
pprint(data['entities'][key_nasa])

{'attributes': [{'key': 'IPv4 routing prefix',
                 'qualifiers': {'start time': [{'type': 'date',
                                                'value': '1986/9/24'}]},
                 'value': {'type': 'string', 'value': '128.159.0.0/16'}},
                {'key': 'IPv4 routing prefix',
                 'qualifiers': {'start time': [{'type': 'date',
                                                'value': '1986/9/24'}]},
                 'value': {'type': 'string', 'value': '128.157.0.0/16'}},
                {'key': 'IPv4 routing prefix',
                 'qualifiers': {},
                 'value': {'type': 'string', 'value': '128.156.0.0/15'}},
                {'key': 'IPv4 routing prefix',
                 'qualifiers': {},
                 'value': {'type': 'string', 'value': '128.156.0.0/14'}},
                {'key': 'short name',
                 'qualifiers': {},
                 'value': {'type': 'string', 'value': 'NASA'}},
                {'key': 'short name

In [4]:
len(data_ioa)

155292

In [11]:
# Print the key at position 1000 of data_ioa, then stop iterating.
for i, k in enumerate(data_ioa):
    if i >= 1000:
        print(k)
        break

http://ioa-graph/resource/destorbit113875


In [12]:
len(data_ioa['http://ioa-graph/resource/entity632'])

4

In [13]:
pprint(data_ioa['http://ioa-graph/resource/entity632'])

{'http://www.semanticweb.org/esa-ioa/ontologies/2022/ioa-ontology#Name': [{'datatype': 'http://www.w3.org/2001/XMLSchema#string',
                                                                           'graphs': ['http://ioa-graph/resource/'],
                                                                           'type': 'literal',
                                                                           'value': 'National '
                                                                                    'Aeronautics '
                                                                                    'and '
                                                                                    'Space '
                                                                                    'Administration'}],
 'http://www.semanticweb.org/esa-ioa/ontologies/2022/ioa-ontology#hasHostCountry': [{'graphs': ['http://ioa-graph/resource/'],
                                                 

## WDT train examples

In [2]:
# Load the training examples from train.json.
# NOTE(review): this rebinds `data`, which earlier in the notebook held the
# contents of kb.json; cells that index data['entities'] will not work after
# this one has run.
with open('train.json', 'r') as f:
    data = json.load(f)

In [7]:
data[0]

{'question': 'Which town has a TOID of 4000000074573917 and has an OS grid reference of SP8778?',
 'choices': ['Wigan',
  'Doncaster',
  'Royal Tunbridge Wells',
  'Kettering',
  'Edmonton',
  'Macclesfield',
  'Blackburn',
  'Colchester',
  'South Shields',
  'Wimbledon'],
 'program': [{'function': 'FindAll', 'dependencies': [], 'inputs': []},
  {'function': 'FilterStr',
   'dependencies': [0],
   'inputs': ['TOID', '4000000074573917']},
  {'function': 'FilterConcept', 'dependencies': [1], 'inputs': ['town']},
  {'function': 'FindAll', 'dependencies': [], 'inputs': []},
  {'function': 'FilterStr',
   'dependencies': [3],
   'inputs': ['OS grid reference', 'SP8778']},
  {'function': 'FilterConcept', 'dependencies': [4], 'inputs': ['town']},
  {'function': 'And', 'dependencies': [2, 5], 'inputs': []},
  {'function': 'What', 'dependencies': [6], 'inputs': []}],
 'sparql': 'SELECT DISTINCT ?e WHERE { ?e <pred:instance_of> ?c . ?c <pred:name> "town" . ?e <TOID> ?pv . ?pv <pred:value> "4000

In [10]:
# Keep only the 'question' and 'sparql' fields of the first 500 examples and
# write them to train_sparql.csv (to_csv is called with its defaults, so the
# row index is written as the first column).
train_df = pd.DataFrame(data[:500], columns=['question', 'sparql'])
train_df.to_csv('train_sparql.csv')

In [13]:
pprint(data[14]['question'])

('What is the academic degree Vladimir Nabokov achieved at Trinity College '
 '(the one that is the education place of George V) ?')


In [12]:
pprint(data[14]['sparql'])

('SELECT DISTINCT ?qpv WHERE { ?e_1 <pred:name> "Vladimir Nabokov" . ?e_2 '
 '<pred:name> "Trinity College" . ?e_3 <educated_at> ?e_2 . ?e_3 <pred:name> '
 '"George V" . ?e_1 <educated_at> ?e_2 . [ <pred:fact_h> ?e_1 ; <pred:fact_r> '
 '<educated_at> ; <pred:fact_t> ?e_2 ] <academic_degree> ?qpv .  }')


## Loading object mapping

In [2]:
# Load the object mapping; the cell below shows its top-level keys.
with open('mapping_objects.json', 'r') as f:
    data_map = json.load(f)

In [3]:
data_map.keys()

dict_keys(['baseIRI', 'namespaces', 'subjectMappings'])

In [4]:
# Property mappings of the first subject mapping. This is the same list object
# that data_map holds (no copy is made), so in-place changes show up in both.
data_properties = data_map['subjectMappings'][0]['propertyMappings']
len(data_properties)

33

In [5]:
pprint(data_properties[3])

{'property': {'transformation': {'expression': 'ioa', 'language': 'prefix'},
              'valueSource': {'constant': 'Height', 'source': 'constant'}},
 'values': [{'valueSource': {'columnName': 'height', 'source': 'column'},
             'valueType': {'datatype': {'transformation': {'expression': 'xsd',
                                                           'language': 'prefix'},
                                        'valueSource': {'constant': 'float',
                                                        'source': 'constant'}},
                           'type': 'datatype_literal'}}]}


In [17]:
pprint(data_properties[20])

{'property': {'transformation': {'expression': 'pred', 'language': 'prefix'},
              'valueSource': {'constant': 'name', 'source': 'constant'}},
 'values': [{'valueSource': {'columnName': 'name', 'source': 'column'},
             'valueType': {'type': 'literal'}}]}


In [13]:
# Collect the prefix and the property name of every mapping, in mapping order.
pre_all = [f_prefix(p) for p in data_properties]
prop_all = [f_property(p) for p in data_properties]

In [14]:
# Existing property name -> predicate name used for its blank-node version.
prop_transform = dict(
    Mass='mass',
    Height='height',
    Shape='shape',
    AvgCrossSection='average_cross_section',
    MaxCrossSection='max_cross_section',
    CosparID='cospar_id',
    Depth='depth',
    Satno='satno',
    Diameter='diameter',
    Span='span',
    MinCrossSection='min_cross_section',
    VimpelID='vimpel_id',
)

In [15]:
# Build the blank-node mappings with all_blank_nodes (defined near the top of
# the notebook) instead of repeating its loop inline.
blank_nodes_new = all_blank_nodes(data_properties, prop_transform)

In [19]:
# Keep the first 21 existing property mappings and append the generated
# blank-node mappings. The slice creates a new list, so data_properties itself
# is not modified.
# NOTE(review): 21 is a hard-coded cut-off — confirm it still matches the
# layout of mapping_objects.json.
data_properties_replace = data_properties[:21]
data_properties_replace.extend(blank_nodes_new)
len(data_properties_replace)

33

In [23]:
# Re-read the original mapping to get a fresh dict, then swap in the new
# property-mapping list for the first subject mapping.
with open('mapping_objects.json', 'r') as f:
    data_map_new = json.load(f)
data_map_new['subjectMappings'][0]['propertyMappings'] = data_properties_replace

In [24]:
# Write the updated mapping. json.dump returns None, so its result is not
# assigned: binding it to data_map would overwrite the loaded mapping with None.
with open('mapping_objects_update.json', 'w') as f:
    json.dump(data_map_new, f)

## All new object mappings

### Initial Orbits

In [17]:
# Path of the initial-orbit mapping; read below and read again when building
# the updated mapping.
fname = 'AllMappingsJson/mapping-init-orbit.json'

In [18]:
# Load the initial-orbit mapping; this rebinds data_map to the new file's contents.
with open(fname, 'r') as f:
    data_map = json.load(f)

In [19]:
data_map.keys()

dict_keys(['baseIRI', 'namespaces', 'subjectMappings'])

In [20]:
# Property mappings of the first subject mapping. This is the same list object
# that data_map holds (no copy is made), so in-place changes show up in both.
data_prop_init = data_map['subjectMappings'][0]['propertyMappings']
len(data_prop_init)

11

In [8]:
pprint(data_prop_init[3])

{'property': {'transformation': {'expression': 'ioa', 'language': 'prefix'},
              'valueSource': {'constant': 'ArgOfPeriapsis',
                              'source': 'constant'}},
 'values': [{'valueSource': {'columnName': 'aPer', 'source': 'column'},
             'valueType': {'datatype': {'transformation': {'expression': 'xsd',
                                                           'language': 'prefix'},
                                        'valueSource': {'constant': 'float',
                                                        'source': 'constant'}},
                           'type': 'datatype_literal'}}]}


In [10]:
# Collect the prefix and the property name of every initial-orbit mapping, in order.
pre_all = [f_prefix(p) for p in data_prop_init]
prop_all = [f_property(p) for p in data_prop_init]

In [11]:
prop_all

['Inclination',
 'RAAN',
 'MeanAnomaly',
 'ArgOfPeriapsis',
 'hasFrame',
 'Eccentricity',
 'SemimajorAxis',
 'Epoch',
 'isInitialOrbitOf',
 'HeightofPericentre',
 'HeightofApocentre']

In [14]:
# Existing initial-orbit property name -> predicate name used for its
# blank-node version.
prop_transform_init = {
    'Inclination': 'inclination',
    'RAAN': 'raan',
    'MeanAnomaly': 'mean_anomaly',
    'ArgOfPeriapsis': 'arg_of_periapsis',
    'Eccentricity': 'eccentricity',
    'SemimajorAxis': 'semimajor_axis',
    'Epoch': 'epoch',
    'HeightofPericentre': 'height_of_pericentre',
    # Was 'height_of_centre', which dropped the 'apo' and no longer paralleled
    # 'height_of_pericentre'. NOTE(review): update any consumer that already
    # relies on the old predicate name.
    'HeightofApocentre': 'height_of_apocentre',
}

In [15]:
# Generate a unique-bnode mapping for every initial-orbit property listed in
# prop_transform_init.
blank_nodes_init = all_blank_nodes(data_prop_init,prop_transform_init)

In [16]:
# Copy before extending: a plain assignment would alias data_prop_init, so the
# extend would also grow data_prop_init (and the list inside data_map) and
# every re-run of this cell would append the blank nodes again.
data_properties_replace = list(data_prop_init)
data_properties_replace.extend(blank_nodes_init)
len(data_properties_replace)

20

In [21]:
# Re-read the original mapping to get a fresh dict, then swap in the new
# property-mapping list for the first subject mapping.
with open(fname, 'r') as f:
    data_map_new = json.load(f)
data_map_new['subjectMappings'][0]['propertyMappings'] = data_properties_replace

In [22]:
# Write the updated mapping. json.dump returns None, so its result is not
# assigned: binding it to data_map would overwrite the loaded mapping with None.
with open('NewMappings/mapping-init-orbit-update.json', 'w') as f:
    json.dump(data_map_new, f)