## Cards

In [1]:
import os
import json
import uuid
import pandas as pd
import datetime
import shutil
import csv

# Folder holding the lookup CSV files; lookup() prepends it to the file name it is given.
lookup_path = './lookup/'
# Not referenced anywhere else in the visible notebook (source_file_name spells out './out/' itself).
source_path = './out/'
# Folder holding the card mapping JSON; 'mappings_bg.json' is loaded from here.
mappings_path = './mappings/'
# NOTE(review): save_path is disabled here, but save_mapped_set() still reads it.
# Define save_path (e.g. by re-enabling the next line) before calling save_mapped_set().
#save_path = '../../../../digipolis-arches-shoku-pkg/source/'
# Alternative input file, presumably a dated snapshot of the export (TODO confirm); disabled.
#source_file_name = './out/monuments_2025_06_09.json'
# JSON file that the "Generate dataset" cell opens and parses.
source_file_name = './out/monuments.json'

## Functions

In [2]:
def resource_identifier(source_id):
    """Return the UUIDv5 (DNS namespace) of ``source_id`` as a string.

    The same ``source_id`` always yields the same identifier.
    """
    identifier = uuid.uuid5(uuid.NAMESPACE_DNS, source_id)
    return str(identifier)


def generate_uri(source_id):
    """Return the geonames.org URI whose last path segment is ``source_id``."""
    return 'https://www.geonames.org/{}'.format(source_id)


def coordinates_geo_json(source_value):
    """Return ``'POINT <source_value>'`` with every comma removed.

    Despite the name, the result is a ``POINT ...`` text string, not a
    GeoJSON object.
    """
    point_text = 'POINT {}'.format(source_value)
    # Splitting on ',' and re-joining drops all commas.
    return ''.join(point_text.split(','))


def coordinates_wkt(source_value):
    """Return the text form of ``source_value`` with every comma removed."""
    text = '{}'.format(source_value)
    return ''.join(char for char in text if char != ',')


def get_constants(node_mapping):
    """Merge the dicts listed under ``node_mapping['constants']`` into one dict.

    Entries are applied in list order, so a label that appears more than once
    keeps the value of its last occurrence.
    """
    merged = {}
    for entry in node_mapping['constants']:
        merged.update(entry.items())
    return merged


def lookup(code, lookup_file):
    """Translate ``code`` into a concept name using a lookup CSV.

    The CSV ``lookup_path + lookup_file`` is read on every call. The value of
    the ``concept`` column of the first row whose ``code`` column equals
    ``code`` is returned.

    Parameters
    ----------
    code : value compared against the ``code`` column.
    lookup_file : file name of the CSV, relative to ``lookup_path``.

    Returns
    -------
    The matching concept, or the text ``'<code> NOT FOUND IN <lookup_file>'``
    (which is also printed) when no row matches or the lookup cannot be
    evaluated.

    Raises
    ------
    Whatever ``pd.read_csv`` raises when the file cannot be read.
    """
    lookup_df = pd.read_csv(lookup_path + lookup_file)
    try:
        matches = lookup_df.loc[lookup_df['code'] == code, 'concept']
        return matches.iloc[0]
    # Only the failures this expression can produce are treated as "not found":
    # missing column (KeyError), no matching row (IndexError), or a code that
    # cannot be compared with the column (TypeError / ValueError).
    # A bare `except:` would also hide KeyboardInterrupt and SystemExit.
    except (KeyError, IndexError, TypeError, ValueError):
        message = f'{code} NOT FOUND IN {lookup_file}'
        print(message)
        return message


## Save csv

In [3]:
def save_mapped_set(source_df, _package, _resource_model, _card):
    """Write ``source_df`` to ``<save_path>/<package>/<_resource_model>/<_card>.csv``.

    ``<package>`` is the second ``'_'``-separated part of ``_package``.
    The frame is written without its index and ``'Saved OK'`` is returned.

    NOTE(review): this reads the global ``save_path``, whose only assignment in
    the visible notebook is commented out; it must be defined before calling.
    """
    path_template = '%s/%s/%s/%s.csv'
    package_folder = _package.split('_')[1]
    target_file = path_template % (save_path, package_folder, _resource_model, _card)
    source_df.to_csv(target_file, index=False)
    return 'Saved OK'

In [4]:
def related_resource(source_value):
    """Return a one-element JSON array (as text) describing a resource relation.

    ``resourceId`` is the UUIDv5 (DNS namespace) of ``source_value`` and
    ``resourceXresourceId`` is the UUIDv5 of ``source_value + 'relation'``.
    Both ontology property fields are left empty.
    """
    relation_template = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]'
    target_id = uuid.uuid5(uuid.NAMESPACE_DNS, source_value)
    link_id = uuid.uuid5(uuid.NAMESPACE_DNS, source_value + 'relation')
    return relation_template % (str(target_id), str(link_id))

## Single

In [5]:
def make_single(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the rows of the 'single' card and write them to ``out/buildings/single.csv``.

    One row (dict) is produced per record in ``source_list``. For every node
    mapping whose ``'from'`` key is present in the record, the row receives
    ``ResourceID``, the fixed values ``card='single'``, ``card_order='0'`` and
    ``language='Nederlands'``, and the mapped value stored under the mapping's
    ``'to'`` key:

    * with a ``'function'`` entry the value is transformed by the named
      function (``generate_uri``, ``lookup``, ``related_resource``,
      ``coordinates_geo_json`` or ``coordinates_wkt``); an unrecognised
      function name leaves the target unset unless ``'constants'`` is present;
    * with a ``'constants'`` entry the raw value is stored and the constants
      are merged into the row;
    * otherwise the raw value is copied.

    A record that matches none of the mappings still contributes an empty row.

    Parameters
    ----------
    source_list : list of record dicts; each must contain ``'ResourceID'``.
    node_mappings : list of mapping dicts with ``'from'`` and ``'to'`` and
        optionally ``'constants'`` / ``'function'``.
    graph, dataset, card_name, card_order : accepted for a uniform signature;
        not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when ``source_list`` is empty,
        in which case no file is written.
    """
    obj_list = []
    for obj in source_list:
        obj_dict = {}
        for node_mapping in node_mappings:
            source_key = node_mapping['from']
            if source_key not in obj:
                continue

            obj_dict['ResourceID'] = obj['ResourceID']
            obj_dict['card'] = 'single'
            obj_dict['card_order'] = '0'
            obj_dict['language'] = 'Nederlands'

            value = obj[source_key]

            if 'constants' in node_mapping:
                obj_dict[node_mapping['to']] = value
                obj_dict.update(get_constants(node_mapping))

            if 'function' in node_mapping:
                # The names are mutually exclusive, so an elif chain is enough.
                function_name = node_mapping['function']['name']
                if function_name == 'generate_uri':
                    obj_dict[node_mapping['to']] = generate_uri(value)
                elif function_name == 'lookup':
                    lookup_file = node_mapping['function']['args']['lookup_file']
                    obj_dict[node_mapping['to']] = lookup(value, lookup_file)
                elif function_name == 'related_resource':
                    obj_dict[node_mapping['to']] = related_resource(value)
                elif function_name == 'coordinates_geo_json':
                    obj_dict[node_mapping['to']] = coordinates_geo_json(value)
                elif function_name == 'coordinates_wkt':
                    obj_dict[node_mapping['to']] = coordinates_wkt(value)
            else:
                obj_dict[node_mapping['to']] = value
        obj_list.append(obj_dict)

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/single.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list


## Building Type

In [6]:
def make_building_type(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'building_type' rows and write them to ``out/buildings/building_type.csv``.

    For every record that has a ``'typeOfBuilding'`` value, one row is emitted
    per building type: a string yields one row, a list yields one row per
    element, any other type is skipped.

    Every row uses ``card='building_type'`` and ``card_order='1'``. Previously
    only string values used these fixed values while list values took
    ``card_name`` / ``card_order`` from the caller, so rows of the same card
    could end up with different card labels.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'typeOfBuilding'`` must
        also contain ``'ResourceID'``.
    node_mappings, graph, dataset, card_name, card_order : accepted for a
        uniform signature; not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        if 'typeOfBuilding' not in obj:
            continue

        building_types = obj['typeOfBuilding']
        if isinstance(building_types, str):
            building_types = [building_types]
        elif not isinstance(building_types, list):
            continue

        for building_type in building_types:
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'building_type',
                'card_order': '1',
                'building_type': building_type,
            })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/building_type.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Alternative names

In [7]:
def make_alternate_names(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'alternate_names' rows and write them to ``out/buildings/alternate_names.csv``.

    For every record that has an ``'alternateName'`` value, one row is emitted
    per name: a string yields one row, a list yields one row per element, any
    other type is skipped. Each row has ``card='alternate_names'``,
    ``card_order='2'``, name type ``'Alternative'`` and language
    ``'Nederlands'``.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'alternateName'`` must
        also contain ``'ResourceID'``.
    node_mappings, graph, dataset, card_name, card_order : accepted for a
        uniform signature; not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        if 'alternateName' not in obj:
            continue

        alternate_names = obj['alternateName']
        if isinstance(alternate_names, str):
            alternate_names = [alternate_names]
        elif not isinstance(alternate_names, list):
            continue

        for alternate_name in alternate_names:
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'alternate_names',
                'card_order': '2',
                'alternative_name_content': alternate_name,
                'alternative_name_content_type': 'Alternative',
                'alternative_name_content_language': 'Nederlands',
            })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/alternate_names.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Religion

In [8]:
def make_religion(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'religion' rows and write them to ``out/buildings/religion.csv``.

    For every record that has a ``'religionURI'`` value, one row is emitted per
    concept: a dict yields one row, a list yields one row per element, any
    other type is skipped. The ``religion`` column holds
    ``concept['Concept']['prefLabel']``; each row has ``card='religion'`` and
    ``card_order='3'``.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'religionURI'`` must
        also contain ``'ResourceID'``.
    node_mappings, graph, dataset, card_name, card_order : accepted for a
        uniform signature; not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        if 'religionURI' not in obj:
            continue

        concepts = obj['religionURI']
        if isinstance(concepts, dict):
            concepts = [concepts]
        elif not isinstance(concepts, list):
            continue

        for concept in concepts:
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'religion',
                'card_order': '3',
                'religion': concept['Concept']['prefLabel'],
            })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/religion.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Religious Order

In [9]:
def make_religious_order(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'religious_order' rows and write them to ``out/buildings/religious_order.csv``.

    For every record that has a ``'religiousOrder'`` value, one row is emitted
    per concept: a dict yields one row, a list yields one row per element, any
    other type is skipped. Each row carries the concept's ``id`` and
    ``prefLabel`` (as ``name``) plus a ``religious_order`` relation string
    whose ``resourceId`` is the UUIDv5 (DNS namespace) of the concept id and
    whose ``resourceXresourceId`` is the UUIDv5 of the id + ``'relation'``.
    Rows have ``card='religious_order'`` and ``card_order='4'``.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'religiousOrder'`` must
        also contain ``'ResourceID'``.
    node_mappings, graph, dataset, card_name, card_order : accepted for a
        uniform signature; not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    relation_template = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]'

    obj_list = []
    for obj in source_list:
        if 'religiousOrder' not in obj:
            continue

        concepts = obj['religiousOrder']
        if isinstance(concepts, dict):
            concepts = [concepts]
        elif not isinstance(concepts, list):
            continue

        for concept in concepts:
            concept_id = concept['Concept']['id']
            obj_dict = {
                'ResourceID': obj['ResourceID'],
                'card': 'religious_order',
                'card_order': '4',
                'id': concept_id,
                'name': concept['Concept']['prefLabel'],
            }
            rel_order_uri = str(uuid.uuid5(uuid.NAMESPACE_DNS, concept_id))
            resourceXresourceId = str(uuid.uuid5(uuid.NAMESPACE_DNS, concept_id + 'relation'))
            obj_dict['religious_order'] = relation_template % (rel_order_uri, resourceXresourceId)
            obj_list.append(obj_dict)

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/religious_order.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Building type AAT

In [10]:
def make_aat_building_type(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the AAT building type rows and write them to ``out/buildings/building_aat_type.csv``.

    For every record that has an ``'additionalType'`` value, one row is emitted
    per code: a string yields one row, a list yields one row per element, any
    other type is skipped. Each code is translated through
    ``lookup(code, 'building_aat_type.csv')`` and stored as
    ``building_aat_type``.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'additionalType'`` must
        also contain ``'ResourceID'``.
    card_name, card_order : used as ``card`` / ``card_order`` for rows that
        come from list values only (see the note in the body).
    node_mappings, graph, dataset : accepted for a uniform signature; not used.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        if 'additionalType' not in obj:
            continue

        additional_type = obj['additionalType']

        if isinstance(additional_type, str):
            # NOTE(review): string values are labelled 'building_type' / '1'
            # while list values below take card_name / card_order from the
            # caller. This looks copied from make_building_type; confirm
            # which labelling is intended and make both branches agree.
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'building_type',
                'card_order': '1',
                'building_aat_type': lookup(additional_type, 'building_aat_type.csv'),
            })
        elif isinstance(additional_type, list):
            for building_aat_type in additional_type:
                obj_list.append({
                    'ResourceID': obj['ResourceID'],
                    'card': card_name,
                    'card_order': card_order,
                    'building_aat_type': lookup(building_aat_type, 'building_aat_type.csv'),
                })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/building_aat_type.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Images

In [11]:
def make_images(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'image' rows and write them to ``out/buildings/images.csv``.

    For every record that has an ``'imageURI'`` value, one row is emitted per
    image: a dict yields one row, a list yields one row per element, any other
    type is skipped. The fields are read from ``image['WebResource']``
    (``id``, ``wikiBaseURI``, ``prefLabel``, ``rightsLabel``, ``imageLink``);
    the rights label is translated through
    ``lookup(..., 'creative_commons.csv')``. Rows have ``card='image'`` and
    ``card_order='11'``.

    Parameters
    ----------
    source_list : list of record dicts; records with ``'imageURI'`` must also
        contain ``'ResourceID'``.
    node_mappings, graph, dataset, card_name, card_order : accepted for a
        uniform signature; not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        if 'imageURI' not in obj:
            continue

        images = obj['imageURI']
        if isinstance(images, dict):
            images = [images]
        elif not isinstance(images, list):
            continue

        for image in images:
            web_resource = image['WebResource']
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'image',
                'card_order': '11',
                'image_identifier_content': web_resource['id'],
                'image_identifier_type': 'Brabant Cloud ARK Identifier',
                'image_other_identifier_content': web_resource['wikiBaseURI'],
                'image_other_identifier_type': 'Wikibase',
                'image_name_content': web_resource['prefLabel'],
                'image_name_content_type': 'Primary',
                'image_name_content_language': 'Nederlands',
                'image_right_type': lookup(web_resource['rightsLabel'], 'creative_commons.csv'),
                'image_url': web_resource['imageLink'],
            })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/images.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Other identifiers

In [12]:
def make_other_identifiers(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'other_identifiers' rows and write them to ``out/buildings/other_identifiers.csv``.

    One row is emitted per (record, node mapping) pair whose ``'from'`` key is
    present in the record. Each row has ``card='other_identifiers'`` and
    ``card_order='2'``. When the mapping has ``'constants'``, the record value
    is stored under the mapping's ``'to'`` key and the constants are merged in;
    a mapping without ``'constants'`` yields a row with only the three base
    columns.

    Parameters
    ----------
    source_list : list of record dicts; matching records must contain
        ``'ResourceID'``.
    node_mappings : list of mapping dicts with ``'from'`` and, where
        ``'constants'`` is present, ``'to'``.
    graph, dataset, card_name, card_order : accepted for a uniform signature;
        not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        for node_mapping in node_mappings:
            source_key = node_mapping['from']
            if source_key not in obj:
                continue

            obj_dict = {
                'ResourceID': obj['ResourceID'],
                'card': 'other_identifiers',
                'card_order': '2',
            }
            if 'constants' in node_mapping:
                obj_dict[node_mapping['to']] = obj[source_key]
                obj_dict.update(get_constants(node_mapping))
            obj_list.append(obj_dict)

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/other_identifiers.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Production date

In [13]:
def make_production_date(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Build the 'production_date' rows and write them to ``out/buildings/production_date.csv``.

    One row is emitted per (record, node mapping) pair whose ``'from'`` key is
    present in the record; the record value is stored under the mapping's
    ``'to'`` key. Rows have ``card='production_date'`` and ``card_order='12'``.

    Pairs that do not match are skipped. Previously every non-matching pair
    appended an empty dict, which produced blank lines without a ResourceID in
    the CSV and inflated the returned row count.

    Parameters
    ----------
    source_list : list of record dicts; matching records must contain
        ``'ResourceID'``.
    node_mappings : list of mapping dicts with ``'from'`` and ``'to'``.
    graph, dataset, card_name, card_order : accepted for a uniform signature;
        not used by this function.

    Returns
    -------
    list of dict
        The rows. An empty list (not ``None``) when nothing matched, in which
        case no file is written.
    """
    obj_list = []
    for obj in source_list:
        for node_mapping in node_mappings:
            source_key = node_mapping['from']
            if source_key not in obj:
                # No value for this mapping: emit nothing rather than an empty row.
                continue
            obj_list.append({
                'ResourceID': obj['ResourceID'],
                'card': 'production_date',
                'card_order': '12',
                node_mapping['to']: obj[source_key],
            })

    if obj_list:
        pd.DataFrame(obj_list).to_csv('out/buildings/production_date.csv', index=False)
    # Always return the list so callers can safely take len() of the result.
    return obj_list

## Generate dataset

In [14]:
%%time

# --- Load the card mappings -------------------------------------------------
# Each file is opened in its own `with` block so the handle is closed as soon
# as the JSON has been parsed, and the two handles no longer share one name.
with open(mappings_path + 'mappings_bg.json') as mappings_file:
    mappings_dict = json.load(mappings_file)

name = mappings_dict['name']
graph = mappings_dict['graph']
print('-', name)
print('  -', graph)

# --- Load the source records ------------------------------------------------
with open(source_file_name) as source_file:
    records = json.load(source_file)
_landmarks = records['records']['record']

# --- Collect the input of every card ----------------------------------------
# card name -> {'records', 'node_mappings', 'card_order', 'dataset'}.
# Keeping the settings next to the records of the same card means each builder
# below receives its own mapping, name and order instead of whatever the loop
# variables held after the last card was processed.
card_inputs = {}
for card_name, card_values in mappings_dict['mappings'].items():
    node_mappings = card_values.get('node_mappings', [])
    card_order = card_values.get('card_order')
    dataset = card_values.get('dataset')
    node_mappings_keys = [item['from'] for item in node_mappings]

    record_list = []
    for _record in _landmarks:
        record = _record['LandmarksOrHistoricalBuildings']
        record_dict = {
            'ResourceID': str(uuid.uuid5(uuid.NAMESPACE_DNS, record['id'])),
            'card': card_name,
            'card_order': card_order,
        }
        for mapping_key in node_mappings_keys:
            if mapping_key not in record:
                continue
            value = record[mapping_key]
            # Skip missing/empty values; values without a length (e.g. numbers)
            # are kept instead of failing on len().
            if value is None or (hasattr(value, '__len__') and len(value) == 0):
                continue
            record_dict[mapping_key] = value
        record_list.append(record_dict)

    card_inputs[card_name] = {
        'records': record_list,
        'node_mappings': node_mappings,
        'card_order': card_order,
        'dataset': dataset,
    }


def _build_card(label, builder, card_key):
    """Run ``builder`` on the records collected for ``card_key`` and report the row count.

    Reads the notebook globals ``card_inputs`` and ``graph``. Returns the
    builder's rows, or an empty list when the card is absent from the
    mappings, has no records, or the builder returned ``None``.
    """
    card = card_inputs.get(card_key)
    if card is None or not card['records']:
        return []
    rows = builder(card['records'], card['node_mappings'], graph,
                   card['dataset'], card_key, card['card_order'])
    if rows is None:
        rows = []
    print('    -', label, len(rows))
    return rows


# --- Build and save each card -----------------------------------------------
# A 'places' card, if present in the mappings, is collected above but no
# builder is run for it.
Single = _build_card('Single', make_single, 'single')
BuildingType = _build_card('BuildingType', make_building_type, 'building_type')
AlternateName = _build_card('AlternateName', make_alternate_names, 'alternate_names')
ReligiousOrder = _build_card('ReligiousOrder', make_religious_order, 'religious_order')
Religion = _build_card('Religion', make_religion, 'religion')
BuildingAatType = _build_card('BuildingAatType', make_aat_building_type, 'building_aat_type')
Image = _build_card('Image', make_images, 'images')
OtherIdentifiers = _build_card('OtherIdentifiers', make_other_identifiers, 'other_identifiers')
ProductionDate = _build_card('ProductionDate', make_production_date, 'production_date')

print('---------')
print(datetime.datetime.now())
#print(json.dumps(Image, indent=2))
#%reset -f 

- brabantse_gebouwen
  - Building
    - Single 966
    - BuildingType 974
    - AlternateName 2832
    - ReligiousOrder 1012
    - Religion 972
    - BuildingAatType 974
    - Image 158
    - OtherIdentifiers 1276
    - ProductionDate 966
---------
2025-06-09 21:04:31.235833
CPU times: user 386 ms, sys: 22.9 ms, total: 409 ms
Wall time: 411 ms
