## Cards

In [1]:
import os
import json
import uuid
import pandas as pd
import datetime
import shutil
import csv

# Directory prefix for the lookup CSV files read by lookup().
lookup_path = './lookup/'
# Directory prefix for the source CSV datasets named in the mapping file.
source_path = './out/'
# Directory prefix for the JSON mapping definition that drives the notebook.
mappings_path = './mappings/'
# NOTE: save_mapped_set() reads `save_path`, which is currently commented out here.
#save_path = '../../../../digipolis-arches-shoku-pkg/source/'

## Functions

In [2]:
def resource_identifier(source_id):
    """Derive a deterministic resource id (as text) from a source identifier.

    The id is a version-5 UUID computed in the DNS namespace, so the same
    ``source_id`` string always produces the same result.
    """
    resource_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, source_id)
    return str(resource_uuid)


def generate_uri(source_id):
    """Return the geonames.org URL built from ``source_id``."""
    # An f-string already evaluates to ``str``; no extra conversion is needed.
    uri = f'https://www.geonames.org/{source_id}'
    return uri


def coordinates_geo_json(source_value):
    """Prefix ``source_value`` with ``POINT `` and strip every comma.

    Note that, despite the function name, the result is a plain text value of
    the form ``POINT <source_value without commas>``.
    """
    point_text = f'POINT {source_value}'
    return point_text.replace(",", "")


def coordinates_wkt(source_value):
    """Return the text form of ``source_value`` with every comma removed."""
    as_text = f'{source_value}'
    return as_text.replace(",", "")


def get_constants(node_mapping):
    """Flatten the ``constants`` entries of a node mapping into a single dict.

    ``node_mapping['constants']`` is iterated as a sequence of dicts whose
    items are merged in order, so a label that occurs more than once keeps the
    value of its last occurrence.
    """
    return {
        label: value
        for entry in node_mapping['constants']
        for label, value in entry.items()
    }


def lookup(code, lookup_file):
    """Translate ``code`` into a concept name using a lookup CSV.

    The CSV ``lookup_path + lookup_file`` is read on every call and the
    ``concept`` value of the first row whose ``code`` column equals ``code``
    is returned.

    Parameters
    ----------
    code : value compared against the ``code`` column.
    lookup_file : file name of the lookup CSV, relative to ``lookup_path``.

    Returns
    -------
    The matching ``concept`` value, or the text
    ``'<code> NOT FOUND IN <lookup_file>'`` (also printed) when there is no
    matching row or the expected columns are missing.
    """
    lookup_df = pd.read_csv(lookup_path + lookup_file)
    # NOTE(review): pandas infers the dtype of the ``code`` column; if it is
    # parsed as a number, a string ``code`` will never match - confirm against
    # the lookup files.
    try:
        matches = lookup_df.loc[lookup_df['code'] == code, 'concept']
        return matches.iloc[0]
    except (KeyError, IndexError):
        # KeyError: a required column is absent; IndexError: no matching row.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        not_found = f'{code} NOT FOUND IN {lookup_file}'
        print(not_found)
        return not_found


## Save csv

In [3]:
def save_mapped_set(source_df, _package, _resource_model, _card, _save_path=None):
    """Write a mapped card to ``<root>/<package>/<resource model>/<card>.csv``.

    Parameters
    ----------
    source_df : DataFrame to write (without its index).
    _package : text containing at least one ``_``; the second ``_``-separated
        token is used as the package directory name.
    _resource_model : directory name below the package directory.
    _card : file name (without extension) of the CSV.
    _save_path : optional root directory. When omitted, the notebook-level
        ``save_path`` variable is used, which must then be defined.

    Returns
    -------
    The text ``'Saved OK'``.

    Raises
    ------
    IndexError : ``_package`` contains no ``_``.
    NameError : ``_save_path`` is omitted and ``save_path`` is not defined.
    """
    package = _package.split('_')[1]

    # Only touch the global setting when the caller did not supply a root.
    root = save_path if _save_path is None else _save_path

    file_name = '%s/%s/%s/%s.csv' % (root, package, _resource_model, _card)
    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    source_df.to_csv(file_name, index=False)
    return 'Saved OK'

In [4]:
def related_resource(source_value):
    """Build the one-element relation list (as JSON text) for ``source_value``.

    Both ids are version-5 UUIDs in the DNS namespace: the related resource id
    is derived from ``source_value`` itself, the relation id from
    ``source_value`` with the suffix ``'relation'`` appended. The ontology
    property fields are left empty.
    """
    namespace = uuid.NAMESPACE_DNS
    target_id = str(uuid.uuid5(namespace, source_value))
    link_id = str(uuid.uuid5(namespace, source_value + 'relation'))

    template = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]'
    return template % (target_id, link_id)

## Mapper

In [5]:
def make_mapping(source_list, node_mappings, graph, dataset, card_name, card_order):
    """Apply ``node_mappings`` to every record and return the mapped rows.

    For each record, every node mapping whose ``from`` key is present in the
    record is applied:

    * ``resource_id`` present: sets ``ResourceID`` (derived from the source
      value), ``card`` and ``card_order``.
    * ``constants`` present: copies the source value to the ``to`` column and
      adds the mapping's constants as extra columns.
    * ``function`` present: stores the result of the named converter in the
      ``to`` column. A function name that is not handled below leaves the
      column unset.
    * no ``function``: copies the source value unchanged to the ``to`` column.

    When at least one row was produced, the rows are also written to
    ``out/<card_name>_mapped.csv``.

    Parameters
    ----------
    source_list : iterable of dict records.
    node_mappings : iterable of node mapping dicts (``from``, ``to`` and the
        optional keys described above).
    graph, dataset : accepted for the caller's convenience; not used here.
    card_name : value of the ``card`` column and part of the output file name.
    card_order : value of the ``card_order`` column.

    Returns
    -------
    list of dict, one per input record (an empty list for empty input).
    """
    obj_list = []

    for obj in source_list:
        obj_dict = {}
        for node_mapping in node_mappings:
            source_key = node_mapping['from']
            if source_key not in obj:
                continue
            source_value = obj[source_key]

            if 'resource_id' in node_mapping:
                obj_dict['ResourceID'] = resource_identifier(str(source_value))
                obj_dict['card'] = card_name
                obj_dict['card_order'] = card_order

            if 'constants' in node_mapping:
                obj_dict[node_mapping['to']] = source_value
                obj_dict.update(get_constants(node_mapping))

            if 'function' in node_mapping:
                function_spec = node_mapping['function']
                function_name = function_spec['name']

                if function_name == 'generate_uri':
                    obj_dict[node_mapping['to']] = generate_uri(source_value)
                elif function_name == 'lookup':
                    obj_dict[node_mapping['to']] = lookup(source_value, function_spec['args']['lookup_file'])
                elif function_name == 'related_resource':
                    obj_dict[node_mapping['to']] = related_resource(source_value)
                elif function_name == 'coordinates_geo_json':
                    obj_dict[node_mapping['to']] = coordinates_geo_json(source_value)
                elif function_name == 'coordinates_wkt':
                    obj_dict[node_mapping['to']] = coordinates_wkt(source_value)
                # Any other function name is ignored, leaving the column unset.
            else:
                obj_dict[node_mapping['to']] = source_value
        obj_list.append(obj_dict)

    if obj_list:
        #save_it = save_mapped_set(source_df, package, resource_model, card)
        pd.DataFrame(obj_list).to_csv(f'out/{card_name}_mapped.csv', index=False)

    # Always return a list so callers can rely on len() / iteration, even when
    # there were no records to map.
    return obj_list


#relation = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]' % (usr_dict['ResourceID_mat'] , uuid.uuid4())


## Generate dataset

In [6]:
%%time

with open(mappings_path + 'mappings_geonames.json') as f:
    mappings_dict = json.load(f)  
    
    record_dict = {}
    record_list = []

    single_list = []
    alternate_name_list = []
    digital_reference_list = []

    name = mappings_dict['name']
    graph = mappings_dict['graph']
    print('-', name)
    print('  -', graph)
    
    for cards, cards_values in mappings_dict['mappings'].items():  
        card_name = cards
        for item,item_value in cards_values.items():
            if item == 'dataset': 
                dataset = item_value
            if item == 'card_order':
                card_order = item_value
            if item == 'filter':
                card_filter = item_value    
            if item == 'node_mappings':
                node_mappings = item_value    
           
        node_mappings_keys = [item['from'] for item in node_mappings]
        records = csv.DictReader(open(f'{source_path}{dataset}', encoding='utf-8-sig'), delimiter=',')

        for record in records:
                ## add filter from mapping!            
            for mapping_key in node_mappings_keys:
                if mapping_key in record.keys():
                    if len(record[mapping_key]) > 0:                                
                        record_dict[mapping_key] = record[mapping_key]
                    
            record_list.append(record_dict)                            
            record_dict = {}
            
        if card_name == 'single':                
            single_list.extend(record_list)                    
        if card_name == 'alternate_name':
            alternate_name_list.extend(record_list)                    
        if card_name == 'digital_reference':
            digital_reference_list.extend(record_list)        
            
        record_list = []
            
            
    if len(single_list) > 0:
        Single = make_mapping(single_list, node_mappings, graph, dataset, card_name, card_order)
        print('    -', "Single", len(Single))
        
    if len(alternate_name_list) > 0:
        AlternateName = make_mapping(alternate_name_list, mappings, graph, dataset, card_name, card_order)
        print('    -', "AlternateName", len(AlternateName))

        #if len(digital_reference_list) > 0:
        #    DigitalReference = make_mapping(digital_reference_list, mappings, graph, dataset, card_name, card_order)
        #    print('    -', "DigitalReference", len(DigitalReference))


print('---------') 
print(datetime.datetime.now())
#%reset -f 

- brabantse_gebouwen
  - Place
    - Single 3173
---------
2024-10-04 01:36:39.125763
CPU times: user 1.95 s, sys: 44.9 ms, total: 2 s
Wall time: 2 s


# Merge Cards

In [7]:
# One frame per mapped card; only the Single card is merged for now.
Single_df = pd.DataFrame(Single)
#AlternateName_df = pd.DataFrame(AlternateName)

card_frames = [
    Single_df
    #AlternateName_df
]
geonames_df = pd.concat(card_frames)

# Keep the rows of one resource together, ordered by card within the resource.
sort_columns = ['ResourceID', 'card_order']
sorted_geonames_df = geonames_df.sort_values(by=sort_columns)

place_csv_path = '../../../brabantse_gebouwen/pkg/business_data/Place.csv'
sorted_geonames_df.to_csv(place_csv_path, index=False)
sorted_geonames_df

Unnamed: 0,ResourceID,card,card_order,source_id,identifier_content,identifier_type,type,name_content,name_type,name_language,part_of_location,annotation_population,annotation_timezone,country,coordinates_geo_json,coordinates_wkt,annotation_elevation
3125,0012d2ca-4170-5d7e-9d0b-94c0d126da66,single,0,9954560,https://www.geonames.org/9954560,Geonames identifier,hotel,Amrath Pierre,Primary,Nederlands,"[{""resourceId"": ""0457e876-e117-5709-b183-81743...",0,Europe/Amsterdam,NL,POINT (5.4895 51.42599),(5.4895 51.42599),
1052,0022b087-31fb-5b8d-a9af-a1ab8c837300,single,0,2751102,https://www.geonames.org/2751102,Geonames identifier,locality,Mastpolder,Primary,Nederlands,"[{""resourceId"": ""7fedf03f-c477-5986-94d8-a3545...",0,Europe/Amsterdam,NL,POINT (4.53598 51.54757),(4.53598 51.54757),
577,002e6b1a-3874-5657-87f9-1bfe2373f3ab,single,0,2747773,https://www.geonames.org/2747773,Geonames identifier,populated place,Sambeek,Primary,Nederlands,"[{""resourceId"": ""8635ce4d-207b-5b8e-9b03-d5c52...",0,Europe/Amsterdam,NL,POINT (5.96667 51.63583),(5.96667 51.63583),
597,0031831b-b57c-5665-85ba-271822aca53d,single,0,2747859,https://www.geonames.org/2747859,Geonames identifier,polder,Rubeerepolder,Primary,Nederlands,"[{""resourceId"": ""f8356c5a-af05-59a6-b3b6-9ede0...",0,Europe/Amsterdam,NL,POINT (4.25664 51.5682),(4.25664 51.5682),
1539,003d4529-cd9f-5a45-a39b-b0715ce9ec6b,single,0,2753911,https://www.geonames.org/2753911,Geonames identifier,forest(s),Hollandsche Bosschen,Primary,Nederlands,"[{""resourceId"": ""f8356c5a-af05-59a6-b3b6-9ede0...",0,Europe/Amsterdam,NL,POINT (4.87654 51.4387),(4.87654 51.4387),
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1753,ff611490-9573-50b8-9420-46c04799a7fe,single,0,2754720,https://www.geonames.org/2754720,Geonames identifier,populated place,Hedel,Primary,Nederlands,"[{""resourceId"": ""90527867-9327-5413-acf3-cf2f1...",0,Europe/Amsterdam,NL,POINT (5.3 51.51917),(5.3 51.51917),
2880,ff891b77-83ca-59e5-9fc1-d3c138ce6b87,single,0,6930538,https://www.geonames.org/6930538,Geonames identifier,fort,Crevecoeur,Primary,Nederlands,"[{""resourceId"": ""e9bd29bd-5e5c-5c23-993a-b1568...",0,Europe/Amsterdam,NL,POINT (5.26704 51.73452),(5.26704 51.73452),
1471,ffb5ed91-ad55-5d8e-82c2-759085a8f612,single,0,2753553,https://www.geonames.org/2753553,Geonames identifier,populated place,Houterd,Primary,Nederlands,"[{""resourceId"": ""2091b323-7697-5ad5-abc5-0083b...",0,Europe/Amsterdam,NL,POINT (5.44028 51.6425),(5.44028 51.6425),
2715,ffbfd0a7-3413-5da5-ac44-b25af1f909d6,single,0,6533061,https://www.geonames.org/6533061,Geonames identifier,hotel,Golden Tulip Mastbosch Hotel Breda,Primary,Nederlands,"[{""resourceId"": ""471670cc-1ea7-5034-97f2-2f005...",0,Europe/Amsterdam,NL,POINT (4.7727 51.5649),(4.7727 51.5649),
