## Cards

In [1]:
import os
import json
import csv
import uuid
import pandas as pd
from edtf import text_to_edtf
import datetime
import shutil
import requests
from import_spec import * # package and number of records to import, see import_spec.py
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

lookup_path = '../../lookup/'
source_path = '../../source/tg/'
mappings_path = '../../mappings/'
lookup_e73_uiid_id_path = lookup_path + 'record_ids_uuids/e73/'
place_source_dir = '../../source/static/places/'
transactions_dir = '../../source/static/tg_history/'
save_path = '../../../digipolis-arches-shoku-pkg/source'

## Lookups

In [2]:
lookup_agents_df = pd.read_csv(lookup_path + 'aspace/as2arches_merged.csv')
lookup_relation_df = pd.read_csv(lookup_path + 'isaar_relatie_types.csv')
lookup_all_plus_one_df = pd.read_csv(lookup_path + 'aspace/as2arches_all_plus_one.csv')
with open(lookup_path + 'aspace/replace_source_au_codes.csv') as csvfile:
    replace_au_code_dict = dict(csv.reader(csvfile))


In [3]:
with open(mappings_path + 'mappings_e73.json') as f:
    multi_value_mappings = json.load(f)  

with open(mappings_path + 'resource_models_%s.json' % (resource_model) ) as f:
    resource_model_list = json.load(f)   

with open(mappings_path + 'static.json') as f:
    static_list = json.load(f)   

## Functions

In [4]:
def add_ark_identifier_by_row_value(package, uuid):
    return '%s/ark:/%s/%s' % (static_list[package]['ark_url'], static_list[package]['naan'], uuid)

In [5]:
def add_ead_identifier_by_row_value(package, uuid):
    return '%s%s' % (static_list[package]['ead_uri'], uuid)

In [6]:
def add_ark_identifier_df(source_df, uuid_column, package):
    for idx, row in source_df.iterrows():
        source_df.loc[idx, _args['column']] = '%s/ark:/%s/%s'  % (static_list[package]['ark_url'], static_list[package]['naan'], row[uuid_column])         
    return source_df

In [7]:
def lookup_column_df(source_df, column, args):
    lookup_df = pd.read_csv(lookup_path + args['lookup_file'])
    source_df[column] = source_df[args['match_source']].map(lookup_df.set_index(args['code'])[args['concept']])
    return source_df 

In [8]:
def lookup_column_value(code, lookup_file, card, brocade_id, source_field):
    lookup_df = pd.read_csv(lookup_path + lookup_file)
    try:
        concept_name = lookup_df[lookup_df['code'] == code]['concept'].iloc[0]
        return concept_name
    except:
        #print('%s,%s,%s,%s,%s' % (card, brocade_id, source_field, code, lookup_file))
        print(brocade_id, ',', code + ',NOT FOUND IN,' + lookup_file)
        return code + ' NOT FOUND IN: ' + lookup_file

In [9]:
def save_mapped_set(source_df, _package, _resource_model, _card):
    if _package in ['pkg_lhps', 'pkg_lhph', 'pkg_lhbr']:
        package = 'lh'
    else:
        package = _package.split('_')[1]     
    
    file_name = '%s/%s/%s/%s.csv' % (save_path, package, _resource_model, _card)
    source_df.to_csv(file_name, index=False)    
    del source_df
    return 'Saved OK'


In [10]:
def get_constant_type(mapping, source_key):
    # pull constants from a node mapping
    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']

In [11]:
def make_default_dict(obj, card, source_value, source_field, model_class):

    default_dict = {}
    default_dict['ResourceID'] = obj['ResourceID']
    default_dict['brocade.id'] = obj['brocade.id']
    default_dict['card'] = card
    default_dict['order'] = obj['order']
    default_dict['source_field'] = source_field
    default_dict['source_code'] =  source_value
    
    return default_dict

In [12]:
def get_mark_type(mapping, source_key):

    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']


## Keys

In [13]:
def get_group_keys(v):
    group_key_list = []
    for mapping in v['node_mappings']:
        group_key_list.append(mapping['from'].split('.')[0])
    group_key_list = list(dict.fromkeys(group_key_list) ) 
    return(group_key_list)

def get_field_keys(v):
    field_list = []
    try:
        for mapping in v['node_mappings']:
            field_list.append(mapping['from'].split('.')[1])
        return(field_list)
    except:
        print('Error in:', mapping)
        
def get_from_keys(v):
    map_list = []
    for mapping in v['node_mappings']:
        map_list.append(mapping['from'])
    return(map_list)

## Cards:
* Object Types 
* Genre Types 
* Languages 
* Merkteken Types 
* Writing Material
* ProductStadiums
* Document status
* Carrier


# E73 Titles

In [14]:
def get_title_type(dataset, mapping, source_key):

    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']['Title Type']
lang_replace = {
    "hun": "Magyar (Hungarian) (language)",    
    "afr": "Afrikaans (language)",
    "dut": "Dutch (language)",
    "eng": "English (language)",
    "fre": "French (language)",
    "ger": "German (language)",
    "grc": "Ancient Greek (language)",
    "gri": "Greek (language)",
    "heb": "Hebrew (language)",
    "ita": "Italian (language)",
    "lat": "Latin (language)",
    "noo": "Norwegian (language)",
    "nor": "Norwegian (language)",
    "por": "Portuguese (language)",
    "spa": "Spanish (language)",
    "zwe": "Swedish (language)",
    "nds": "Low German (language)",
    "mul": "Dutch (language)",
    "und": "Dutch (language)" 
}                
def make_titles(source_list, resource_model, package, dataset, card):

    title_dict = {}
    title_list = []
    mappings = multi_value_mappings[card]
    
    for title in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in title.keys():
                if 'value' in title[node_mapping['from']][0].keys():
                    title_dict['ResourceID'] = title['ResourceID']
                    title_dict['brocade.id'] = title['brocade.id']
                    title_dict['card'] = card
                    title_dict['order'] = title['order']
                    title_dict['source_field'] = node_mapping['from']
                    _title = title[node_mapping['from']][0]['value'].replace('<i>', '').replace('</i>', '').replace('<I>', '').replace('</I>', '')#.replace('\"\"Inspanning\"\"', 'Inspanning')
                    title_dict['Title'] = _title.replace('""','"')
                    title_dict['Title Type'] = node_mapping['constants']['Title Type']
                    if 'language' in title[node_mapping['from']][0].keys():
                        title_dict['Title Language'] = title[node_mapping['from']][0]['language']
        
                    title_list.append(title_dict)
                    #print(title['brocade.id'], title['order'], _title)
                    title_dict = {}

    source_df = pd.DataFrame(title_list)
    source_df = source_df.replace({'Title Language': lang_replace})        
    
    
    # save
    if len(source_df) > 0:
        save_it = save_mapped_set(source_df, package, resource_model, card)
    #source_df.to_csv('%s_%s_Group_%s.csv' % (resource_model, card, package), index=False)

    #dim_dict = {}
    #dim_list = []
   
    return '%s saved: %s' % (card, len(source_df)) 

* Languages

In [15]:
def e73_make_with_function_and_include_codes(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], 'e73'))
                        if 'function' in node_mapping.keys():
                            if node_mapping['function']['name'] == 'column_lookup': 
                                _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                            node_mapping['function']['args']['lookup_file'],
                                            card, obj['brocade.id'], node_mapping['from'])
                                obj_dict[node_mapping['to']] = _type                            
                        obj_list.append(obj_dict)
                        obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del obj
    return '%s saved: %s' % (card, number_of_records)

## Cards:
* Keywords
* CreationTimeSpans

In [16]:
def make_plain_mappings(source_list, resource_model, package, dataset, card):
    
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                
                obj_dict[node_mapping['from']] = obj[node_mapping['from']][0]['value']
                if 'constants' in node_mapping.keys():
                    constants = get_constant_type(mappings, node_mapping['from'])
                    for _const, _value in constants.items():
                        obj_dict[_const] = _value                
                obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    
    for node_mappings in mappings['node_mappings']:
        obj_dict[node_mappings['from']] = node_mappings['to']        
    dataset_df = source_df.rename(columns=obj_dict)
    number_of_record = len(source_df)
    
    save_it = save_mapped_set(dataset_df, package, resource_model, card)
    del source_df
    del dataset_df
    return '%s saved: %s' % (card, number_of_record)

## External Relations Extra

In [17]:
def make_external_relations_extra(source_list, resource_model, package, dataset, card):
    
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    _type = ''
    
    #raw_df = pd.DataFrame(source_list)
    #raw_df.to_csv('out/' + resource_model + '_from_source_ext_ext.csv', index=False)
    
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                
                if node_mapping['from'] == 'ere.urltype':    
                    _prefix_type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                    'url_prefix_lookup.csv',
                                    card, obj['brocade.id'], node_mapping['from'])
                    obj_dict['External Relation URL Prefix'] = _prefix_type

                if node_mapping['from'] == 'doc.mpmtranscription':    
                    _prefix_type = lookup_column_value(obj[node_mapping['from']][0]['type'], 
                                    'url_prefix_lookup.csv',
                                    card, obj['brocade.id'], node_mapping['from'])
                    obj_dict['External Relation URL Prefix'] = _prefix_type

                if node_mapping['from'] == 'ow.link':    
                    obj_dict['External Relation URL Prefix'] = 'https://rkd.nl/nl/explore/images/'

                if 'function' in node_mapping.keys():
                    if node_mapping['function']['name'] == 'column_lookup':
                        source_value = obj[node_mapping['from']][0]['value']
                        if source_value.endswith(':1'):
                            lookup_value = source_value[:-2] 
                        else:
                            lookup_value = source_value
                        _type = lookup_column_value(lookup_value, 
                            node_mapping['function']['args']['lookup_file'],
                            card, obj['brocade.id'], node_mapping['from'])
                        obj_dict[node_mapping['to']] = _type
                else:
                    obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                if 'constants' in node_mapping.keys():
                    constants = get_constant_type(mappings, node_mapping['from'])
                    for _const, _value in constants.items():
                        obj_dict[_const] = _value 
                        
                if _type != 'DO_NOT_MIGRATE':        
                    obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    
    for node_mappings in mappings['node_mappings']:
        obj_dict[node_mappings['from']] = node_mappings['to']        
    dataset_df = source_df.rename(columns=obj_dict)
    number_in_source_list = len(source_list)
    number_in_dataset = len(dataset_df)

    if number_in_dataset > 0:
        save_it = save_mapped_set(dataset_df, package, resource_model, card)
    del source_df
    del dataset_df
    return f'{card}: found in source: {number_in_source_list}, saved: {number_in_dataset}' 

## Correspondance

In [18]:
def make_correspondence(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    out_df = pd.DataFrame()
    
    source_df = pd.DataFrame(source_list)
    for idx, row in source_df.iterrows():
        key_column = 'kenmerken.rubsender'
        if key_column in source_df.columns:
            if pd.notna(row[key_column]):
                if isinstance(row[key_column], list): 
                    out_dict = {'ResourceID': row['ResourceID'], 
                        'brocade.id': row['brocade.id'], 
                        'card': row['card'],
                        'order': row['order'],
                        'Creation Actor Note': row[key_column][0]['value'],
                        'Creation Actor Role': 'correspondents (correspondence writers)',                   
                       }
                    out_df = out_df.append(out_dict, ignore_index = True)
                            
        key_column = 'kenmerken.rubrecipient'
        if key_column in source_df.columns:
            if pd.notna(row[key_column]):
                if isinstance(row[key_column], list): 
                    source_df.loc[idx, 'rubrecipient'] = row[key_column][0]['value']     

                    out_dict = {'ResourceID': row['ResourceID'], 
                        'brocade.id': row['brocade.id'], 
                        'card': row['card'],
                        'order': row['order'],
                        'Creation Actor Note': row[key_column][0]['value'],
                        'Creation Actor Role': 'recipients (people)',                   
                       }

                out_df = out_df.append(out_dict, ignore_index = True)

    dataset_df = out_df
    
    save_it = save_mapped_set(dataset_df, package, resource_model, card)
    return '%s saved: %s' % (card, len(dataset_df))

## Descriptions

In [19]:
  
def make_descriptions(source_list, resource_model, package, dataset, card):

    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                if 'constants' in node_mapping.keys():
                    constants = get_constant_type(mappings, node_mapping['from'])
                    for _const, _value in constants.items():
                        obj_dict[_const] = _value                
                obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    #source_df.to_csv('out/%s_%s_Group_%s.csv' % (resource_model, card, package), index=False)
    
    for node_mappings in mappings['node_mappings']:
        obj_dict[node_mappings['from']] = node_mappings['to']        
    dataset_df = source_df.rename(columns=obj_dict)

    
    save_it = save_mapped_set(dataset_df, package, resource_model, card)

    return '%s saved: %s' % (card, len(dataset_df))

## Legacy Information

In [20]:
def make_legacy_info(source_list, resource_model, package, dataset, card):
    usr_dict = {}
    usr_list = []
    mapping = multi_value_mappings[card]
    
    for usr in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in usr.keys():
                if isinstance(usr[node_mapping['from']], list):
                    usr_dict['ResourceID'] = usr['ResourceID']
                    usr_dict['card'] = card
                    usr_dict['order'] = usr['order']
                    usr_dict['source_field'] = node_mapping['from']
                    source_value = usr[node_mapping['from']][0]['value']
                    usr_dict[node_mapping['to']] = source_value    
                    if 'constants' in node_mapping.keys():
                        constants = get_constant_type(mapping, node_mapping['from'])
                        for _const, _value in constants.items():
                            usr_dict[_const] = _value
                    usr_list.append(usr_dict)
                    usr_dict = {}

    source_df = pd.DataFrame(usr_list)
    
    # save
    #source_df.to_csv('out/' + dataset + '_legacy_merged.csv', index=False)
    save_it = save_mapped_set(source_df, package, resource_model, card)
    return '%s saved: %s' % (card, len(source_df))

## Legacy Information Merged

In [21]:
def make_legacy_info_merged(source_list, resource_model, package, dataset, card):

    mapped_df = pd.DataFrame(source_list)
    mappings = multi_value_mappings[card]
    
    for column in mapped_df.columns:
        for idx, row in mapped_df.iterrows():
            if isinstance(row[column], list): 
                mapped_df.loc[idx, column] = row[column][0]['value']
    m_dict = {}
    for mapping in mappings['node_mappings']:
        m_dict[mapping['from']] = mapping['to']
    mapped_df = mapped_df.rename(columns=m_dict)    
    mapped_df['Legacy Information Type'] = 'groepsbeschrijving'
    mapped_df['Legacy Information Published'] = 'False'

    for idx, row in mapped_df.iterrows():
        if 'number' in mapped_df.columns:
            if pd.notna(row['number']):    
                mapped_df.loc[idx, 'Legacy Information Value'] = '%s %s' % (row['number'],row['type'])
            else:
                mapped_df.loc[idx, 'number'] = '1'
                mapped_df.loc[idx, 'Legacy Information Value'] = '%s %s' % ('1',row['type'])
    
    mapped_df = mapped_df[['ResourceID', 'brocade.id', 'card', 'order', 'number', 
                               'type', 'Legacy Information Value', 'Legacy Information Type', 'Legacy Information Published']]  

    mapped_df = mapped_df.loc[(mapped_df['type'] == 'brgr') | (mapped_df['number'] != "1")]
    
    #mapped_df.to_csv('out/' + dataset + '_legacy_merged.csv', index=False)
    save_it = save_mapped_set(mapped_df, package, resource_model, card)

    number_of_records = len(mapped_df)
    del mapped_df    

    return '%s saved: %s' % (card, number_of_records)

## Associated Archive

In [22]:
lookup_aspace_archives_df = pd.read_csv(lookup_path + 'aspace/as2arches_archiveobject.csv')
lookup_rub_isad_df = pd.read_csv(lookup_path + 'rub_isad_internal_relations.csv')

def make_associated_archive(source_list, resource_model, package, dataset, card):
    
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                if obj['order'] >= '2':
                    obj_dict['ResourceID'] = obj['ResourceID']
                    obj_dict['brocade.id'] = obj['brocade.id']
                    obj_dict['card'] = card
                    obj_dict['order'] = obj['order']
                    obj_dict['source_field'] = node_mapping['from']
                    obj_dict[node_mapping['from']] = obj[node_mapping['from']][0]['value']           
                    obj_list.append(obj_dict)
                    obj_dict = {}

    source_df = pd.DataFrame(obj_list)

    
    if (len(source_df) > 0):
        source_df['isaad'] = source_df[node_mapping['from']].map(lookup_rub_isad_df.set_index('code')['isad'])  
        source_df['isaad'] = source_df['isaad'] + ':1' 
        source_df['Associated Archive'] = source_df['isaad'].map(lookup_aspace_archives_df.set_index('archesID')['json'])

    #source_df.to_csv('out/' + dataset + '_ass_arch.csv')

#    for node_mappings in mappings['node_mappings']:
#        obj_dict[node_mappings['from']] = node_mappings['to']        
#    dataset_df = source_df.rename(columns=obj_dict)
    dataset_df = source_df
    if len(dataset_df) > 0:
        save_it = save_mapped_set(dataset_df, package, resource_model, card)
    return '%s saved: %s' % (card, len(dataset_df))

## E73 Material records

In [23]:
def make_material_records(source_list, resource_model, package, dataset, card):
    usr_dict = {}
    usr_list = []
    mapping = multi_value_mappings[card]
    related_resource_dict = {}
    related_resource_list = []

    
    for usr in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in usr.keys():
                n=1
                for mat_rec_uuid in usr[node_mapping['from']]:
                    usr_dict['ResourceID'] = usr['ResourceID']
                    usr_dict['brocade.id'] = usr['brocade.id']
                    usr_dict['card'] = card
                    usr_dict['order'] = n
                    usr_dict['source_field'] = node_mapping['from']                    
                    relation = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]' % (mat_rec_uuid, uuid.uuid4())
                    usr_dict[node_mapping['to']] = relation
                    n+=1
                    usr_list.append(usr_dict)
                    usr_dict = {}
                    
                    related_resource_dict['resourceinstanceidfrom'] = usr['ResourceID']
                    related_resource_dict['resourceinstanceidto'] = mat_rec_uuid
                    related_resource_dict['relationshiptype'] = 'is related to'
                    related_resource_dict['datestarted'] = ''
                    related_resource_dict['dateended'] = ''
                    related_resource_dict['notes'] = ''
                
                    related_resource_list.append(related_resource_dict)
                    related_resource_dict = {}
                    
                    

    source_df = pd.DataFrame(usr_list)
    related_resource_df = pd.DataFrame(related_resource_list)

    
    # save
    save_it = save_mapped_set(source_df, package, resource_model, card)
    save_it = save_mapped_set(related_resource_df, package, resource_model, 'RelatedResources')
    
    return '%s saved: %s' % (card, len(source_df))

## IRE

In [24]:
deleted_extensions = {
   "AM": True, "AMV": True, "BB": True, "BBV": True, "BH": True, "BHV": True, "BI": True, "BIV": True, "CO": True,
    "COV": True, "EM": True, "EMV": True, "FB": True, "FBV": True, "GB": True, "GBV": True, "GEOB": True, "GEON": True,
    "GR": True, "GRV": True, "IS": True, "ISV": True, "KV": True, "LE": True, "LEV": True, "LLVL": True, "LLVO": True,
    "LV": True, "LVLL": True, "MI": True, "MIV": True, "ORG": True, "OVK": True, "OVL": True, "OVLL": True, "OVZ": True, "PM": True,
    "PMV": True, "PR": True, "PRV": True, "PS": True, "PSV": True, "RE": True, "REV": True, "ROG": True, "SC": True, "SCV": True, 
    "UIE": True, "UIR": True, "ZO": True,    
}

def split_extension(id):
    parts = id.split(":")
    ext = parts[-1]
    return ext.split(".")[0]

def clean_up_rel(id):
    if ":N:" in id:
        id = id.replace(":N:", "::")
    if id.endswith(':N'):
        id = id[:-2]
    if ":1.1" in id:
        id = id.replace(":1.1", ":1")
    
    ext = split_extension(id)
    if ext in deleted_extensions:
        parts = id.split("::")
        id = "::".join(parts[0:2])
        
    return id

clean_up_rel('au::28114:2')

'au::28114:2'

In [25]:
def fix_codes(au_code):
    
    f_dict = {}
    au_split = au_code.split(':')
    g_code = au_code

    if len(au_split) == 3:
        new_au_code = au_code + ':1'
        new_g_code = g_code + 'g:1'
    
    elif len(au_split) == 4:
        new_au_code = au_code
        new_g_code = '%s::%sg:%s' % (au_split[0], au_split[2], au_split[3])
    
    elif len(au_split) == 5:
        new_au_code = au_code
        new_g_code = '%s::%sg:%s:%s' % (au_split[0], au_split[2], au_split[3], au_split[4])
    
    elif len(au_split) == 6:
        if ':::' in au_code:
            new_au_code = au_code.replace(':::', ':1::')
            new_g_code = g_code.replace(':::', 'g:1::')
        elif ':1:N:' in au_code:   
            new_au_code = au_code
            new_g_code = g_code.replace(':1:N:', 'g:1:N:')
        elif ':2:N:' in au_code: 
            new_au_code = au_code
            new_g_code = g_code.replace(':2:N:', 'g:2:N:')
        elif ':2::' in au_code: 
            new_au_code = au_code
            new_g_code = g_code.replace(':2::', 'g:2::')

        elif ':1::' in au_code:
            new_g_code = g_code.replace(':1::', 'g:1::')
            new_au_code = au_code
            
    elif au_code.endswith(':1'):
        new_g_code = g_code[:-2] + 'g:1'
        new_au_code = au_code
    else:
        new_g_code = g_code
        new_au_code = au_code
    
    try:
        f_dict['au_code'] = clean_up_rel(new_au_code)
        f_dict['g_code'] = clean_up_rel(new_g_code)
        
    except:
        f_dict['au_code'] = 'nope'
        f_dict['g_code'] = 'nope'
        print('no fixed au or g code')
    
    return f_dict

bulle = fix_codes('au::28114:2')
print(bulle)
print(len('au::28114:2'.split(':')))


{'au_code': 'au::28114:2', 'g_code': 'au::28114g:2'}
4


In [26]:
def match_au_codes(dataset_df, package, resource_model, card, mappings):
    node_mapping_dict = {}  
    
    ## 
    ## all_au_df = all records that matches on au_code
    ## g_df = all records that matches on the g_code
    ## au_df = all_au_df - g_df
    ##
    ## if card == "e73_CreationActors":
    ##    out_df = g_df + au_df
    ## else:
    ##   out_df = all_au   
        
    # g codes
    g_df = dataset_df.copy()
    g_df['ire.widget'] = g_df['g_code'].map(lookup_agents_df.set_index('archesID')['json'])
    g_df['ire.identifier'] = g_df['g_code'].map(lookup_agents_df.set_index('archesID')['ark'])
    g_df = g_df[g_df['ire.widget'].notnull()]
    g_df['matched on'] = 'g_code'    
    print()
    print('g_df =', len(g_df))
    
    # au codes
    all_au_df = dataset_df.copy()
    all_au_df['matched on'] = 'au_code'
    all_au_df['ire.widget'] = all_au_df['au_code'].map(lookup_agents_df.set_index('archesID')['json'])
    all_au_df['ire.identifier'] = all_au_df['au_code'].map(lookup_agents_df.set_index('archesID')['ark'])
    print('all_au_df =', len(all_au_df))

    ## au codes minus g codes
    #resource_ids_in_g = g_df['ResourceID']    

    ## Drop rows from all_au_df where ResourceID is in g_df
    #au_df = all_au_df[~all_au_df['ResourceID'].isin(resource_ids_in_g)]
    #print('au_df (all_au_df - g_df) =', len(au_df))
    
    xmerged_df = pd.merge(all_au_df, g_df[['ResourceID', 'au_code']], on=['ResourceID', 'au_code'], how='left', indicator='is_in_g_df')

    # Filter out the rows that are also present in g_df
    au_df = xmerged_df[xmerged_df['is_in_g_df'] == 'left_only']

    # Drop the indicator column as it's no longer needed
    #au_df.drop(columns=['is_in_g_df'], inplace=True)
    au_df = au_df.drop(columns=['is_in_g_df'])
    print('au_df (all_au_df - g_df) =', len(au_df))

    if card == "e73_CreationActors" and len(g_df) > 0:
            print(card, 'has Gs, g_df + au_df saved')
            merged_df = pd.concat([g_df, au_df])
            #merged_df.to_csv(f'out/{resource_model}_{card}_au_and_g.csv', index=False)
    else:
        print(card, 'has NO Gs, all_au_df saved')
        merged_df = all_au_df
        #merged_df.to_csv(f'out/{resource_model}_{card}_au_all.csv', index=False)
        

    merged_df['ire.role'] = merged_df['ire.role'].map(lookup_relation_df.set_index('code')['concept'])        

    un_matched_df = merged_df[merged_df['ire.widget'].isna()]
        
    # map colomn names
    for node_mappings in mappings['node_mappings']:
        node_mapping_dict[node_mappings['from']] = node_mappings['to']        
    merged_df = merged_df.rename(columns=node_mapping_dict)
        
    #matched_df.to_csv(f'out/{package}_{resource_model}_{card}_matched.csv', index=False)
    if len(un_matched_df) > 0:
        un_matched_df.to_csv(f'out/unmatched_{package}_{resource_model}_{card}.csv', index=False)
    return merged_df 

In [27]:
def check_missing_matches(_dataset):
    
    for idx, row in _dataset.iterrows():
        if pd.isna(_dataset.loc[idx,'ire.isaar.id']) is False:
            mumps_response = requests.get('https://hub3.lh.delving.io/api/sync/resolve/%s?format=mumps' % (row['ire.isaar.id']))
            _dataset.loc[idx, 'MUMPS'] = str(mumps_response.status_code)
            mumps_response = requests.get('https://hub3.lh.delving.io/api/sync/resolve/%s?format=mumps' % (row['ire.isaar.id'][:-2]))
            _dataset.loc[idx, 'MUMPS_NO_:1'] = str(mumps_response.status_code)
    
    _dataset['agent_type_in_all'] = _dataset['ire.isaar.id'].map(lookup_all_plus_one_df.set_index('brocadeID')['type'])
    _dataset['place_type_in_all'] = _dataset['place_au_code'].map(lookup_all_plus_one_df.set_index('brocadeID')['type'])

    return _dataset


In [28]:
def make_ire_record(ire):
    
    fixed_codes_dict = {}
    m_dict = {}
    
    m_dict['ResourceID'] = ire['ResourceID']
    m_dict['brocade.id'] = ire['brocade.id']
    m_dict['museum'] = package.split('_')[1]            
    m_dict['dataset'] = ire['dataset']
    m_dict['resource_model'] = resource_model            
    m_dict['card'] = card
    m_dict['ire.isaar.uuid'] = ire['ire.isaar'][0]['uuid']                       
    m_dict['source_ire_type'] = ire['ire.type'][0]['value']
    m_dict['ire.role'] = ire['ire.type'][0]['value']        
    source_code = ire['ire.isaar'][0]['value']            
    m_dict['source_code'] = source_code            
            
    # Normalize the source code for +g, +:1 etc
    #replace_source_au_codes.csv
    
    if source_code in replace_au_code_dict.keys():
        use_code = replace_au_code_dict[source_code]
        #print(source_code, use_code)
    else:
        use_code = source_code
    
    fixed_codes_dict['fixed_codes'] = fix_codes(use_code)
        
    m_dict['au_code'] = fixed_codes_dict['fixed_codes']['au_code']
    m_dict['g_code'] = fixed_codes_dict['fixed_codes']['g_code']
            
    if 'ire.markdown' in ire.keys():
        m_dict['ire.markdown'] = ire['ire.markdown'][0]['value']
    else:
        m_dict['ire.markdown'] = None

    return m_dict

## Plain IRE

In [29]:
def make_plain_ire(source_list, resource_model, package, dataset, card, model_class):

    node_mapping_dict = {}
    isaar_list = []
    card = card
    mappings = multi_value_mappings[card]
    
    for ire in source_list:
        if ire['ire.type'][0]['value'] in mappings['include_codes']: 
            isaar_list.append(make_ire_record(ire))        

    dataset_df = pd.DataFrame(isaar_list)

    if len(dataset_df) > 0:
        matched_df = match_au_codes(dataset_df, package, resource_model, card, mappings)
        matched_df.drop(matched_df.loc[matched_df['source_code']=='au::113865'].index, inplace=True)
        save_it = save_mapped_set(matched_df, package, resource_model, card)

        #return '%s saved: %s' % (card, len(dataset_df))   
        return '%s saved: %s' % (card, len(matched_df))   

## Make exception IREs

In [30]:
def make_exception_ire(source_list, resource_model, package, dataset, card, model_class):

    node_mapping_dict = {}
    isaar_list = []
    card = card
    mappings = multi_value_mappings[card]
    
    for ire in source_list:
        if ire['ire.type'][0]['value'] in mappings['include_codes'].keys():
            au_type = ire['ire.type'][0]['value']
            if ire['dataset'] in mappings['include_codes'][au_type]:
                isaar_list.append(make_ire_record(ire))        

    dataset_df = pd.DataFrame(isaar_list)
    
    if len(dataset_df) > 0:
        matched_df = match_au_codes(dataset_df, package, resource_model, card, mappings)
        matched_df.drop(matched_df.loc[matched_df['source_code']=='au::113865'].index, inplace=True)
        save_it = save_mapped_set(matched_df, package, resource_model, card)

        fixed_codes_dict = {}
        #return '%s saved: %s' % (card, len(dataset_df))
        return '%s saved: %s' % (card, len(matched_df))   

## Annotations

In [31]:
def check_constant(card, _key, _constant):
    for item in multi_value_mappings[card]['node_mappings']:
        if item['from'] == _key:
            return item['constants'][0][_constant]

def make_annotations(source_list, resource_model, package, dataset, card, model_class):

    anno_dict = {}
    anno_list = []
    mappings = multi_value_mappings[card]

    for anno in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in anno.keys():
                source_value = anno[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        anno_dict.update(make_default_dict(anno, card, source_value, node_mapping['from'], model_class))
                        if anno[node_mapping['from']][0]['value'] == 'acai':
                            anno_dict['Annotation'] = 'Alfabetische index'
                        else:
                            anno_dict['Annotation'] = anno[node_mapping['from']][0]['value']
                        if 'constants' in node_mapping.keys():
                            constants = get_mark_type(mappings, node_mapping['from'])
                            for _type, _value in constants.items():
                                anno_dict[_type] = _value
                else: # no include codes
                    anno_dict.update(make_default_dict(anno, card, source_value, node_mapping['from'], model_class))
                    anno_dict['Annotation'] = anno[node_mapping['from']][0]['value']
                    if 'constants' in node_mapping.keys():
                        constants = get_mark_type(mappings, node_mapping['from'])
                        for _type, _value in constants.items():
                            anno_dict[_type] = _value
                    
                                
        
        anno_list.append(anno_dict)
        anno_dict = {}

    source_df = pd.DataFrame(anno_list)
    #source_df.sort_values(by=['ResourceID', 'card', 'order'])
    
    # save
    save_it = save_mapped_set(source_df, package, resource_model, card)
    return '%s saved: %s' % (card, len(source_df))


## External Relations

In [32]:
def make_external_relations(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    include_codes_list = mappings['include_codes']
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}

    #raw_df = pd.DataFrame(source_list)    
    #raw_df.to_csv('out/' + resource_model + '_externals_raw.csv')
    
    for obj in source_list:
        include_value = obj['ere.type'][0]['value']
        if include_value in include_codes_list:
        
            #print(include_value)
            obj_dict['ResourceID'] = obj['ResourceID']
            obj_dict['brocade.id'] = obj['brocade.id']
            obj_dict['card'] = obj['card']
            obj_dict['order'] = obj['order']
            for node_mapping in mappings['node_mappings']:
                if node_mapping['from'] in obj.keys():
                    if node_mapping['from'] == 'ere.url' or node_mapping['from'] == 'ere.markdown':
                        if obj[node_mapping['from']][0]['value'].startswith("http"):
                            url = obj[node_mapping['from']][0]['value']
                            url_splitted = obj[node_mapping['from']][0]['value'].split('/') 
                            obj_dict['External Relation URL Prefix'] = url.replace(url_splitted[-1], '')
                            obj_dict[node_mapping['to']] = url_splitted[-1]
                            #print(obj['brocade.id'], obj[node_mapping['from']][0]['value'], url_splitted[-1], url.replace(url_splitted[-1], ''))
                            

                        else:    
                            obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']                        
                        
                    if node_mapping['from'] == 'ere.urltype':                        
                        exclude_list = ['url', 'purl']
                        
                        if obj[node_mapping['from']][0]['value'] not in exclude_list:
                            _prefix_type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                    'url_prefix_lookup.csv',
                                    card, obj['brocade.id'], node_mapping['from'])
                            obj_dict['External Relation URL Prefix'] = _prefix_type

                    
                    if 'function' in node_mapping.keys():
                        if node_mapping['function']['name'] == 'column_lookup': 
                            _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                        node_mapping['function']['args']['lookup_file'],
                                        card, obj['brocade.id'], node_mapping['from'])
                            obj_dict[node_mapping['to']] = _type

                    if 'constants' in node_mapping.keys():
                        constants = get_mark_type(mappings, node_mapping['from'])
                        for _const, _value in constants.items():
                            obj_dict[_const] = _value        
                    
            obj_list.append(obj_dict)
            obj_dict = {}
            #print(obj_list)
                    
    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del obj
    return '%s saved: %s' % (card, number_of_records)

## Single

In [33]:
def make_single(source_list, resource_model, package, dataset, card):
    
#    single_df = pd.DataFrame(source_list)
#    single_df.to_csv('out/' + package + 'single.csv', index=False)

    mappings = multi_value_mappings[card]
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        obj_dict['ResourceID'] = obj['ResourceID']
        obj_dict['brocade.id'] = obj['brocade.id'][0]['value']    
        obj_dict['card'] = 'AA'
        obj_dict['order'] = '0'
        if 'mpm' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Museum Plantin-Moretus'
        elif 'lh' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Letterenhuis'
        elif 'rub' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Rubenianum'

        obj_dict['Object Number'] = obj['brocade.id'][0]['value']    
        obj_dict['Object Number Type'] = 'record identifiers'

        obj_dict['Object Identifier'] = add_ark_identifier_by_row_value(package, obj['ResourceID'])
        obj_dict['Object Identifier Type'] = 'object identifier'

        
        
        record_type = ''
        if package == 'pkg_rub' and obj['brocade.subtype'][0]['value'] == 'gr':
            
            if resource_model == 'Foto':
                record_type = "Foto's"
            elif resource_model == 'Brief':
                record_type = 'Brieven'
            elif resource_model == 'Iconografie':
                record_type = 'Prenten'
            elif resource_model == 'Tekstdrager':
                record_type = 'Handschriften'                    
            
            #print('if gr', resource_model, package, dataset, obj['brocade.subtype'][0]['value'], record_type)            
        else:
            record_type = lookup_column_value(obj['brocade.subtype'][0]['value'], 
                                            'record_types.csv',
                                            card, obj['brocade.id'], '')
            #print('not gr', resource_model, package, dataset, obj['brocade.subtype'][0]['value'], record_type)            
            
        obj_dict['Record Type'] = record_type 
        obj_list.append(obj_dict)
        obj_dict = {}
    source_df = pd.DataFrame(obj_list)
#    source_df.to_csv('out/' + package + 'single.csv', index=False)

    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    return '%s saved: %s' % (card, len(source_df))



## Transaction History

In [34]:
source_dir = '../../source/static/tg_history/'
def make_transaction_history(resource_model, package, dataset):

    read_name = source_dir  + dataset + '_transaction_history.csv'
    save_name = '%s%s/%s/%s.csv' % (save_path, package, resource_model, 'e73_Transaction_history')
    try:
        shutil.copy2(read_name, save_name) 
        _return_msg = 'e73_Transaction_history.csv saved'
    except:
        _return_msg = ''
    
    return _return_msg


## Dams Links

In [35]:
dams_dir = '../../source/static/dams/'
def make_dams(resource_model, package, dataset):
    read_name = dams_dir + 'e22_DamsLinks_' + package.split('_')[1] + '_' + resource_model + '.csv'
    
    if package in ['pkg_rub', 'pkg_mpm']:
        save_name = '%s/%s/%s/%s.csv' % (save_path, package.split('_')[1], resource_model, 'e22_DamsLinks')
    else:
        save_name = '%s/%s/%s/%s.csv' % (save_path, 'lh', resource_model, 'e22_DamsLinks')
    print(read_name)
    print(save_name)
    try:
        shutil.copy2(read_name, save_name) 
        _return_msg = 'e22_DamsLinks saved'
    except:
        _return_msg = 'NO DAMS FILE FOUND'
    
    return _return_msg

## Images

In [36]:
def make_legacy_images(source_list, resource_model, package, dataset, card):

    images_dir = '../../source/static/images/'
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    images_in_resource_df = pd.DataFrame()
    
    for _dataset in resource_model_list[resource_model][package]:
        try:
            images_from_file_df = pd.read_csv(images_dir + _dataset + '_images.csv')
            images_in_resource_df = images_in_resource_df.append(images_from_file_df, ignore_index=True)

        except:
            print('no such image file')
        
    source_df = pd.DataFrame(source_list)
    resource_id_df = source_df['ResourceID']
    result_df = pd.merge(images_in_resource_df, resource_id_df, on='ResourceID', how='inner')
    result_df[' card'] = card
    #result_df.to_csv('out/' + resource_model + '_result.csv', index=False)    

    
    if len(result_df) > 0:
        save_it = save_mapped_set(result_df, package, resource_model, card)
        number_of_records = len(result_df)
        del source_df
        del result_df
        del images_from_file_df
        del images_in_resource_df
        return '%s saved: %s' % (card, number_of_records)

## Associated Records

In [37]:
def lookup_externals_df(source_df, column, args):
    lookup_external_df = pd.read_csv(lookup_path + args['lookup_file'])
    source_df[column] = source_df[column].map(lookup_external_df.set_index('code')['concept'])
    return source_df 

def make_associated_records(source_list, resource_model, package, dataset, card):

    source_df = pd.DataFrame(source_list)
    mappings = multi_value_mappings[card]
    
    # make dataset
    m_dict = {}
    for column in source_df.columns:
        for idx, row in source_df.iterrows():
            if isinstance(row[column], list): 
                source_df.loc[idx, column] = row[column][0]['value']

    source_df = source_df[source_df['ere.type'].isin(mappings['include_codes'])]    
    # functions
    for mapping in mappings['node_mappings']:
        if 'function' in mapping.keys():            
            source_df = lookup_externals_df(source_df, mapping['from'], mapping['function']['args'])
        
    # get ResourceID for the related object
    
    if package == "pkg_rub":
        _dataset = 'rubgr'
    else:
        _dataset = dataset
   
    lookup_relations_df = pd.read_csv(lookup_e73_uiid_id_path + 'e73_' + _dataset + '_id_uuid_lookup.csv')
    source_df['related_uuid'] = source_df['ere.url'].map(lookup_relations_df.set_index('brocade.id')['ResourceID'])
    
    for idx, row in source_df.iterrows():
        source_df.loc[idx, 'import_relation'] = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]' % (row['related_uuid'], uuid.uuid4())


    # do the mapping
    for mapping in mappings['node_mappings']:
        m_dict[mapping['from']] = mapping['to']
    mapped_df = source_df.rename(columns=m_dict)    
    #hack...
    mapped_df = mapped_df.rename(columns={"import_relation": "Associated Record"})    
    
    # save
    if len(mapped_df) > 0:
        save_it = save_mapped_set(mapped_df, package, resource_model, card)

    dim_dict = {}
    dim_list = []
    
    return '%s saved: %s' % (card, len(mapped_df))

## Places from Source

In [38]:
def make_places_from_source_mpm(source_list, resource_model, package, dataset, card):
    
    creation_source_places_list = 'pkg_mpm' #["mpmbr", "mpmhs", "mpmph", "mpmtk" ]
    documentation_source_places_list = 'pkg_rub' #["rubhs"]

    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []

    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                if obj[node_mapping['from']][0]['value'].startswith("au::"):
                    obj_dict['place_au_code'] = obj[node_mapping['from']][0]['value']                    
                else:    
                    obj_dict['place_note'] = obj[node_mapping['from']][0]['value']                    
                    obj_dict['place_type'] = 'remarks'
                    obj_dict['place_language'] = 'Nederlands'
                
                obj_list.append(obj_dict)
                obj_dict = {}
                
    source_df = pd.DataFrame(obj_list)
#    source_df.to_csv('out/' + resource_model + '_resu.csv', index=False)
    
    if len(obj_list) > 0:
        lookup_place_json_df = pd.read_csv(lookup_path + 'aspace/as2arches_place.csv')
        source_df['json'] = source_df['place_au_code'].map(lookup_place_json_df.set_index('brocadeID')['json'])
        
    if package in creation_source_places_list:
        card = 'e73_CreationPlacesSource'
        source_df = source_df.rename(columns={"json": "Creation Place Widget",
                                             "place_note": "Creation Place Note",
                                             "place_type": "Creation Place Note Type",
                                             "place_language": "Creation Place Note Language"})

    if package in documentation_source_places_list:
        card = 'e73_DocumentedPlacesSource'
        source_df = source_df.rename(columns={"json": "Documented Place Name Widget", 
                                                 "order": "Documented Place Order",
                                                 "place_note": "Documented Place Note",
                                                 "place_type": "Documented Place Note Type",
                                                 "place_language": "Documented Place Note Language"})

    
    number_of_records = len(obj_list)
    if number_of_records > 0:
        save_it = save_mapped_set(source_df, package, resource_model, card)

        del source_df
        del obj_list
        return '%s saved: %s' % (card, number_of_records)

In [39]:
def make_places_from_source_rub(source_list, resource_model, package, dataset, card):
    
    creation_source_places_list = 'pkg_mpm' #["mpmbr", "mpmhs", "mpmph", "mpmtk" ]
    documentation_source_places_list = ['tg:rubhs:88', 'tg:rubhs:108', 'tg:rubhs:145', 'tg:rubhs:137', 'tg:rubhs:111', 
                                        'tg:rubhs:91', 'tg:rubhs:81', 'tg:rubhs:75', 'tg:rubhs:65', 'tg:rubhs:110', 
                                        'tg:rubhs:80', 'tg:rubhs:64', 'tg:rubhs:74', 'tg:rubhs:89', 'tg:rubhs:109', 
                                        'tg:rubhs:144', 'tg:rubhs:136', 'tg:rubhs:92', 'tg:rubhs:82', 'tg:rubhs:59', 
                                        'tg:rubhs:112', 'tg:rubhs:76', 'tg:rubhs:66', 'tg:rubhs:134', 'tg:rubhs:146', 
                                        'tg:rubhs:135', 'tg:rubhs:147', 'tg:rubhs:83', 'tg:rubhs:113', 'tg:rubhs:67', 
                                        'tg:rubhs:77', 'tg:rubhs:72', 'tg:rubhs:62', 'tg:rubhs:139', 'tg:rubhs:174', 
                                        'tg:rubhs:86', 'tg:rubhs:116', 'tg:rubhs:106', 'tg:rubhs:131', 'tg:rubhs:143', 
                                        'tg:rubhs:153', 'tg:rubhs:55', 'tg:rubhs:138', 'tg:rubhs:63', 'tg:rubhs:73', 
                                        'tg:rubhs:175', 'tg:rubhs:87', 'tg:rubhs:107', 'tg:rubhs:141', 'tg:rubhs:133', 
                                        'tg:rubhs:68', 'tg:rubhs:78', 'tg:rubhs:57', 'tg:rubhs:71', 'tg:rubhs:61', 
                                        'tg:rubhs:158', 'tg:rubhs:115', 'tg:rubhs:105', 'tg:rubhs:85', 'tg:rubhs:60', 
                                        'tg:rubhs:70', 'tg:rubhs:84', 'tg:rubhs:94', 'tg:rubhs:176', 'tg:rubhs:79', 
                                        'tg:rubhs:69', 'tg:rubhs:132', 'tg:rubhs:56']


    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    obj_documented_list = []
    obj_created_list = []

    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                if obj[node_mapping['from']][0]['value'].startswith("au::"):
                    obj_dict['place_au_code'] = obj[node_mapping['from']][0]['value']                    
                else:    
                    obj_dict['place_note'] = obj[node_mapping['from']][0]['value']                    
                    obj_dict['place_type'] = 'remarks'
                    obj_dict['place_language'] = 'Nederlands'
                
                if obj['brocade.id'] in documentation_source_places_list:
                    obj_documented_list.append(obj_dict)
                    obj_dict = {}
                else:                        
                    obj_created_list.append(obj_dict)
                    obj_dict = {}
                
    source_documented_df = pd.DataFrame(obj_documented_list)
    source_created_df = pd.DataFrame(obj_created_list)
#    source_df.to_csv('out/' + resource_model + '_resu.csv', index=False)
    
    if len(obj_documented_list) > 0:
        lookup_place_json_df = pd.read_csv(lookup_path + 'aspace/as2arches_place.csv')
        source_documented_df['json'] = source_documented_df['place_au_code'].map(lookup_place_json_df.set_index('brocadeID')['json'])
        card_documented = 'e73_DocumentedPlacesSource'
        source_documented_df = source_documented_df.rename(columns={"json": "Documented Place Name Widget", 
                                                 "order": "Documented Place Order",
                                                 "place_note": "Documented Place Note",
                                                 "place_type": "Documented Place Note Type",
                                                 "place_language": "Documented Place Note Language"})

        number_of_doumented_records = len(obj_documented_list)
        if number_of_doumented_records > 0:
            save_it = save_mapped_set(source_documented_df, package, resource_model, card_documented)

    if len(obj_created_list) > 0:
        lookup_place_json_df = pd.read_csv(lookup_path + 'aspace/as2arches_place.csv')
        source_created_df['json'] = source_created_df['place_au_code'].map(lookup_place_json_df.set_index('brocadeID')['json'])
        card_created = 'e73_CreationPlacesSource'
        source_created_df = source_created_df.rename(columns={"json": "Creation Place Widget",
                                             "place_note": "Creation Place Note",
                                             "place_type": "Creation Place Note Type",
                                             "place_language": "Creation Place Note Language"})
        
        number_of_created_records = len(obj_created_list)
        if number_of_created_records > 0:
            save_it = save_mapped_set(source_created_df, package, resource_model, card_created)
            

        del source_documented_df
        del source_created_df
        obj_documented_list = []
        obj_created_list = []
        return '%s saved: %s, %s saved: %s' % (card_documented, number_of_doumented_records, card_created, number_of_created_records)
    
    

## Places from file

In [40]:
def make_places_from_file(source_list, single_list, resource_model, package, dataset, card):

    creation_file_places_list = ["lhob",  "lhpr", "lhsc", "lhtk", "lhhs", "lhph"]
    documentation_file_places_list = ["lhps"]
    
    images_dir = '../../source/static/images/'
    mappings = multi_value_mappings[card]
    #columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    
    single_df = pd.DataFrame(single_list)
    
    try:
        places_from_file_df = pd.read_csv(place_source_dir + dataset + '_places.csv')
    except:
        print('no such place file')
                
    if dataset in creation_file_places_list:
        card = 'e73_CreationPlacesSource' + '_' + dataset
        places_from_file_df['card'] = card
        places_from_file_df = places_from_file_df.rename(columns={"record_uuid": "ResourceID",
                                             "brocade.id": "brocade.id", "json": "Creation Place Widget",
                                             "note": "Creation Place Note",
                                             "note_type": "Creation Place Note Type",
                                             "note_language": "Creation Place Note Language"})

    if dataset in documentation_file_places_list:
        card = 'e73_DocumentedEventsPlaces' + '_' + dataset
        places_from_file_df['card'] = card
        
        places_from_file_df = places_from_file_df.rename(columns={"record_uuid": "ResourceID",
                                              "brocade.id": "brocade.id","json": "Documented Event Place Name Widget",                                                                   
                                              "order": "Documented Event Place Order",
                                              "note": "Documented Event Place Note",
                                              "note_type": "Documented Event Place Note Type",
                                              "note_language": "Documented Event Place Note Language"})

    if len(places_from_file_df) > 0:
        
        # Hack to weed out extra places from the manually generated place file
        #single_df = pd.DataFrame(single_list)
        #filtered_df = places_from_file_df[places_from_file_df['ResourceID'].isin(single_df['ResourceID'])]
        #save_it = save_mapped_set(filtered_df, package, resource_model, card)
        #number_of_records = len(filtered_df)
        #del places_from_file_df
        #del filtered_df
    
        save_it = save_mapped_set(places_from_file_df, package, resource_model, card)
        number_of_records = len(places_from_file_df)
        del places_from_file_df
        
        return '%s saved: %s' % (card, number_of_records)

## Change History

In [41]:
def make_change_history_relations(resource_model, package, card):

    change_history_df = pd.read_csv('%s%s_Change History Log.csv' % (transactions_dir, package.split('_')[1])) 
    resource_model_df = change_history_df[change_history_df['resource_model']==resource_model]
    relation_dict = {}
    relation_list = []
    related_resource_dict = {}
    related_resource_list = []
    

    for idx, row in resource_model_df.iterrows():
        relation_dict['ResourceID'] = row['source_uuid']
        relation_dict['card'] = 'e73_ChangeHistoryRelations'
        relation_dict['order'] = '1'
        relation = '[{"resourceId": "%s", "ontologyProperty": "", "resourceXresourceId": "%s", "inverseOntologyProperty": ""}]' % (row['ResourceID'], str(uuid.uuid4()))
        relation_dict['Transactions'] = relation

        relation_list.append(relation_dict)    
        relation_dict = {}
        
        related_resource_dict['resourceinstanceidfrom'] = row['source_uuid']
        related_resource_dict['resourceinstanceidto'] = row['ResourceID']
        related_resource_dict['relationshiptype'] = 'is related to'
        related_resource_dict['datestarted'] = ''
        related_resource_dict['dateended'] = ''
        related_resource_dict['notes'] = ''
                
        related_resource_list.append(related_resource_dict)
        related_resource_dict = {}        
    
    relation_df = pd.DataFrame(relation_list)
    relation_df = relation_df.sort_values(by=['ResourceID'])
    related_resource_df = pd.DataFrame(related_resource_list)
    
    save_it = save_mapped_set(relation_df, package, resource_model, card)
    save_it = save_mapped_set(related_resource_df, package, resource_model, 'RelatedChangeHistory')


    number_of_records = len(relation_df)
    del change_history_df
    del resource_model_df
    del relation_df
    del related_resource_list
    del related_resource_df
    return '%s saved: %s' % (card, number_of_records)    



## Generate dataset

In [42]:
%%time

file_places = ["lhob",  "lhpr", "lhsc", "lhtk", "lhhs", "lhph", "lhps"]
source_places = ["mpmbr", "mpmhs", "mpmph", "mpmtk", "rubhs"]



source_df = []
record_dict = {}
record_list = []
records_list = []

for resource_model, packages in resource_model_list.items():
    print(resource_model)    
    for package, datasets in packages.items():
        
        Languages_list = []
        GenreTypes_List = []
        Keywords_list = []
        Correspondence_list = []
        AssociatedArchives_list = []
        ExternalRelationsExtras_list = []
        Descriptions_list = []
        CreationTimeSpanSourceNotes_list = []
        Titles_list = [] 
        MaterialRecords_list = []
        LegacyInformation_list = []
        LegacyInformationMerged_list = []
        LegacyImages_list = []
            
        # make_ire

        CreationActors_list = []
        RecordActors_list = []
        
        # make_exceptions
        ConnectedSubjects_list = []
        DepictedItems_list = []
        OriginalBrocade_list = [] 
        
        #from RDM
        AnnotationsImmaterial_list = []
        ExternalRelations_list = []
        AssociatedRecords_list = []

        Single_list = []
        
        PlacesSource_list = []
        
        
        print('- ', package)
        for dataset in datasets:
            
            if dataset_size == 'all':
                source_file_name = source_path + dataset + '.json'
            else:
                source_file_name = source_path + 'slices/' + dataset + '_' + dataset_size +'.json'

            with open(source_file_name) as f:
                records = json.load(f)
                
            print('   - ', dataset)
            for record in records:
                if record['ID'].endswith('_#1'):                
                    for card, v in multi_value_mappings.items():
                        brocade_id = record['immaterialID']
                        brocade_uuid = record['groups']['brocade']['Entries']['1']['Fields']['uuid'][0]['value']
                        for group_key in get_group_keys(v): 
                            if (group_key in record['groups'].keys() or group_key == 'materialRecords'):

                                if group_key == 'materialRecords':
                                    record_dict['ResourceID'] = brocade_uuid
                                    record_dict['brocade.id'] = brocade_id
                                    record_dict['card'] = card
                                    record_dict['order'] = order
                                    record_dict['dataset'] = dataset
                                    group_values = record[group_key]
                                    record_dict[group_key] = record['materialRecords']
                                    record_list.append(record_dict)
                                    record_dict = {}   

                                else:
                                    group_values = record['groups'][group_key]
                                    for order, item in group_values['Entries'].items():
                                        if any(key in item['Fields'].keys() for key in get_field_keys(v)):
                                            record_dict['ResourceID'] = brocade_uuid
                                            record_dict['brocade.id'] = brocade_id
                                            record_dict['card'] = card
                                            record_dict['order'] = order
                                            record_dict['dataset'] = dataset
                                            for _key, _value in item['Fields'].items():
                                                record_dict['%s.%s' % (group_key, _key)] = _value
                                            record_list.append(record_dict)
                                            record_dict = {}   
                                
                                if card == 'e73_AnnotationsImmaterial':
                                    AnnotationsImmaterial_list.extend(record_list)
                                if card == 'e73_AssociatedArchives': #uuid_immat OK
                                    AssociatedArchives_list.extend(record_list)                                    
                                if card == 'e73_AssociatedRecords':
                                    AssociatedRecords_list.extend(record_list)
                                if card == 'e73_ConnectedSubjects':
                                    ConnectedSubjects_list.extend(record_list)                                    
                                if card == 'e73_Correspondence': #uuid_immat OK 
                                    Correspondence_list.extend(record_list)                                    
                                if card == 'e73_CreationActors':
                                    CreationActors_list.extend(record_list)                                    
                                if card == 'e73_CreationTimeSpanSourceNotes': #uuid_immat OK
                                    CreationTimeSpanSourceNotes_list.extend(record_list)                                    
                                if card == 'e73_DepictedItems':
                                    DepictedItems_list.extend(record_list)                                    
                                if card == 'e73_Descriptions': #uuid_immat OK
                                    Descriptions_list.extend(record_list)                                    
                                if card == 'e73_ExternalRelationsExtras': #uuid_immat OK
                                    ExternalRelationsExtras_list.extend(record_list)  
                                if card == 'e73_ExternalRelations':
                                    ExternalRelations_list.extend(record_list)
                                if card == 'e73_GenreTypes':
                                    GenreTypes_List.extend(record_list)                                    
                                if card == 'e73_Keywords': #uuid_immat OK
                                    Keywords_list.extend(record_list)                                    
                                if card == 'e73_Languages':
                                    Languages_list.extend(record_list)                                    
                                if card == 'e73_LegacyInformation':
                                    LegacyInformation_list.extend(record_list)                                    
                                if card == 'e73_LegacyInformationMerged':
                                    LegacyInformationMerged_list.extend(record_list)                                    
                                    

                                if card == 'e73_Titles': #uuid_immat OK
                                    Titles_list.extend(record_list)                                    
                                if card == 'e73_MaterialRecords': #uuid_immat OK
                                    MaterialRecords_list.extend(record_list)                                    
                                    
                                if card == 'e73_RecordActors':
                                    RecordActors_list.extend(record_list)                                    
                                if card == 'e73_OriginalBrocade':
                                    OriginalBrocade_list.extend(record_list)                                    
                                if card == 'e22_ProductionPlacesSource':
                                    ProductionPlacesSource_list.extend(record_list)                                    
                                    
                                    

                                if card == 'e73_Single':
                                    Single_list.extend(record_list)                                    
                                if card == 'Places':
                                    PlacesSource_list.extend(record_list)                                    
                                    
                                    
                                record_list = []                                


            if dataset in file_places:
                if len(PlacesSource_list) > 0:                
                    PlacesFromFiles = make_places_from_file(PlacesSource_list, Single_list, resource_model, package, dataset, 'Places')
                    print('      - ', PlacesFromFiles)
                   
        if package in ['pkg_mpm']:
            if len(PlacesSource_list) > 0:
                PlacesSource = make_places_from_source_mpm(PlacesSource_list, resource_model, package, dataset, 'Places')
                print('      - ', PlacesSource)                
        if package in ['pkg_rub']:
            if len(PlacesSource_list) > 0:
                PlacesSource = make_places_from_source_rub(PlacesSource_list, resource_model, package, dataset, 'Places')
                print('      - ', PlacesSource)                

                
        if len(AnnotationsImmaterial_list) > 0:
            AnnotationsImmaterial = make_annotations(AnnotationsImmaterial_list, resource_model, package, dataset, 'e73_AnnotationsImmaterial', 'e73')
            print('      - ', AnnotationsImmaterial)    
        if len(AssociatedArchives_list) > 0:
            AssociatedArchives = make_associated_archive(AssociatedArchives_list, resource_model, package, dataset, 'e73_AssociatedArchives')
            print('      - ', AssociatedArchives)    
        if len(AssociatedRecords_list) > 0:
            AssociatedRecords = make_associated_records(AssociatedRecords_list, resource_model, package, dataset, 'e73_AssociatedRecords')
            print('      - ', AssociatedRecords)    
        if len(Correspondence_list) > 0:
            Correspondence = make_correspondence(Correspondence_list, resource_model, package, dataset, 'e73_Correspondence')
        if len(CreationTimeSpanSourceNotes_list) > 0:
            CreationTimeSpanSourceNotes = make_plain_mappings(CreationTimeSpanSourceNotes_list, resource_model, package, dataset, 'e73_CreationTimeSpanSourceNotes')
            print('      - ', CreationTimeSpanSourceNotes)
        if len(Descriptions_list) > 0:
            Descriptions = make_descriptions(Descriptions_list, resource_model, package, dataset, 'e73_Descriptions')
            print('      - ', Descriptions)    
        if len(ExternalRelationsExtras_list) > 0:
            ExternalRelationsExtras = make_external_relations_extra(ExternalRelationsExtras_list, resource_model, package, dataset, 'e73_ExternalRelationsExtras')
            print('      - ', ExternalRelationsExtras)                            
        if len(ExternalRelations_list) > 0:
            ExternalRelations = make_external_relations(ExternalRelations_list, resource_model, package, dataset, 'e73_ExternalRelations')
            print('      - ', ExternalRelations)    
        if len(GenreTypes_List) > 0:
            GenreTypes = e73_make_with_function_and_include_codes(GenreTypes_List, resource_model, package, dataset, 'e73_GenreTypes')
            print('      - ', GenreTypes)    
        if len(Keywords_list) > 0:
            Keywords = make_plain_mappings(Keywords_list, resource_model, package, dataset, 'e73_Keywords')
            print('      - ', Keywords)    
        if len(Languages_list) > 0:
            Languages = e73_make_with_function_and_include_codes(Languages_list, resource_model, package, dataset, 'e73_Languages')
            print('      - ', Languages)    
        if len(LegacyInformation_list) > 0:
            LegacyInformation = make_legacy_info(LegacyInformation_list, resource_model, package, dataset, 'e73_LegacyInformation')
            LegacyInformation_list = []
            print('      - ', LegacyInformation)    

# Check if it can be made faster...
        if len(LegacyInformationMerged_list) > 0:
            LegacyInformationMerged = make_legacy_info_merged(LegacyInformationMerged_list, resource_model, package, dataset, 'e73_LegacyInformationMerged')
            LegacyInformationMerged_list = []
            print('      - ', LegacyInformationMerged)    

        if len(MaterialRecords_list) > 0:
            MaterialRecords = make_material_records(MaterialRecords_list, resource_model, package, dataset, 'e73_MaterialRecords')
            MaterialRecords_list = []
            print('      - ', MaterialRecords)    
        if len(Titles_list) > 0:
            Titles = make_titles(Titles_list, resource_model, package, dataset, 'e73_Titles')
            Titles_list = []
            print('      - ', Titles)                                  
        if len(Single_list) > 0:
            Single = make_single(Single_list, resource_model, package, dataset, 'e73_Single')
            print('      - ', Single)                                                          
        if len(Single_list) > 0:
            if package.startswith("pkg_lh"):                
                LegacyImages = make_legacy_images(Single_list, resource_model, package, dataset, 'e73_LegacyImages')
                Single_list = []
                print('      - ', LegacyImages)   
                   
        if len(ConnectedSubjects_list) > 0:
            ConnectedSubjects = make_exception_ire(ConnectedSubjects_list, resource_model, package, dataset, 'e73_ConnectedSubjects', 'e73')
            if not ConnectedSubjects is None:
                print('      - ', ConnectedSubjects)    
        if len(DepictedItems_list) > 0:
            DepictedItems = make_exception_ire(DepictedItems_list, resource_model, package, dataset, 'e73_DepictedItems', 'e73')
            if not DepictedItems is None:
                print('      - ', DepictedItems)    
        if len(OriginalBrocade_list) > 0:
            OriginalBrocade = make_exception_ire(OriginalBrocade_list, resource_model, package, dataset, 'e73_OriginalBrocade', 'e73')
            if not OriginalBrocade is None:
                print('      - ', OriginalBrocade)    
        if len(CreationActors_list) > 0:
            CreationActors = make_plain_ire(CreationActors_list, resource_model, package, dataset, 'e73_CreationActors', 'e73')
            if not CreationActors is None:
                print('      - ', CreationActors)    
        if len(RecordActors_list) > 0:
            RecordActors = make_plain_ire(RecordActors_list, resource_model, package, dataset, 'e73_RecordActors', 'e73')
            RecordActors_list = []
            if not RecordActors is None:
                print('      - ', RecordActors)    
    
    
    
    
    ChangeHistory = make_change_history_relations(resource_model, package, 'e73_ChangeHistoryRelations')
    print('      - ', ChangeHistory)   

    DamsLinks = make_dams(resource_model, package, dataset)
    print('      - ', DamsLinks)  

                   
print('---------') 
print(datetime.datetime.now())
%reset -f 

Foto
-  pkg_rub
   -  rubgr_foto
      -  e73_AnnotationsImmaterial saved: 1
      -  e73_AssociatedArchives saved: 25
      -  e73_AssociatedRecords saved: 188
      -  e73_CreationTimeSpanSourceNotes saved: 34
      -  e73_Descriptions saved: 440
      -  e73_ExternalRelationsExtras: found in source: 154, saved: 135
      -  e73_ExternalRelations saved: 41
      -  e73_Keywords saved: 449
      -  e73_LegacyInformation saved: 1760
      -  e73_MaterialRecords saved: 440
      -  e73_Titles saved: 440
      -  e73_Single saved: 440

g_df = 0
all_au_df = 331
au_df (all_au_df - g_df) = 331
e73_OriginalBrocade has NO Gs, all_au_df saved
      -  e73_OriginalBrocade saved: 331
      -  e73_ChangeHistoryRelations saved: 440
../../source/static/dams/e22_DamsLinks_rub_Foto.csv
../../../digipolis-arches-shoku-pkg/source/rub/Foto/e22_DamsLinks.csv
      -  NO DAMS FILE FOUND
Brief
-  pkg_rub
   -  rubgr_brief
      -  e73_AnnotationsImmaterial saved: 6
      -  e73_AssociatedArchives saved: 11