## Cards

In [1]:
import os
import json
import uuid
import pandas as pd
from edtf import text_to_edtf
import datetime
from import_spec import * # package and number of records to import, see import_spec.py


lookup_path = '../../lookup/'
source_path = '../../source/tg/'
mappings_path = '../../mappings/'
save_path = '../../../digipolis-arches-shoku-pkg/source/'

## Lookups

In [2]:
# Dataframes
lookup_acquisitions_df = pd.read_csv(lookup_path + 'aspace/as2arches_accessions.csv')
lookup_archives_df = pd.read_csv(lookup_path + 'aspace/as2arches_archiveobject.csv')
lookup_imaginary_df = pd.read_csv(lookup_path + 'aspace/as2arches_merged.csv')
lookup_relation_df = pd.read_csv(lookup_path + 'isaar_relatie_types.csv')

In [3]:
with open(mappings_path + 'mappings_e22.json') as f:
    multi_value_mappings = json.load(f)  

with open(mappings_path + 'resource_models_%s.json' % (resource_model) ) as f:
    resource_model_list = json.load(f)   

with open(mappings_path + 'static.json') as f:
    static_list = json.load(f)   

## Functions

In [4]:
def add_ark_identifier_by_row_value(package, uuid):
    return '%s/ark:/%s/%s' % (static_list[package]['ark_url'], static_list[package]['naan'], uuid)

In [5]:
def add_ead_identifier_by_row_value(package, uuid):
    return '%s%s' % (static_list[package]['ead_uri'], uuid)

In [6]:
def add_ark_identifier_df(source_df, uuid_column, package):
    for idx, row in source_df.iterrows():
        source_df.loc[idx, _args['column']] = '%s/ark:/%s/%s'  % (static_list[package]['ark_url'], static_list[package]['naan'], row[uuid_column])         
    return source_df

In [7]:
def lookup_column_df(source_df, column, args):
    lookup_df = pd.read_csv(lookup_path + args['lookup_file'])
    source_df[column] = source_df[args['match_source']].map(lookup_df.set_index(args['code'])[args['concept']])
    del lookup_df
    return source_df 

In [8]:
def lookup_column_value(code, lookup_file, card, brocade_id, source_field):
    lookup_df = pd.read_csv(lookup_path + lookup_file)
    try:
        concept_name = lookup_df[lookup_df['code'] == code]['concept'].iloc[0]
        del lookup_df
        return concept_name
    except:
        #print('%s,%s,%s,%s,%s' % (card, brocade_id, source_field, code, lookup_file))
        print(brocade_id, ',', code + ',NOT FOUND IN,' + lookup_file)
        return code + ' NOT FOUND IN: ' + lookup_file

In [9]:
def save_mapped_set(source_df, _package, _resource_model, _card):
    
    if _package in ['pkg_lhps', 'pkg_lhph', 'pkg_lhbr']:
        package = 'lh'
    else:
        package = _package.split('_')[1]     
    
    file_name = '%s/%s/%s/%s.csv' % (save_path, package, _resource_model, _card)
    source_df.to_csv(file_name, index=False)    
    del source_df
    return 'Saved OK'

In [10]:
def get_constant_type(mapping, source_key):
    # pull constants from a node mapping
    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']

In [11]:
def make_default_dict(obj, card, source_value, source_field, model_class):

    default_dict = {}
    if model_class == 'e73':
        default_dict['ResourceID'] = obj['ResourceID_immat']
    elif model_class == 'e22':   
        default_dict['ResourceID'] = obj['ResourceID']
    default_dict['brocade.id'] = obj['brocade.id']
    default_dict['card'] = card
    default_dict['order'] = obj['order']
    default_dict['source_field'] = source_field
    default_dict['source_code'] =  source_value
    
    return default_dict

In [12]:
def get_mark_type(mapping, source_key):

    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']

## Keys

In [13]:
def get_group_keys(v):
    group_key_list = []
    for mapping in v['node_mappings']:
        group_key_list.append(mapping['from'].split('.')[0])
    group_key_list = list(dict.fromkeys(group_key_list) ) 
    return(group_key_list)

def get_field_keys(v):
    field_list = []
    try:
        for mapping in v['node_mappings']:
            field_list.append(mapping['from'].split('.')[1])
        return(field_list)
    except:
        print('Error in:', mapping)
        
def get_from_keys(v):
    map_list = []
    for mapping in v['node_mappings']:
        map_list.append(mapping['from'])
    return(map_list)

## Cards:

* Genre Types 
* Languages 
* Merkteken Types 
* Writing Material
* ProductStadiums
* Document status


In [14]:
def e22_make_with_function_and_include_codes(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], 'e22'))
                        if 'function' in node_mapping.keys():
                            if node_mapping['function']['name'] == 'column_lookup': 
                                _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                            node_mapping['function']['args']['lookup_file'],
                                            card, obj['brocade.id'], node_mapping['from'])
                                obj_dict[node_mapping['to']] = _type                            
                        obj_list.append(obj_dict)
                        obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del obj
    return '%s saved: %s' % (card, number_of_records)

## Carriers

In [15]:
def make_carriers(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        if 'function' in node_mapping.keys():
                            if node_mapping['function']['name'] == 'column_lookup': 
                                _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                            node_mapping['function']['args']['lookup_file'],
                                            card, obj['brocade.id'], node_mapping['from'])
                                obj_dict[node_mapping['to']] = _type
                        else:
                            obj_dict[node_mapping['to']] = source_value

                if node_mapping['to'] == 'Carrier Type': 
                    obj_dict['carrier_type_source_field'] = node_mapping['from']
                    
                if node_mapping['to'] == 'Carrier Colour': 
                    obj_dict['carrier_colour_source_field'] = node_mapping['from']                
                
        if len(obj_dict) > 0:
            obj_dict.update(make_default_dict(obj, card, '', '', 'e22'))
        
        obj_list.append(obj_dict)
        obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    if 'Carrier Type' in source_df.columns and 'Carrier Colour' in source_df.columns:
        source_df = source_df[['ResourceID', 'brocade.id', 'card', 'order', 'carrier_type_source_field', 'Carrier Type', 'carrier_colour_source_field', 'Carrier Colour']]
    elif 'Carrier Type' in source_df.columns:
        source_df = source_df[['ResourceID', 'brocade.id', 'card', 'order', 'carrier_type_source_field', 'Carrier Type']]
    else:
        source_df = source_df[['ResourceID', 'brocade.id', 'card', 'order', 'carrier_colour_source_field', 'Carrier Colour']]
        
    #source_df.to_csv('out/' + package.split('_')[1] + '_' + resource_model + '_carrier.csv', index=False)
    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del obj
    return '%s saved: %s' % (card, number_of_records)

## Marks

In [16]:
def get_mark_type(mapping, source_key):

    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']
    
def make_marks(source_list, resource_model, package, dataset, card):

    dim_dict = {}
    dim_list = []
    card = card
    mapping = multi_value_mappings[card]
    
    for dim in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in dim.keys():
                dim_dict['ResourceID'] = dim['ResourceID']
                dim_dict['brocade.id'] = dim['brocade.id']
                dim_dict['card'] = card
                dim_dict['order'] = dim['order']
                dim_dict[node_mapping['to']] = dim[node_mapping['from']][0]['value']
                constants = get_mark_type(mapping, node_mapping['from'])
                for _type, _value in constants.items():
                    dim_dict[_type] = _value
                dim_list.append(dim_dict)
                dim_dict = {}

    source_df = pd.DataFrame(dim_list)
    number_of_records = len(dim_list)    
    # save
    save_it = save_mapped_set(source_df, package, resource_model, card)
    #source_df.to_csv('%s_%s_Group_%s.csv' % (resource_model, card, package), index=False)

    dim_dict = {}
    dim_list = []
    del source_df
    del source_list

    return '%s saved: %s' % (card, number_of_records)

## Items

In [17]:
def make_items(source_list, resource_model, package, dataset, card):
    usr_dict = {}
    usr_list = []
    mappings = multi_value_mappings[card]

    source_df = pd.DataFrame(source_list)
    # make dataset
    m_dict = {}
    for column in source_df.columns:
        for idx, row in source_df.iterrows():
            if isinstance(row[column], list): 
                source_df.loc[idx, column] = row[column][0]['value']
            if column == 'adm.note':
                if pd.notna(row['adm.note']):
                    source_df.loc[idx, 'Source Note Type'] = 'remarks'
                    source_df.loc[idx, 'Source Note Language'] = 'Nederlands'

    if package in ['pkg_lh', 'pkg_lhps', 'pkg_lhbr', 'pkg_lhph']:
        source_df = source_df.drop(columns=['adm.note', 'Source Note Type', 'Source Note Language'])
    if package in ['pkg_rub']:
        source_df = source_df.drop(columns=['adm.volgnummer'])
    if package in ['pkg_mpm']:
        source_df = source_df.rename(columns={'adm.inv': 'Plaatsingsnummer'})

    
    # do the mapping
    for node_mappings in mappings['node_mappings']:
        usr_dict[node_mappings['from']] = node_mappings['to']        
    dataset_df = source_df.rename(columns=usr_dict)
    dataset_df['card'] = card
    #dataset_df.to_csv('out/' + resource_model + '_item.csv', index=False)

    # save
    save_it = save_mapped_set(dataset_df, package, resource_model, card)
    
    number_of_records = len(dataset_df)
    m_dict = {}
    usr_dict = []
    del source_df
    del dataset_df
    del source_list
    
    return '%s saved: %s' % (card, number_of_records)

## Associated Archive

In [18]:
lookup_aspace_archives_df = pd.read_csv(lookup_path + 'aspace/as2arches_archiveobject.csv')
lookup_rub_isad_df = pd.read_csv(lookup_path + 'rub_isad_internal_relations.csv')

def make_associated_archive(source_list, resource_model, package, dataset, card):
    
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                if obj['order'] == '2':
                    obj_dict['ResourceID'] = obj['ResourceID']
                    obj_dict['brocade.id'] = obj['brocade.id']
                    obj_dict['card'] = card
                    obj_dict['order'] = obj['order']
                    obj_dict[node_mapping['from']] = obj[node_mapping['from']][0]['value']                
                    obj_list.append(obj_dict)
                    obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)    
    
    if (len(source_df) > 0):
        source_df['isaad'] = source_df[node_mapping['from']].map(lookup_rub_isad_df.set_index('code')['isad'])
        source_df['Associated Archive'] = source_df['isaad'].map(lookup_aspace_archives_df.set_index('brocadeID')['json'])

    #source_df.to_csv('out/' + dataset + '_ass_arch.csv')

#    for node_mappings in mappings['node_mappings']:
#        obj_dict[node_mappings['from']] = node_mappings['to']        
#    dataset_df = source_df.rename(columns=obj_dict)
    dataset_df = source_df
    if len(dataset_df) > 0:
        save_it = save_mapped_set(dataset_df, package, resource_model, card)
    
    del source_df
    del dataset_df
    del source_list
    del obj_list
    return '%s saved: %s' % (card, number_of_records)

# Formats

In [19]:
def get_format_type(mapping, source_key):

    for node_mapping in mapping['node_mappings']:
        if node_mapping['from'] == source_key:
            return node_mapping['constants']
        
def make_formats(source_list, resource_model, package, dataset, card):
    
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict['source_field'] = node_mapping['from']
                obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                if 'constants' in node_mapping.keys():
                    constants = get_format_type(mappings, node_mapping['from'])
                    for _const, _value in constants.items():
                        obj_dict[_const] = _value
                if 'function' in node_mapping.keys():
                    if node_mapping['function']['args']['from'] in obj.keys():
                        obj_dict[node_mapping['function']['args']['to']] = obj[node_mapping['function']['args']['from']][0]['value']


                obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)    
    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    
    del source_df
    del source_list
    del obj_list
    return '%s saved: %s' % (card, number_of_records)

## Cards:
* Material 
* Colour
* Production Technique

In [20]:
def make_with_include_codes(source_list, resource_model, package, dataset, card, model_class):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], model_class))
                        if 'function' in node_mapping.keys():
                            if node_mapping['function']['name'] == 'column_lookup': 
                                _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                            node_mapping['function']['args']['lookup_file'],
                                            card, obj['brocade.id'], node_mapping['from'])
                                obj_dict[node_mapping['to']] = _type                            
                        obj_list.append(obj_dict)
                        obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    
    if package == 'pkg_rub' and card == 'e22_ProductionTechniques' and resource_model == 'Foto':
        drop_codes_list = ["dtrvnt"]        
        source_df = source_df.loc[source_df['source_code'].isin(drop_codes_list) == False]
        drop_codes_list = []
    
    number_of_records = len(source_df)
    save_it = save_mapped_set(source_df, package, resource_model, card)

    del source_df
    del source_list
    del obj_list
    return '%s saved: %s' % (card, number_of_records)

## Archives

In [21]:
def make_archives(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                #obj_dict['ResourceID'] = obj['adm.uuid_mat'][0]['value']
                #obj_dict['brocade.id'] = obj['adm.id_mat'][0]['value']
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    source_df['Archive Widget'] = source_df[node_mapping['to']].map(lookup_archives_df.set_index('brocadeID')['json'])
    source_df['Archive Identifier'] = source_df[node_mapping['to']].map(lookup_archives_df.set_index('brocadeID')['ark'])

    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    #source_df.to_csv('out/%s_%s_Group_%s.csv' % (resource_model, card, package), index=False)
    del source_df
    del source_list
    del obj_list
    return '%s saved: %s' % (card, number_of_records)

## Acquisitions

In [22]:
def make_acquisitions(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                obj_dict['ResourceID'] = obj['ResourceID']
                obj_dict['brocade.id'] = obj['brocade.id']
                obj_dict['card'] = card
                obj_dict['order'] = obj['order']
                obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                obj_list.append(obj_dict)
                obj_dict = {}
    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    source_df['Acquisition Widget'] = source_df[node_mapping['to']].map(lookup_acquisitions_df.set_index('brocadeID')['json'])
    source_df['Acquisition Identifier'] = source_df[node_mapping['to']].map(lookup_acquisitions_df.set_index('brocadeID')['ark'])

    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    #source_df.to_csv('out/%s_%s_Group_%s.csv' % (resource_model, card, package), index=False)
    del source_df
    del source_list
    del obj_list

    return '%s saved: %s' % (card, number_of_records)

## Conditions

In [23]:
def make_conditions(source_list, resource_model, package, dataset, card, model_class):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], model_class))
                obj_dict[node_mapping['to']] = obj[node_mapping['from']][0]['value']
                if 'function' in node_mapping.keys():
                    _type = lookup_column_value(obj[node_mapping['from']][0]['value'], 
                                            node_mapping['function']['args']['lookup_file'],
                                            card, obj['brocade.id'], node_mapping['from'])
                    obj_dict[node_mapping['to']] = _type
                    
                if 'constants' in node_mapping.keys():
                    constants = get_mark_type(mappings, node_mapping['from'])
                    for _const, _value in constants.items():
                        obj_dict[_const] = _value
                obj_list.append(obj_dict)
                obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    return '%s saved: %s' % (card, number_of_records)

## Dimension Source Notes

In [24]:
def make_dimension_source_notes(source_list, resource_model, package, dataset, card):
    mapping = multi_value_mappings['e22_Dimensions']
    columns = get_from_keys(mapping)
    dim_dict = {}
    dim_list = []
    const_dict = {}
    const_list = []
    
    for dim in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in dim.keys():
                source_value = dim[node_mapping['from']][0]['value']
                dim_dict.update(make_default_dict(dim, card, source_value, node_mapping['from'], 'e22'))
                constants = get_constant_type(mapping, node_mapping['from'])
                for _type, _value in constants.items():
                    if _type != 'unit':
                        const_list.append(_type)
                _dimension = dim[node_mapping['from']][0]['value']
                _constants = ', '.join(map(str, const_list))
                _source_field = node_mapping['from']
                const_list = []
                dim_dict['Dimension Source Note'] = '%s - %s, %s' % (_dimension, _constants, _source_field)
                dim_list.append(dim_dict)
                dim_dict = {}

    source_df = pd.DataFrame(dim_list)
    number_of_records = len(dim_list)

    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del dim_list

    return '%s saved: %s' % (card, number_of_records)

## Annotations

In [25]:
## OBS 

### {"from": "adm.note", "to": "Annotation",
## ONLY IF DATASET = LH



def check_constant(card, _key, _constant):
    for item in multi_value_mappings[card]['node_mappings']:
        if item['from'] == _key:
            return item['constants'][0][_constant]

def make_annotations(source_list, resource_model, package, dataset, card, model_class):

    anno_dict = {}
    anno_list = []
    mappings = multi_value_mappings[card]

    for anno in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in anno.keys():
                source_value = anno[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        anno_dict.update(make_default_dict(anno, card, source_value, node_mapping['from'], model_class))
                        if anno[node_mapping['from']][0]['value'] == 'acai':
                            anno_dict['Annotation'] = 'Alfabetische index'
                        else:
                            anno_dict['Annotation'] = anno[node_mapping['from']][0]['value']
                        if 'constants' in node_mapping.keys():
                            constants = get_mark_type(mappings, node_mapping['from'])
                            for _type, _value in constants.items():
                                anno_dict[_type] = _value
                else: # no include codes
                    anno_dict.update(make_default_dict(anno, card, source_value, node_mapping['from'], model_class))
                    anno_dict['Annotation'] = anno[node_mapping['from']][0]['value']
                    if 'constants' in node_mapping.keys():
                        constants = get_mark_type(mappings, node_mapping['from'])
                        for _type, _value in constants.items():
                            anno_dict[_type] = _value
                    
                                
        
        anno_list.append(anno_dict)
        anno_dict = {}

    source_df = pd.DataFrame(anno_list)
    
    ## OBS 

    ### {"from": "adm.note", "to": "Annotation"} if LH. - > Filter out if not in LH
    if package in ['pkg_mpm', 'pkg_rub']:
        source_df = source_df[source_df['source_field']!='adm.note']
    
    number_of_records = len(source_df)
    #source_df.sort_values(by=['ResourceID', 'card', 'order'])
    
    # save
    if number_of_records > 0:
        save_it = save_mapped_set(source_df, package, resource_model, card)
    
    del source_df
    del source_list
    del anno_list    
    return '%s saved: %s' % (card, number_of_records)



## Dimensions

In [26]:
error_list = []
replace_dict_x = {"X": "x",
                "A4": "29,7x21,0",
                "a4": "29,7x21,0",
                "A 4": "29,7x21,0",
                "A3": "42,0x29,7",
                "35mm": "2,4x3,6",
                "35 mm": "2,4x3,6",
                "Quarto": "29,7x21,0",                                    
                "quarto": "29,7x21,0",                                    
                "cm": "",
                " x ": "x", 
                ",": ".",
                " ": "",
                ".5.": ".5",
                "xxx":"x",
                "xx":"x",
                "'": ",",
                ",7,":",7",
                ",,":",",
                "h":"",
                "b":"",
                "d":"",
    } 


replace_dict_hbd = {"X": "x",
                "A4": "29,7x21,0",
                "a4": "29,7x21,0",
                "A 4": "29,7x21,0",
                "A3": "42,0x29,7",
                "35mm": "2,4x3,6",
                "35 mm": "2,4x3,6",
                "Quarto": "29,7x21,0",    
                "quarto": "29,7x21,0",                                    
                "cm": "",
                " x ": "x", 
                ",": ".",
                " ": "",
                "'": ",",
                ",,":",",
                "x b": "x",
                "x d": "x",                    
                "h":"",
                "b":"x",
                "d":"x",
                "H":"",
                "B":"x",
                "D":"x",
                "xxx":"x",
                "xx":"x",
    }

def lookup_dimension_type(card, source_field, list_for_typing):

    for item in multi_value_mappings[card]['node_mappings']:
        if item['from'] == source_field:
            for dim_to_type in list_for_typing:
                dim_to_type['type'] = item['constants'][dim_to_type['type']]
                dim_to_type['unit'] = item['constants']['unit']

    return list_for_typing    

def lookup_dimension_type_single(card, source_field, list_for_typing):

    for item in multi_value_mappings[card]['node_mappings']:
        if item['from'] == source_field:
            for dim_to_type in list_for_typing:
                dim_to_type['type'] = item['constants']['dimension']
                dim_to_type['unit'] = item['constants']['unit']

    return list_for_typing    


def transform_cm_to_mm(_dimensions_list):
    
    for dim in _dimensions_list:        
        dim['value'] = int(dim['value'] * 10)
    return _dimensions_list

def split_height_width_depth(dim_value, dataset, brocade_id, source_field, transform, card, replacedict):
    
    dimensions_list = []
    
    repl_value = dim_value    
    for k, v in replacedict.items():
        repl_value = repl_value.replace(k, v)
    replaced_value = repl_value.strip()
        
    try:
        split_dim = replaced_value.split('x')
        split_dim_0 = float(split_dim[0])
        split_dim_1 = float(split_dim[1])
        split_dim_2 = float(split_dim[2])
        
        dimensions_list =  [{'value': split_dim_0, 'type': 'height'}, 
                {'value': split_dim_1, 'type': 'width' },
                {'value': split_dim_2, 'type': 'depth' },
               ]
        if transform == "True":
            dimensions_list = transform_cm_to_mm(dimensions_list)
        for comma_float in dimensions_list:
            comma_float['value'] = str(comma_float['value']).replace('.', ',')
 
        typed_dimensions = lookup_dimension_type(card, source_field, dimensions_list)    
        return typed_dimensions
    
    except:
#        error = ['%s,%s,%s,%s,height x width x depth,split and float check failed' % (dataset, brocade_id, source_field, dim_value)]
        error = [dataset,brocade_id,source_field,dim_value,'height x width x depth','split and float check failed']
        #print(error)
        #print('source:', dim_value, 'cleaned:',replaced_value)
        error_list.append(error)
        return False

def split_height_width(dim_value, dataset, brocade_id, source_field, transform, card):
    
    dimensions_list = []
    
    repl_value = dim_value    
    for k, v in replace_dict_x.items():
        repl_value = repl_value.replace(k, v)
    replaced_value = repl_value.strip()
    
    
    try:
        
        split_dim = replaced_value.split('x')
        split_dim_0 = float(split_dim[0])
        split_dim_1 = float(split_dim[1])
        
        dimensions_list =  [{'value': split_dim_0, 'type': 'height'}, 
                {'value': split_dim_1, 'type': 'width' }
               ]
        
        if transform == "True":
            dimensions_list = transform_cm_to_mm(dimensions_list)   
            for comma_float in dimensions_list:
                comma_float['value'] = str(comma_float['value']).replace('.', ',')
        elif transform == "False":
            if source_field == 'kenmerken.lhformatph' and split_dim_0 == 2.4 and split_dim_1 == 3.6:
                #print(brocade_id, source_field, split_dim_0, split_dim_1)
                dimensions_list = transform_cm_to_mm(dimensions_list)   
                for comma_float in dimensions_list:
                    comma_float['value'] = str(comma_float['value']).replace('.', ',')                
            else:
                for comma_float in dimensions_list:
                    comma_float['value'] = str(comma_float['value']).replace('.0', '')            
            
            
        typed_dimensions = lookup_dimension_type(card, source_field, dimensions_list)    
        return typed_dimensions
    
    except:
        #error = ['%s,%s,%s,%s,height x width,split and float check failed' % (dataset, brocade_id, source_field, dim_value)]
        error = [dataset,brocade_id,source_field,dim_value,'height x width','split and float check failed']
        #print(error)
        #print(dim_value)
        error_list.append(error)
        return False

def split_diameter(dim_value, dataset, brocade_id, source_field, transform, card):
    
    dimensions_list = []
    
    repl_value = dim_value    
    for k, v in replace_dict_x.items():
        repl_value = repl_value.replace(k, v)
    replaced_value = repl_value.strip()
    
    try:
        
        split_dim = float(replaced_value)
        
        dimensions_list =  [{'value': split_dim, 'type': 'diameter'}]
        if transform == "True":
            dimensions_list = transform_cm_to_mm(dimensions_list)        
        for comma_float in dimensions_list:
            comma_float['value'] = str(comma_float['value']).replace('.', ',')
            
        typed_dimensions = lookup_dimension_type(card, source_field, dimensions_list)    
        return typed_dimensions
    
    except:
        #error = ['%s,%s,%s,%s,height x width,split and float check failed' % (dataset, brocade_id, source_field, dim_value)]
        error = [dataset,brocade_id,source_field,dim_value,'diameter','split and float check failed']
        #print(error)
        #print(dim_value)
        error_list.append(error)
        return False
        
def split_single(dim_value, dataset, brocade_id, source_field, transform, card):
    #print('single', brocade_id, dim_value)
    dimensions_list = []
    
    repl_value = dim_value    
    for k, v in replace_dict_x.items():
        repl_value = repl_value.replace(k, v)
    replaced_value = repl_value.strip()


    
    try:
        
        split_dim = float(replaced_value)
        dimensions_list =  [{'value': split_dim}]
        
        if transform == "True":
            dimensions_list = transform_cm_to_mm(dimensions_list)        
        for comma_float in dimensions_list:
            comma_float['value'] = str(comma_float['value']).replace('.', ',')
            
        typed_dimensions = lookup_dimension_type_single(card, source_field, dimensions_list)
        #print(typed_dimensions, brocade_id, dimensions_list)
        
        return typed_dimensions
    
    except:
        #error = ['%s,%s,%s,%s,height x width,split and float check failed' % (dataset, brocade_id, source_field, dim_value)]
        error = [dataset,brocade_id,source_field,dim_value,'diameter','split and float check failed']
        print(error)
        #print(dim_value)
        error_list.append(error)
        return False
            
def make_dimensions(source_list, resource_model, package, dataset, card):

    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    dim_dict = {}
    dim_list = []
    
    for dim in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in dim.keys():
                dimension_value = dim[node_mapping['from']][0]['value']
                if node_mapping['type'] == "height_width_depth":
                    splitted_dimensions = split_height_width_depth(dimension_value, dataset, dim['brocade.id'], 
                                             node_mapping['from'], node_mapping['transform'], card, replace_dict_x)
                if node_mapping['type'] == "height_width_depth_hbd":
                    splitted_dimensions = split_height_width_depth(dimension_value, dataset, dim['brocade.id'], 
                                             node_mapping['from'], node_mapping['transform'], card, replace_dict_hbd)
                elif node_mapping['type'] == "height_width":
                    splitted_dimensions = split_height_width(dimension_value, dataset, dim['brocade.id'], 
                                             node_mapping['from'], node_mapping['transform'], card)
                elif node_mapping['type'] == "diameter":
                    splitted_dimensions = split_diameter(dimension_value, dataset, dim['brocade.id'], 
                                             node_mapping['from'], node_mapping['transform'], card)
                elif node_mapping['type'] == "single":
                    splitted_dimensions = split_single(dimension_value, dataset, dim['brocade.id'], 
                                             node_mapping['from'], node_mapping['transform'], card)
                #print(splitted_dimensions)
                if splitted_dimensions:
                    for typed_dim in splitted_dimensions:
                        dim_dict['ResourceID'] = dim['ResourceID']
                        dim_dict['brocade.id'] = dim['brocade.id']
                        dim_dict['card'] = card
                        dim_dict['source_field'] = node_mapping['from']
                        dim_dict['Dimension Value'] = typed_dim['value']
                        dim_dict['Dimension Type'] = typed_dim['type']
                        dim_dict['Dimension Measurement Unit'] = typed_dim['unit']
                        dim_list.append(dim_dict)
                        dim_dict = {}

    source_df = pd.DataFrame(dim_list)
    number_of_records = len(dim_list)
#    source_df['Dimension Type'] = source_df['Dimension Type'].astype(int)
    
    # save
    save_it = save_mapped_set(source_df, package, resource_model, card)
    #source_df.to_csv('out/' + dataset + '_dims.csv')
    del source_df
    del source_list
    del dim_list

    return '%s saved: %s' % (card, number_of_records)

## Make Single

In [27]:
def make_single(source_list, resource_model, package, dataset, card):
    
    single_df = pd.DataFrame(source_list)
    #single_df.to_csv('out/' + package + 'single.csv', index=False)

    mappings = multi_value_mappings[card]
    obj_dict = {}
    obj_list = []
    
    for obj in source_list:
        obj_dict['ResourceID'] = obj['ResourceID']
        obj_dict['brocade.id'] = obj['brocade.id']
        obj_dict['card'] = 'AA' ## dummy for sorting later on
        obj_dict['order'] = '0'

        if 'mpm' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Museum Plantin-Moretus'
        elif 'lh' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Letterenhuis'
        elif 'rub' in obj['brocade.type'][0]['value']:
            obj_dict['Keeper'] = 'Rubenianum'

        obj_dict['Object Number'] = obj['brocade.id'].replace('_#', ':m')    
        obj_dict['Object Number Type'] = 'record identifiers'
        obj_dict['Object Identifier'] = add_ark_identifier_by_row_value(package, obj['ResourceID'])
        obj_dict['Object Identifier Type'] = 'object identifier'
        obj_list.append(obj_dict)
        obj_dict = {}
    
    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)    
    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del obj_list

    return '%s saved: %s' % (card, number_of_records)

## Production Actors

In [28]:
def make_plain_ire(source_list, resource_model, package, dataset, card, model_class):
    
    m_dict = {}
    node_mapping_dict = {}
    isaar_list = []
    card = card
    mappings = multi_value_mappings[card]
    print(dataset)
    
    #print(mappings['include_codes'])
    for ire in source_list:
        if ire['ire.type'][0]['value'] in mappings['include_codes']: 
            if model_class == 'e73':
                m_dict['ResourceID'] = ire['ResourceID']
            elif model_class == 'e22':   
                m_dict['ResourceID'] = ire['ResourceID']
            m_dict['brocade.id'] = ire['brocade.id']
            m_dict['card'] = card
            m_dict['order'] = ire['order']
            m_dict['ire.isaar.uuid'] = ire['ire.isaar'][0]['uuid']
            au_code = ire['ire.isaar'][0]['value']
            
            #Quick fix for MPM
            if dataset == 'mpmtk':
                m_dict['ire.isaar.id'] = au_code + ':1'
            
            #Quick fix for LH
            if dataset in ['lhtk', 'lhsc', 'lhhs']:                
                if au_code == 'au::46030:1':                    
                    m_dict['ire.isaar.id'] = 'au::46030:1'                                
                elif au_code == 'au::114141:2':                    
                    m_dict['ire.isaar.id'] = 'au::114141:2'  
                elif au_code == 'au::18121:1':                    
                    m_dict['ire.isaar.id'] = 'au::18121:1'                                                                                                                                                  
                elif au_code == 'au::5420:3':                    
                    m_dict['ire.isaar.id'] = 'au::5420:3'                                                                                                                                                  
                elif au_code == 'au::5420:3':                    
                    m_dict['ire.isaar.id'] = 'au::5420:3'                     
                elif au_code == 'au::47769:1':                    
                    m_dict['ire.isaar.id'] = 'au::47769:1' 
                elif au_code == 'au::33117:1':                    
                    m_dict['ire.isaar.id'] = 'au::33117:1' 
                elif au_code == 'au::40049:1':                    
                    m_dict['ire.isaar.id'] = 'au::40049:1' 
                elif au_code == 'au::6645:1':                    
                    m_dict['ire.isaar.id'] = 'au::6645:1' 
                elif au_code == 'au::34521:1':                    
                    m_dict['ire.isaar.id'] = 'au::34521:1' 
                elif au_code == 'au::25344:1':                    
                    m_dict['ire.isaar.id'] = 'au::25344:1' 
                elif au_code == 'au::45684:1':                    
                    m_dict['ire.isaar.id'] = 'au::45684:1' 
                elif au_code == 'au::7356:1':                    
                    m_dict['ire.isaar.id'] = 'au::7356:1' 
                elif au_code == 'au::12170:1':                    
                    m_dict['ire.isaar.id'] = 'au::12170:1' 
                                        

                else:
                    m_dict['ire.isaar.id'] = au_code + ':1'
            
            # hack for Affiche
            if dataset == 'lhps': 
                if au_code == 'au::20710:2:N':                    
                    m_dict['ire.isaar.id'] = 'au::20710:2'                                
                elif au_code == 'au::5420:3':                    
                    m_dict['ire.isaar.id'] = 'au::5420:3'                
                elif au_code == '114141:2':                    
                    m_dict['ire.isaar.id'] = '114141:2'                  
                elif au_code == '114141:2':                    
                    m_dict['ire.isaar.id'] = '114141:2'  
                elif au_code == 'au::18121:1':                    
                    m_dict['ire.isaar.id'] = 'au::18121:1'                                                          
                else:
                    m_dict['ire.isaar.id'] = au_code + ':1'                                        
                
            m_dict['source role'] = ire['ire.type'][0]['value']
            m_dict['ire.role'] = ire['ire.type'][0]['value']
            if 'ire.markdown' in ire.keys():
                m_dict['ire.markdown'] = ire['ire.markdown'][0]['value']
            isaar_list.append(m_dict)
            m_dict = {}
        
    dataset_df = pd.DataFrame(isaar_list)
    number_of_records = len(isaar_list)
    if len(dataset_df) > 0:            
        #dataset_df['ire.identifier'] = dataset_df['ire.isaar.id'].map(lookup_imaginary_df.set_index('archesID')['ark'])
        
        dataset_df['ire.widget'] = dataset_df['ire.isaar.id'].map(lookup_imaginary_df.set_index('archesID')['json'])
        dataset_df['ire.role'] = dataset_df['ire.role'].map(lookup_relation_df.set_index('code')['concept'])
        
    
        for node_mappings in mappings['node_mappings']:
            node_mapping_dict[node_mappings['from']] = node_mappings['to']        
        dataset_df = dataset_df.rename(columns=node_mapping_dict)
        
        save_it = save_mapped_set(dataset_df, package, resource_model, card)

    del dataset_df
    del source_list
    del isaar_list

    return '%s saved: %s' % (card, number_of_records)


## Object Types

In [29]:
def make_object_types(single_list, source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    no_obj_dict = {}
    no_inc_dict = {}
    obj_list = []
    default_dict_items = {}
    single_id_list = []
    source_id_list = []
    no_type_list = []
    no_include_codes_list = []
    
    for source in source_list:
        source_id_list.append(source['brocade.id'])

    for single in single_list:
        single_id_list.append(single['brocade.id'])
    
    # records without object codes:        
    no_type_list = list(set(single_id_list) - set(source_id_list))
        
#    print('single', len(single_list))
#    print('has types', len(source_list))
#    print('no types', len(no_type_list), no_type_list)
    
    ## Generate from the ones in the mapping and that have an object code: source_list
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                if 'include_codes' in node_mapping.keys(): 
                    if source_value in node_mapping['include_codes']:
                        obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], 'e22'))
                        if 'function' in node_mapping.keys():
                            if node_mapping['function']['name'] == 'column_lookup':
                                source_value = obj[node_mapping['from']][0]['value']                                
                                if obj['dataset'] == 'rubdoc' and source_value == 'dcdo':
                                    lookup_value = 'dcdo_rub'
                                else:
                                    lookup_value = source_value
                                _type = lookup_column_value(lookup_value, 
                                        node_mapping['function']['args']['lookup_file'],
                                        card, obj['brocade.id'], node_mapping['from'])
                                obj_dict[node_mapping['to']] = _type                            
                        obj_list.append(obj_dict)
                        obj_dict = {}
                    else:
                        no_include_codes_list.append(obj['brocade.id'])
                        

    single_dict = {item['brocade.id']: item for item in single_list}    
    for no_obj in no_type_list:            
        if no_obj in single_dict:
            no_obj_single = single_dict[no_obj]
            #print(f"Match found: {no_obj} and {no_obj_single}")
        
            no_obj_dict.update(make_default_dict(no_obj_single, card, 'none', 'default_object_type', 'e22'))
            _dataset = single['brocade.id'].split(':')[1]
            #print('no_type_list: ', single['brocade.id'],_dataset)#
            default_type = lookup_column_value(_dataset, 
                                        'default_object_types.csv',
                                        card, single['brocade.id'], 'none')
            no_obj_dict['Object Type'] = default_type                            
            obj_list.append(no_obj_dict)
            no_obj_dict = {}
    
    print(len(source_list), len(no_type_list), len(no_include_codes_list))

    for no_inc in no_include_codes_list:
        if no_inc in single_dict:
            no_inc_single = single_dict[no_inc]

            no_inc_dict.update(make_default_dict(no_inc_single, card, 'none', 'default_object_type', 'e22'))
            _dataset = single['brocade.id'].split(':')[1]
            default_type = lookup_column_value(_dataset, 
                                        'default_object_types.csv',
                                        card, single['brocade.id'], 'none')
            no_inc_dict['Object Type'] = default_type                            
            obj_list.append(no_inc_dict)
            no_inc_dict = {}
             
    
    source_df = pd.DataFrame(obj_list)
    
    if package == 'pkg_rub' and resource_model != 'Iconografie':
        if resource_model == 'Brief':
            drop_codes_list = ["dtrvat","dtrvbm","dtrvbu","dtrvfk","dtrvfo","dtrvhs","dtrvka","dtrvnt","dtrvod","dtrvpr","dtrvre","dtrvrg","dtrvty"]        
        elif resource_model == 'Foto':        
            drop_codes_list = ["dtrvnt","dtrvre"]            
        elif resource_model == 'Tekstdrager':                    
            drop_codes_list = ["dtrvfo"]    
        source_df = source_df.loc[source_df['source_code'].isin(drop_codes_list) == False]
        drop_codes_list = []
        
    number_of_records = len(source_df)
    
    save_it = save_mapped_set(source_df, package, resource_model, card)
    del source_df
    del source_list
    del no_type_list
    return '%s saved: %s' % (card, number_of_records)

## Acquisition Numbers

In [30]:
def make_acquisition_numbers(source_list, resource_model, package, dataset, card):

    dim_dict = {}
    dim_list = []
    _acq_number_list = []
    _acq_number_dict = []

    card = card
    mapping = multi_value_mappings[card]
    
    for dim in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in dim.keys():
                if package == 'pkg_mpm':
                    pass
#                    order = 1
#                    for _acq_number in dim['adm.inv']:                    
#                        if order == 2:
#                            dim_dict['ResourceID'] = dim['ResourceID']
#                            dim_dict['brocade.id'] = dim['brocade.id']
#                            dim_dict['card'] = card
#                            dim_dict[node_mapping['to']] = _acq_number['value']
#                            _acq_number = ''
#                            constants = get_constant_type(mapping, node_mapping['from'])
#                            for _type, _value in constants.items():
#                                dim_dict[_type] = _value
#                            dim_list.append(dim_dict)
#                            dim_dict = {}
#                        order += 1 
                else:

                    dim_dict['ResourceID'] = dim['ResourceID']
                    dim_dict['brocade.id'] = dim['brocade.id']
                    dim_dict['card'] = card
                    dim_dict['order'] = dim['order']
                    for _acq_number in dim['adm.inv']:                    
                        _acq_number_list.append(_acq_number['value'])
                    _acq_number = ', '.join(_acq_number_list)
                    dim_dict[node_mapping['to']] = _acq_number
                    _acq_number_list = []
                    _acq_number = ''
                    constants = get_constant_type(mapping, node_mapping['from'])
                    for _type, _value in constants.items():
                        dim_dict[_type] = _value
                    dim_list.append(dim_dict)
                    dim_dict = {}
                    
                

    source_df = pd.DataFrame(dim_list)
    number_of_records = len(dim_list)    

    # save
    if len(source_df) > 0:
        
        # hack for truncated strings. in source
        add_trail_df = pd.read_csv(lookup_path + 'adm-inv-commas-20231023.csv')
        source_df.set_index('brocade.id', inplace=True)
        add_trail_df.set_index('brocade.id', inplace=True)
        source_df.update(add_trail_df)
        source_df.reset_index(inplace=True)    
        source_df = source_df[["ResourceID","brocade.id","card","order","Acquisition Number","Acquisition Number Type"]]            
        
        save_it = save_mapped_set(source_df, package, resource_model, card)

    dim_dict = {}
    dim_list = []
    del source_df
    del source_list

    return '%s saved: %s' % (card, number_of_records)

## make_production_technique_note

In [31]:
def make_production_technique_note(source_list, resource_model, package, dataset, card):
    mappings = multi_value_mappings[card]
    columns = get_from_keys(mappings)
    obj_dict = {}
    obj_list = []
    default_dict_items = {}
    
    for obj in source_list:
        for node_mapping in mappings['node_mappings']:
            if node_mapping['from'] in obj.keys():
                source_value = obj[node_mapping['from']][0]['value']
                obj_dict.update(make_default_dict(obj, card, source_value, node_mapping['from'], 'e22'))
                obj_dict[node_mapping['to']] = source_value
                obj_list.append(obj_dict)
                obj_dict = {}

    source_df = pd.DataFrame(obj_list)
    number_of_records = len(obj_list)
    if number_of_records > 0:
        save_it = save_mapped_set(source_df, package, resource_model, card)
        del source_df
        del source_list
        return '%s saved: %s' % (card, number_of_records)

## Other identifiers

In [32]:
def make_other_identifier(source_list, resource_model, package, dataset, card):

    dim_dict = {}
    dim_list = []
    _acq_number_list = []
    _acq_number_dict = []

    card = card
    mapping = multi_value_mappings[card]
    
    for dim in source_list:
        for node_mapping in mapping['node_mappings']:
            if node_mapping['from'] in dim.keys():
                if package == 'pkg_mpm': 
                    order = 1
                    for _acq_number in dim['adm.inv']:                    
                        if order == 2:
                            dim_dict['ResourceID'] = dim['ResourceID']
                            dim_dict['brocade.id'] = dim['brocade.id']
                            dim_dict['card'] = card
                            dim_dict[node_mapping['to']] = _acq_number['value']
                            _acq_number = ''
                            constants = get_constant_type(mapping, node_mapping['from'])
                            for _type, _value in constants.items():
                                dim_dict[_type] = _value
                            dim_list.append(dim_dict)
                            dim_dict = {}
                        order += 1 

    source_df = pd.DataFrame(dim_list)
    number_of_records = len(dim_list)    

    # save
    if len(source_df) > 0:
        save_it = save_mapped_set(source_df, package, resource_model, card)

    dim_dict = {}
    dim_list = []
    del source_df
    del source_list

    return '%s saved: %s' % (card, number_of_records)


## Dams Links

In [33]:
dams_dir = '../../source/static/dams/'
def make_dams(resource_model, package, dataset):
    read_name = dams_dir + 'e22_DamsLinks_' + package.split('_')[1] + '_' + resource_model + '.csv'
    print(read_name)
    save_name = '%s%s/%s/%s.csv' % (save_path, package.split('_')[1], resource_model, 'e22_DamsLinks')
    print(save_name)
    try:
        shutil.copy2(read_name, save_name) 
        _return_msg = 'e22_DamsLinks saved'
    except:
        _return_msg = 'no dams file'
    
    return _return_msg

## Generate dataset

In [34]:
%%time
source_df = []
record_dict = {}
record_list = []
records_list = []

source_place_list = ["rubhs"]
file_places_list = ["lhps"]


for resource_model, packages in resource_model_list.items():
    print(resource_model)    
    for package, datasets in packages.items():
        
        ObjectTypes_List = []
        DefaultObjectTypes_List = []
        HallmarkTypes_list = []
        Marks_list = []
        Items_list = []
        ProductStadiums_list = []
        DocumentStatus_list = []
        Carrier_list = []
        Formats_list = []
        
        # FROM RDM
        ProductionActors_list = []
        ProductionTechniques_list = []
        ExtraProductionTechniques_list = []
        Materials_list = []
        Colours_list = []
        Acquisitions_list = []
        Archives_list = []
        Conditions_list = []
        DimensionSourceNotes_list = []
        AnnotationsMaterial_list = []
        Dimensions_list = []
        Single_list = []
        AcquisitionNumbers_List = []

        PlacesSource_list = []
        ProductionTechniqueNote_list = []
        OtherIdentifiers_list = []
        
        print('- ', package)
        for dataset in datasets:
            if dataset_size == 'all':
                source_file_name = source_path + dataset + '.json'
            else:
                source_file_name = source_path + 'slices/' + dataset + '_' + dataset_size +'.json'

            with open(source_file_name) as f:
                records = json.load(f)
                
            print('   - ', dataset)
            for record in records:
                
                for card, v in multi_value_mappings.items():
                    brocade_id = record['ID']
                    _order = brocade_id.split('_')[1].replace('#', '')
                    brocade_uuid = record['groups']['adm']['Entries'][_order]['Fields']['uuid_mat'][0]['value']
                    for group_key in get_group_keys(v): 
                        if group_key in record['groups'].keys():
                            group_values = record['groups'][group_key]
                            for order, item in group_values['Entries'].items():
                                if any(key in item['Fields'].keys() for key in get_field_keys(v)):
                                    record_dict['card'] = card
                                    record_dict['order'] = order
                                    record_dict['dataset'] = dataset
                                    for _key, _value in item['Fields'].items():
                                        record_dict['%s.%s' % (group_key, _key)] = _value
                                    record_dict['ResourceID'] = brocade_uuid
                                    record_dict['brocade.id'] = brocade_id

                                    record_list.append(record_dict)
                                    record_dict = {}   
                                    
                                if card == 'e22_ObjectTypes':
                                    ObjectTypes_List.extend(record_list)                                    
                                if card == 'e22_HallmarkTypes':
                                    HallmarkTypes_list.extend(record_list)                                    
                                if card == 'e22_Marks':
                                    Marks_list.extend(record_list)  
                                if card == 'e22_Items':
                                    Items_list.extend(record_list)
                                if card == 'e22_ProductStadiums':
                                    ProductStadiums_list.extend(record_list)                                    
                                if card == 'e22_DocumentStatus':
                                    DocumentStatus_list.extend(record_list)                                    
                                if card == 'e22_Carrier':
                                    Carrier_list.extend(record_list)                                    
                                if card == 'e22_Formats':
                                    Formats_list.extend(record_list)

                                # FROM RDM
                                if card == 'e22_ProductionActors':
                                    ProductionActors_list.extend(record_list)                                    
                                if card == 'e22_ProductionTechniques':
                                    ProductionTechniques_list.extend(record_list)                                    
                                if card == 'e22_ExtraProductionTechniques':
                                    ExtraProductionTechniques_list.extend(record_list)                                    
                                if card == 'e22_Colours':
                                    Colours_list.extend(record_list)                                    
                                if card == 'e22_Materials':
                                    Materials_list.extend(record_list)                                    
                                if card == 'e22_Acquisitions':                    
                                    Acquisitions_list.extend(record_list)
                                if card == 'e22_Archives':
                                    Archives_list.extend(record_list)
                                if card == 'e22_Conditions':
                                    Conditions_list.extend(record_list)                                    
                                if card == 'e22_DimensionSourceNotes':
                                    DimensionSourceNotes_list.extend(record_list)                                    
                                if card == 'e22_AnnotationsMaterial':
                                    AnnotationsMaterial_list.extend(record_list)
                                if card == 'e22_Dimensions':
                                    Dimensions_list.extend(record_list)                                    

                                if card == 'e22_Single':
                                    Single_list.extend(record_list)                                    
                                if card == 'e22_AcquisitionNumbers':
                                    AcquisitionNumbers_List.extend(record_list)                                    
                                if card == 'e22_ProductionTechniqueNote':
                                    ProductionTechniqueNote_list.extend(record_list)                                    
                                if card == 'e22_OtherIdentifiers':
                                    OtherIdentifiers_list.extend(record_list)                                    
                                    
                                    

                                    
                                record_list = []                                

                                
    
        if len(Acquisitions_list) > 0:
            Acquisitions = make_acquisitions(Acquisitions_list, resource_model, package, dataset, 'e22_Acquisitions')                    
            Acquisitions_list = []
            print('      - ', Acquisitions)
        if len(AcquisitionNumbers_List) > 0:
            AcquisitionNumbers = make_acquisition_numbers(AcquisitionNumbers_List, resource_model, package, dataset, 'e22_AcquisitionNumbers')
            AcquisitionNumbers_List = []
            print('      - ', AcquisitionNumbers)
        if len(AnnotationsMaterial_list) > 0:
            AnnotationsMaterial = make_annotations(AnnotationsMaterial_list, resource_model, package, dataset, 'e22_AnnotationsMaterial', 'e22')
            AnnotationsMaterial_list =[]
            print('      - ', AnnotationsMaterial)    
        if len(Archives_list) > 0:
            Archives = make_archives(Archives_list, resource_model, package, dataset, 'e22_Archives')
            Archives_list = []
            print('      - ', Archives)    
        if len(Carrier_list) > 0:
            Carrier = make_carriers(Carrier_list, resource_model, package, dataset, 'e22_Carrier')
            Carrier_list = []
            print('      - ', Carrier)    
        if len(Conditions_list) > 0:
            Conditions = make_conditions(Conditions_list, resource_model, package, dataset, 'e22_Conditions', 'e22')
            Conditions_list = []
            print('      - ', Conditions)    
        if len(Dimensions_list) > 0:
            Dimensions = make_dimensions(Dimensions_list, resource_model, package, dataset, 'e22_Dimensions')
            Dimensions_list = []
            print('      - ', Dimensions)    
        if len(DimensionSourceNotes_list) > 0:
            DimensionSourceNotes = make_dimension_source_notes(DimensionSourceNotes_list, resource_model, package, dataset, 'e22_DimensionSourceNotes')
            DimensionSourceNotes_list = []
            print('      - ', DimensionSourceNotes)    
        if len(DocumentStatus_list) > 0:
            DocumentStatus = e22_make_with_function_and_include_codes(DocumentStatus_list, resource_model, package, dataset, 'e22_DocumentStatus')
            DocumentStatus_list = []
            print('      - ', DocumentStatus)    
        if len(Formats_list) > 0:
            Formats = make_formats(Formats_list, resource_model, package, dataset, 'e22_Formats')
            Formats_list = []
            print('      - ', Formats)    
        if len(HallmarkTypes_list) > 0:
            HallmarkTypes = e22_make_with_function_and_include_codes(HallmarkTypes_list, resource_model, package, dataset, 'e22_HallmarkTypes')
            HallmarkTypes_list = []
            print('      - ', HallmarkTypes)    
        if len(Items_list) > 0:
            Items = make_items(Items_list, resource_model, package, dataset, 'e22_Items')
            Items_list = []
            print('      - ', Items)    
        if len(Marks_list) > 0:
            Marks = make_marks(Marks_list, resource_model, package, dataset, 'e22_Marks')
            Marks_list = []
            print('      - ', Marks)    
        if len(Materials_list) > 0:
            Materials = make_with_include_codes(Materials_list, resource_model, package, dataset, 'e22_Materials', 'e22')
            Materials_list = []
            print('      - ', Materials)    
        if len(ProductionActors_list) > 0:
            ProductionActors = make_plain_ire(ProductionActors_list, resource_model, package, dataset, 'e22_ProductionActors', 'e22')
            ProductionActors_list =[]
            print('      - ', ProductionActors)    
        if len(ProductStadiums_list) > 0:
            ProductStadiums = e22_make_with_function_and_include_codes(ProductStadiums_list, resource_model, package, dataset, 'e22_ProductStadiums')
            ProductStadiums_list = []
            print('      - ', ProductStadiums)    
        if len(ProductionTechniques_list) > 0:
            ProductionTechniques = make_with_include_codes(ProductionTechniques_list, resource_model, package, dataset, 'e22_ProductionTechniques', 'e22')
            ProductionTechniques_list = []
            print('      - ', ProductionTechniques)
        if len(ExtraProductionTechniques_list) > 0:
            ExtraProductionTechniques = make_with_include_codes(ExtraProductionTechniques_list, resource_model, package, dataset, 'e22_ExtraProductionTechniques', 'e22')
            ExtraProductionTechniques_list = []
            print('      - ', ExtraProductionTechniques)
        if len(Single_list) > 0:
            Single = make_single(Single_list, resource_model, package, dataset, 'e22_Single')
            print('      - ', Single)
        if len(ObjectTypes_List) >= 0:
            ObjectTypes = make_object_types(Single_list, ObjectTypes_List, resource_model, package, dataset, 'e22_ObjectTypes')
            Single_list = []
            ObjectTypes_List = []
            print('      - ', ObjectTypes)                                    
        if len(OtherIdentifiers_list) >= 0:
            OtherIdentifiers = make_other_identifier(OtherIdentifiers_list, resource_model, package, dataset, 'e22_OtherIdentifiers')
            OtherIdentifiers_list = []
            print('      - ', OtherIdentifiers)                                    
        if len(ProductionTechniqueNote_list) >= 0:
            ProductionTechniqueNote = make_production_technique_note(ProductionTechniqueNote_list, resource_model, package, dataset, 'e22_ProductionTechniqueNote')
            ProductionTechniqueNote_list = []
            print('      - ', ProductionTechniqueNote)                                    

    #DamsLinks = make_dams(resource_model, package, dataset)
    #print('      - ', DamsLinks)   
       
          
print('---------') 
print(datetime.datetime.now())
%reset -f 



Iconografie
-  pkg_mpm
   -  mpmtk
      -  e22_AcquisitionNumbers saved: 0
      -  e22_Archives saved: 2
      -  e22_Carrier saved: 19
      -  e22_Dimensions saved: 38
      -  e22_DimensionSourceNotes saved: 19
      -  e22_Items saved: 19
      -  e22_Marks saved: 11
mpmtk
      -  e22_ProductionActors saved: 2
      -  e22_ProductionTechniques saved: 18
      -  e22_Single saved: 19
19 0 0
      -  e22_ObjectTypes saved: 19
      -  e22_OtherIdentifiers saved: 0
      -  None
Foto
-  pkg_mpm
   -  mpmph
      -  e22_AcquisitionNumbers saved: 0
      -  e22_AnnotationsMaterial saved: 0
      -  e22_Archives saved: 97
      -  e22_Dimensions saved: 12
      -  e22_DimensionSourceNotes saved: 97
      -  e22_Items saved: 91
mpmph
      -  e22_ProductionActors saved: 0
      -  e22_Single saved: 97
97 0 41
      -  e22_ObjectTypes saved: 97
      -  e22_OtherIdentifiers saved: 0
      -  None
Brief
-  pkg_mpm
   -  mpmbr
      -  e22_AcquisitionNumbers saved: 0
      -  e22_Annotati