# Importing InKind from FileMaker

We use an XML export of the ``inkind`` table.

In [1]:
import os,sys,re,collections
from copy import deepcopy
from lxml import etree

First the config.

In [2]:
# Locations are built with forward slashes, also on Windows.
HOME_DIR = os.path.expanduser('~').replace('\\', '/')
BASE_DIR = HOME_DIR + '/projects/has/dacs'
INFILE = 'dariah.xml'
INPATH = '/'.join((BASE_DIR, INFILE))

# Namespace prefix (Clark notation) of the elements in the FileMaker export.
FMNS = '{http://www.filemaker.com/fmpxmlresult}'

print('BASE_DIR=' + BASE_DIR)

BASE_DIR=/Users/dirk/projects/has/dacs


## Parse the XML file

In [3]:
# Read the whole export into memory and keep a handle on its root element.
parser = etree.XMLParser(ns_clean=True, remove_blank_text=True)
tree = etree.parse(INPATH, parser)
root = tree.getroot()

## Get the fields and their types

In [4]:
# The field definitions are read from the first <METADATA> element.
fieldroots = list(root.iter(FMNS + 'METADATA'))
fieldroot = fieldroots[0]

# field name -> (lower-cased type, maximum number of repetitions)
fields = collections.OrderedDict(
    (
        field_el.get('NAME'),
        (field_el.get('TYPE').lower(), int(field_el.get('MAXREPEAT'))),
    )
    for field_el in fieldroot.iter(FMNS + 'FIELD')
)
nfields = len(fields)

# One report line per field: type, repeat count, name.
print('{} fields:\n{}'.format(
    nfields,
    '\n'.join(
        '{:<7}{} {}'.format(ftype, maxrepeat, fname)
        for (fname, (ftype, maxrepeat)) in fields.items()
    ),
))

60 fields:
number 1 ikid
number 1 ikid base
text   1 Country
text   1 Year
text   1 VCC
number 1 submit
number 1 approved
text   1 Title
text   2 Academic entity URL
text   1 Academic entity URL 2
text   1 Description of contribution
text   2 Contribution URL
text   1 Contribution URL 2
text   3 Contact person mail
text   1 Contact person mail 2
text   1 Contact person name
text   1 Costs description
number 1 Costs total
text   1 Type of inkind
text   1 Other type of inkind
text   1 Disciplines associated
text   1 TaDiRAH Research activities
text   1 TaDiRAH Research objects
text   1 TaDiRAH Research techniques
text   1 Other keywords
text   1 Creator
text   1 Creation date time
text   1 Last modifier
text   1 Modification date time
number 1 Total costs total
text   1 VCChead approval
text   1 VCChead disapproval
number 1 Whois
text   8 VCC head decision
text   8 dateandtime approval
text   1 Teller
text   1 gOldPassword
text   1 gNewPassword
text   1 gNewPassword2
text   1 message
tex

## Get the data

In [5]:
# The rows are read from the first <RESULTSET> element.
dataroots = [x for x in root.iter(FMNS+'RESULTSET')]
dataroot = dataroots[0]
rows = []
# check name -> list of indices of the rows that failed the check
errors = collections.defaultdict(list)
for (i, r) in enumerate(dataroot.iter(FMNS+'ROW')):
    row = []
    for c in r.iter(FMNS+'COL'):
        # A <COL> can contain several <DATA> elements; they are joined with a
        # separator so that every column is a single string. Empty <DATA>
        # elements contribute an empty string.
        data = ' !FSEP! '.join(x.text or '' for x in c.iter(FMNS+'DATA'))
        row.append(data)
    # Every row must have one column per field found in the metadata.
    if len(row) != nfields:
        errors['Number of fields'].append(i)
    rows.append(row)
if errors:
    for k in sorted(errors):
        # The row indices are ints; str.join accepts strings only.
        print('{:<20}: {}'.format(k, ','.join(str(n) for n in errors[k])))
else:
    print('OK')

OK


In [6]:
# Number of rows collected from the export.
len(rows)

309

## Turn the data into a dict

We represent the data with a dictionary. The keys are the field names.
The values are dictionaries again: their keys are the ikids, and each value is the value that the row with that ikid has for that field.

In [7]:
# field name -> {ikid -> value}; the ikid column itself only supplies the key.
field_data = collections.defaultdict(dict)
for record in rows:
    record_id = record[0]
    for (name, raw) in zip(fields, record):
        if name != 'ikid':
            (_ftype, max_repeat) = fields[name]
            if max_repeat == 1:
                # Non-repeating field: keep the string as it is.
                field_data[name][record_id] = raw
            else:
                # Repeating field: split on the separator, drop empty parts.
                field_data[name][record_id] = [
                    part for part in raw.split(' !FSEP! ') if part != ''
                ]

In [8]:
# The distinct values of the Country field.
{country for country in field_data['Country'].values()}

{'AT', 'BE', 'DE', 'FR', 'GR', 'HR', 'IE', 'IT', 'LU', 'NL', 'RS', 'SI'}

## Extract related tables

In [9]:
# Bookkeeping shared by all calls of extract(), keyed by field name.
relindex = collections.Counter()             # last value id issued
relvalues = collections.defaultdict(dict)    # value -> value id
reltables = collections.defaultdict(list)    # (value id, value) pairs
relxtables = collections.defaultdict(list)   # (ikid, value id) pairs

def extract(modeled_field_data, fname):
    """Move the values of field ``fname`` into a related table.

    The values are read from the module-level ``field_data``. Every distinct
    value gets a numeric id (counting from 1 per field), which is recorded in
    ``reltables[fname]`` as ``(vid, value)`` and in ``relvalues[fname]`` as
    ``value -> vid``.

    * Single-valued field (max repeat 1): ``modeled_field_data[fname][ikid]``
      is set to the id of the value.
    * Multi-valued field: one ``(ikid, vid)`` pair per value is appended to
      ``relxtables[fname]``, and the field is deleted from
      ``modeled_field_data``.

    Mutates ``modeled_field_data`` and the module-level ``relvalues``,
    ``relindex``, ``reltables`` and ``relxtables``. Returns ``None``.
    """
    is_single = fields[fname][1] == 1 # single value or multiple values
    for ikid in field_data[fname]:
        stored = field_data[fname][ikid]
        values = [stored] if is_single else stored
        for value in values:
            vid = relvalues[fname].get(value)
            if vid is None:
                # First occurrence of this value: issue the next id.
                relindex[fname] += 1
                vid = relindex[fname]
                reltables[fname].append((vid, value))
                relvalues[fname][value] = vid
            if is_single:
                modeled_field_data[fname][ikid] = vid
            else:
                relxtables[fname].append((ikid, vid))
    if not is_single:
        # The values now live in the cross table only.
        print('deleting key {}'.format(fname))
        del modeled_field_data[fname]

def model_data(field_list):
    """Return a deep copy of ``field_data`` with the given fields extracted.

    ``extract`` is applied, in order, to every name in ``field_list`` that is
    a known field. Unknown names are reported with an error message and
    skipped. The module-level ``field_data`` itself is left unmodified.
    """
    result = deepcopy(field_data)
    for name in field_list:
        if name in fields:
            extract(result, name)
        else:
            print('ERROR: wrong field {}'.format(name))
    return result

In [10]:
# The fields whose values are moved into related tables.
related_fields = ['Contact person mail', 'Country']
mfield_data = model_data(related_fields)

deleting key Contact person mail
