In [1]:
import requests
import json
import pandas as pd
from pymarc import MARCReader, MARCWriter, Record, Field, Subfield

In [2]:
# File names of the input and output MARC files.
# These assignments only bind the path strings; nothing is created on disk here.
input_file = 'VIAF_person_toodeldud.mrc'
output_file = 'VIAF_person_updated.mrc'

In [3]:
# Seconds to wait for VIAF before giving up on a single lookup, so that one
# stalled connection cannot hang the whole run.
_VIAF_TIMEOUT_SECONDS = 30


def _first_link(viaf_links, key):
    """Return the first value stored under ``key`` in a justlinks payload, or ''.

    Values may be a list (e.g. ``"ISNI": ["..."]``) or a bare string
    (e.g. ``"viafID": "..."``); missing keys and empty lists yield ''.
    """
    value = viaf_links.get(key)
    if isinstance(value, (list, tuple)):
        value = value[0] if value else ''
    return str(value) if value else ''


def _fetch_viaf_links(identifier):
    """Look up the VIAF links for one ERRR identifier; return a dict or None on failure.

    Network errors, non-200 responses, invalid JSON and non-object payloads
    are reported with ``print`` and turned into ``None`` instead of raising.
    """
    url = f'https://www.viaf.org/viaf/sourceID/ERRR|{identifier}/justlinks.json'
    try:
        viaf_response = requests.get(url, timeout=_VIAF_TIMEOUT_SECONDS)
    except requests.RequestException as e:
        print(f"VIAF API request failed for record {identifier}: {e}")
        return None

    if viaf_response.status_code != 200:
        print(f"VIAF API request failed with status code {viaf_response.status_code}")
        return None

    try:
        viaf_data = viaf_response.json()
    except ValueError as e:
        print(f"Error parsing VIAF response JSON: {e}")
        return None

    if not isinstance(viaf_data, dict):
        print(f"Unexpected VIAF response for record {identifier}: expected a JSON object")
        return None
    return viaf_data


def _build_identifier_fields(viaf_links):
    """Build the new 024 fields (ISNI, VIAF URI, Wikidata URL) present in ``viaf_links``."""
    isni_code = _first_link(viaf_links, 'ISNI')
    viaf_code = _first_link(viaf_links, 'viafID')
    wkp_code = _first_link(viaf_links, 'WKP')

    identifier_fields = []

    # New 024 field for ISNI (if existing)
    if isni_code:
        identifier_fields.append(Field(
            tag='024',
            indicators=['7', ' '],
            subfields=[
                Subfield('a', f'{isni_code}'),
                Subfield('2', 'isni')
            ]
        ))

    # New 024 field for VIAF (if existing)
    if viaf_code:
        identifier_fields.append(Field(
            tag='024',
            indicators=['7', ' '],
            subfields=[
                Subfield('a', f'http://viaf.org/viaf/{viaf_code}'),
                Subfield('2', 'uri')
            ]
        ))

    # New 024 field for Wikidata (if existing)
    if wkp_code:
        identifier_fields.append(Field(
            tag='024',
            indicators=['8', ' '],
            subfields=[
                Subfield('a', f'https://www.wikidata.org/wiki/{wkp_code}')
            ]
        ))

    return identifier_fields


def _insert_after_control_fields(record, new_fields):
    """Insert ``new_fields`` into ``record`` directly after its last control field."""
    insert_at = 0
    for position, field in enumerate(record.fields):
        if isinstance(field, Field) and field.is_control_field():
            insert_at = position + 1
    record.fields = record.fields[:insert_at] + list(new_fields) + record.fields[insert_at:]


def process_marc_records(input_marc_file_path, output_marc_file_path, num_records_to_process=None):
    """Copy MARC records to a new file, adding 024 identifier fields taken from VIAF.

    For every record the 001 value is looked up in VIAF as an ``ERRR`` source
    ID. Any ISNI, VIAF ID and Wikidata (WKP) link found is added as a new 024
    field directly after the record's control fields.

    Every readable record is written to the output. A record whose lookup
    fails (network error, non-200 status, invalid JSON, missing 001) is
    written unchanged rather than dropped, so the output never loses records
    because of a transient VIAF problem. Failures are reported with ``print``.

    Parameters
    ----------
    input_marc_file_path : str
        Path of the binary MARC file to read.
    output_marc_file_path : str
        Path of the binary MARC file to write (overwritten if it exists).
    num_records_to_process : int or None, optional
        Maximum number of input records to consume; ``None`` (default)
        processes the whole file and ``0`` processes none.
    """
    # Both files are closed by the ``with`` statement, even if an error occurs.
    with open(input_marc_file_path, 'rb') as input_marc_file, \
            open(output_marc_file_path, 'wb') as output_marc_file:
        reader = MARCReader(input_marc_file)
        writer = MARCWriter(output_marc_file)

        for record_count, record in enumerate(reader):
            # Check the limit before doing any work so that a limit of 0
            # really processes nothing.
            if num_records_to_process is not None and record_count >= num_records_to_process:
                break

            # MARCReader yields None for a record it could not decode.
            if record is None:
                reason = getattr(reader, 'current_exception', None)
                print(f"Skipping unreadable MARC record #{record_count + 1}: {reason}")
                continue

            # Extracting the identifier from the record (it may be absent)
            id_fields = record.get_fields('001')
            identifier = (id_fields[0].value() or '').strip() if id_fields else ''

            if identifier:
                viaf_links = _fetch_viaf_links(identifier)
            else:
                print(f"MARC record #{record_count + 1} has no 001 identifier; written unchanged")
                viaf_links = None

            if viaf_links:
                _insert_after_control_fields(record, _build_identifier_fields(viaf_links))

            # Writing the record, enriched or not, to the output file
            writer.write(record)

# Use the paths configured in the earlier cell instead of repeating the literals;
# only the first 10 records are processed.
process_marc_records(input_file, output_file, num_records_to_process=10)

In [4]:
# Read back the file produced by process_marc_records for a visual check.
marc_file_path = 'VIAF_person_updated.mrc'

with open(marc_file_path, 'rb') as marc_file:
    reader = MARCReader(marc_file)

    # print() renders each record through its string form, one after another.
    for record in reader:
        print(record)

=LDR  00684nz  a2200193n  4500
=001  a11124167
=003  ErRR
=008  990324|n|adnnnaabn\\\\\\\\\\\|\|a|\\\\\\
=024  7\$a0000000120981326$2isni
=024  7\$ahttp://viaf.org/viaf/17823$2uri
=024  8\$ahttps://www.wikidata.org/wiki/Q502
=040  \\$aErRR$best$cErRR$dErTUR
=100  0\$aStendhal,$cpseudonüüm,$d1783-1842.
=400  1\$aBeyle, Marie Henri,$d1783-1842.
=400  1\$aБейль, Анри Мари,$d1783-1842.
=400  1\$aStendhal,$cpseud.,$d1783-1842.
=400  1\$aСтендаль,$cпсевд.,$d1783-1842.
=670  \\$aEE, 8. kd., 1995; БСЭ, 24, 1976.
=680  \\$iPrantsuse kirjanik, kodanikunimega Marie Henri Beyle.

=LDR  00644nz  a2200169n  4500
=001  a11124179
=003  ErRR
=008  990217|n\adnnnaabn\\\\\\\\\\\|\|a|\\\\\\
=024  7\$a0000000122796570$2isni
=024  7\$ahttp://viaf.org/viaf/49228757$2uri
=024  8\$ahttps://www.wikidata.org/wiki/Q9061
=040  \\$aErRR$best$cErRR$dErTUR$dErTrtKR
=100  1\$aMarx, Karl,$d1818-1883.
=400  1\$aMarx, Károly,$d1818-1883.
=400  1\$aМаркс, Карл,$d1818-1883.
=670  \\$aENE, 5. kd., 1973 ; EE, 6. kd., 1992 ; 

In [5]:
# Testing: query VIAF directly for a single ERRR record identifier.
# HTTPS matches the URL used in process_marc_records, and the timeout keeps
# the kernel from hanging if VIAF does not answer.
response = requests.get(
    'https://www.viaf.org/viaf/sourceID/ERRR|a11124209/justlinks.json',
    timeout=30,
)
response

<Response [200]>

In [6]:
# Show the raw response body exactly as the server returned it.
print(response.text)

  { "viafID":"146149542590900300633",
    "ERRR":["a11124209"]}



In [7]:
# Show the same body parsed from JSON into Python objects.
print(response.json())

{'viafID': '146149542590900300633', 'ERRR': ['a11124209']}
