In [None]:
from pymarc import Record, Field
from pymarc import MARCWriter

In [None]:
import csv
from dataclasses import dataclass
import datetime
import re

In [None]:
@dataclass
class EtdItem:
    """One row of the ETD metadata CSV, with one attribute per CSV column.

    Rows are turned into instances elsewhere in this notebook by unpacking a
    ``{header: cell}`` dict into the constructor, so every attribute name has
    to match a CSV column header exactly.  No field has a default, so all of
    them are required.  Fields are annotated as ``str``; the dataclass itself
    performs no parsing or validation of the values.
    """
    abstract: str
    # Advisors 1-7: name, email and title for each.
    advisor1: str
    advisor1_email: str
    advisor1_title: str
    advisor2: str
    advisor2_email: str
    advisor2_title: str
    advisor3: str
    advisor3_email: str
    advisor3_title: str
    advisor4: str
    advisor4_email: str
    advisor4_title: str
    advisor5: str
    advisor5_email: str
    advisor5_title: str
    advisor6: str
    advisor6_email: str
    advisor6_title: str
    advisor7: str
    advisor7_email: str
    advisor7_title: str
    # Authors 1-5: email, first name, institution, last name, middle name, suffix.
    author1_email: str
    author1_fname: str
    author1_institution: str
    author1_lname: str
    author1_mname: str
    author1_suffix: str
    author2_email: str
    author2_fname: str
    author2_institution: str
    author2_lname: str
    author2_mname: str
    author2_suffix: str
    author3_email: str
    author3_fname: str
    author3_institution: str
    author3_lname: str
    author3_mname: str
    author3_suffix: str
    author4_email: str
    author4_fname: str
    author4_institution: str
    author4_lname: str
    author4_mname: str
    author4_suffix: str
    author5_email: str
    author5_fname: str
    author5_institution: str
    author5_lname: str
    author5_mname: str
    author5_suffix: str
    # Remaining item-level columns, in the same alphabetical order as above.
    availability: str
    availability_description: str
    calc_url: str
    context_key: str
    ctmtime: str
    defense_date: str
    degree_application: str
    degree_name: str
    department: str
    disciplines: str
    document_type: str
    embargo_date: str
    file_name: str
    file_size: str
    fulltext_url: str
    hide_author_email: str
    issue: str
    keywords: str
    legacy_department: str
    lsu_author_email: str
    publication_date: str
    release_date: str
    season: str
    semester_graduation: str
    submission_date: str
    title: str
    urn: str

In [None]:
source_csv = 'DigCommPossiblyNotInCatalog.csv'

# Parse the CSV: the first row supplies the column names, and every following
# row is paired with those names and unpacked into one EtdItem.
rows_dataclass = []
with open(source_csv, newline='', encoding='utf-8') as csvfile:
    csv_rows = csv.reader(csvfile, delimiter=",", quotechar='"')
    headers = next(csv_rows, None)  # stays None only when the file has no rows
    rows_dataclass.extend(
        EtdItem(**dict(zip(headers, values))) for values in csv_rows
    )

In [None]:
def join_name_parts(first, middle, last, suffix):
    """Build a direct-order personal name that ends with a full stop.

    Args:
        first: Given name; skipped when empty or whitespace-only.
        middle: Middle name; skipped when empty or whitespace-only.
        last: Family name; skipped when empty or whitespace-only.
        suffix: Optional suffix such as ``"Jr."``.  Its periods are removed
            and it is appended after a comma.

    Returns:
        ``"First Middle Last."`` or, with a suffix, ``"First Middle Last, Suffix."``.
    """
    # Strip first and filter afterwards: filtering on the raw value would let
    # a whitespace-only part through and leave a double space in the result.
    stripped_parts = (part.strip() for part in (first, middle, last) if part)
    name_str = ' '.join(part for part in stripped_parts if part)

    # Decide on the cleaned suffix, so a suffix made only of periods or blanks
    # does not produce a dangling ", ." ending.
    suffix_text = (suffix or '').strip().replace('.', '')
    if suffix_text:
        return f'{name_str}, {suffix_text}.'
    return f'{name_str}.'

def reverse_name_parts(first, middle, last):
    """Return a personal name in inverted order: ``"Last, First Middle"``.

    Args:
        first: Given name; skipped when empty or whitespace-only.
        middle: Middle name; skipped when empty or whitespace-only.
        last: Family name.

    Returns:
        The inverted name with each part stripped of surrounding whitespace.
        When there are no given names only the family name is returned (and
        vice versa), so callers never receive a dangling ``", "``.
    """
    given_names = ' '.join(
        part.strip() for part in (first, middle) if part and part.strip()
    )
    surname = (last or '').strip()
    # Join only the pieces that exist so no separator is left hanging.
    return ', '.join(piece for piece in (surname, given_names) if piece)

def year_only(submission_date, publication_date):
    """Return the leading '-'-delimited segment of a date string.

    The submission date is tried first; the publication date is consulted
    only when the submission date's leading segment is empty.  An empty
    string comes back when both leading segments are empty.
    """
    submitted_year = submission_date.partition('-')[0]
    if submitted_year:
        return submitted_year
    return publication_date.partition('-')[0]

def degree_abbr(full_degree):
    """Return the first all-uppercase abbreviation found in parentheses.

    For example ``"Master of Science (MS)"`` gives ``"MS"``.  An empty string
    is returned when no parenthesised run of A-Z letters is present.
    """
    found = re.search(r'\([A-Z]+\)', full_degree)
    if found is None:
        return ''
    # The match always starts with '(' and ends with ')'; drop both.
    return found.group(0)[1:-1]

def make_keyword_subfields(keywords):
    """Convert a ``', '``-separated keyword string into a flat subfield list.

    Args:
        keywords: Keywords separated by a comma followed by a space.

    Returns:
        A list alternating the subfield code ``'a'`` with one keyword, e.g.
        ``['a', 'corn', 'a', 'rice']``.  The list is empty when ``keywords``
        holds no usable entry.
    """
    subfields = []
    for keyword in (keywords or '').split(', '):
        keyword = keyword.strip()
        # Skip blanks (empty input, trailing separator) so that no empty
        # $a subfield ends up in the record.
        if keyword:
            subfields.extend(('a', keyword))
    return subfields

def comma_if_present(text):
    """Return ``text`` followed by a comma, or ``''`` when ``text`` is falsy."""
    return f'{text},' if text else ''

In [None]:
def marc_thesis_template(row_dataclass):
    """Build a pymarc ``Record`` for one thesis from an ``EtdItem``-like row.

    Args:
        row_dataclass: Object exposing the string attributes read below:
            ``author1_fname``, ``author1_mname``, ``author1_lname``,
            ``author1_suffix``, ``title``, ``submission_date``,
            ``publication_date``, ``degree_name``, ``keywords``,
            ``department`` and ``calc_url``.

    Returns:
        A new ``Record`` containing fixed local fields (040, 090, 049, 300,
        336-338, 538) plus row-specific 100, 245, 264, 490, 500, 502, 653,
        710, 830, 856 and 949 fields.  Optional subfields (100 $c, 245 $b,
        710 $b) and the 653 field are left out when the row has no value for
        them, so the record carries no empty subfields for those.
    """
    formal_name = reverse_name_parts(row_dataclass.author1_fname,
                                     row_dataclass.author1_mname,
                                     row_dataclass.author1_lname)
    direct_name = join_name_parts(row_dataclass.author1_fname,
                                  row_dataclass.author1_mname,
                                  row_dataclass.author1_lname,
                                  row_dataclass.author1_suffix)
    publication_date = year_only(row_dataclass.submission_date,
                                 row_dataclass.publication_date)

    # 100: add $c only when the author actually has a suffix.
    author_subfields = ['a', f'{formal_name},']
    suffix_text = comma_if_present(row_dataclass.author1_suffix.strip())
    if suffix_text:
        author_subfields += ['c', suffix_text]
    author_subfields += ['e', 'author.']

    # 245: split at the first colon into title proper and remainder.  With a
    # subtitle, $a ends in " :" and $b in " /"; without one, $b is omitted and
    # $a carries the " /" that introduces $c.
    main_title, _, subtitle = row_dataclass.title.partition(':')
    main_title = main_title.strip()
    subtitle = subtitle.strip()
    if subtitle:
        title_subfields = ['a', f'{main_title} :', 'b', f'{subtitle} /']
    else:
        title_subfields = ['a', f'{main_title} /']
    title_subfields += ['c', direct_name]

    # 500: format the pieces separately so the parenthesis is closed and no
    # stray blank precedes the final period.
    viewed_on = datetime.date.today()
    viewed_note = ('Title from PDF title page '
                   f'(viewed {viewed_on:%B} {viewed_on.day}, {viewed_on.year}).')

    # 653: keep only code/value pairs whose value is non-blank.
    raw_keyword_subfields = make_keyword_subfields(row_dataclass.keywords)
    keyword_subfields = []
    for code, value in zip(raw_keyword_subfields[0::2],
                           raw_keyword_subfields[1::2]):
        if value.strip():
            keyword_subfields += [code, value]

    # 710: add the department subfield only when a department is given.
    corporate_subfields = ['a', "Louisiana State University (Baton Rouge, La.)."]
    department = row_dataclass.department.strip()
    if department:
        corporate_subfields += ['b', f'Department of {department}.']

    record = Record(force_utf8=True)

    record.add_field(
        Field(
            tag = '040',
            indicators = [' ', ' '],
            subfields = [
                'a', "LUU",
                'b', "eng",
                'e', "rda",
                'c', "LUU",
            ]))
    # NOTE(review): the empty 090 $b looks like a placeholder to be filled in
    # later - confirm.
    record.add_field(
        Field(
            tag = '090',
            indicators = [' ', ' '],
            subfields = [
                'b', '',
            ]))
    record.add_field(
        Field(
            tag = '049',
            indicators = [' ', ' '],
            subfields = [
                'a', 'LUUU',
            ]))
    record.add_field(
        Field(
            tag = '100',
            indicators = ['1', ' '],
            subfields = author_subfields,
        ))
    record.add_field(
        Field(
            tag = '245',
            indicators = ['1', '0'],
            subfields = title_subfields,
        ))
    record.add_field(
        Field(
            tag = '264',
            indicators = [' ', '1'],
            subfields = [
                'a', "[Baton Rouge, Louisiana] :",
                'b', "[Louisiana State University],",
                'c', f'[{publication_date}]',
            ]))
    record.add_field(
        Field(
            tag = '300',
            indicators = [' ', ' '],
            subfields = [
                'a', "1 online resource (leaves) :",
                'b', "illustrations, text file, PDF",
            ]))
    record.add_field(
        Field(
            tag = '336',
            indicators = [' ', ' '],
            subfields = [
                'a', 'text',
                'b', 'txt',
                '2', 'rdacontent',
            ]))
    record.add_field(
        Field(
            tag = '337',
            indicators = [' ', ' '],
            subfields = [
                'a', 'computer',
                'b', 'c',
                '2', 'rdamedia',
            ]))
    record.add_field(
        Field(
            tag = '338',
            indicators = [' ', ' '],
            subfields = [
                'a', 'online resource',
                'b', 'cr',
                '2', 'rdacarrier',
            ]))
    record.add_field(
        Field(
            tag = '490',
            indicators = ['1', ' '],
            subfields = [
                'a', '[Thesis] / [Louisiana State University] ; ',
                'v', f'[{publication_date}]',
            ]))
    record.add_field(
        Field(
            tag = '538',
            indicators = [' ', ' '],
            subfields = [
                'a', 'Mode of access: World Wide Web.',
            ]))
    record.add_field(
        Field(
            tag = '500',
            indicators = [' ', ' '],
            subfields = [
                'a', viewed_note,
            ]))
    # NOTE(review): the empty 502 $a is kept as in the original template -
    # presumably a placeholder; confirm whether it should be dropped.
    record.add_field(
        Field(
            tag = '502',
            indicators = [' ', ' '],
            subfields = [
                'a', '',
                'b', degree_abbr(row_dataclass.degree_name),
                'c', 'Louisiana State University',
                'd', f'{publication_date}.',
            ]))
    if keyword_subfields:
        record.add_field(
            Field(
                tag = '653',
                indicators = [' ', ' '],
                subfields = keyword_subfields,
            ))
    record.add_field(
        Field(
            tag = '710',
            indicators = ['2', ' '],
            subfields = corporate_subfields,
        ))
    record.add_field(
        Field(
            tag = '830',
            indicators = [' ', '0'],
            subfields = [
                'a', "Thesis (Louisiana State University (Baton Rouge, La.)) ;",
                'v', f'{publication_date}.',
            ]))
    record.add_field(
        Field(
            tag = '856',
            indicators = ['4', '0'],
            subfields = [
                'u', row_dataclass.calc_url,
            ]))
    record.add_field(
        Field(
            tag = "949",
            indicators = [' ', ' '],
            subfields = [
                'a', f'ELECTRONIC THESES {publication_date} {row_dataclass.author1_lname}',
                'w', 'ASIS',
                'm', 'MIDL-MAIN',
                'l', 'ONLINE',
                't', 'ONLINE',
                'x', 'E-THESIS',
            ]))

    return record

In [None]:
# Build one MARC record for every row whose document_type is exactly 'thesis'.
marc_records = [
    marc_thesis_template(etd_item)
    for etd_item in rows_dataclass
    if etd_item.document_type == 'thesis'
]

In [None]:
# Uncomment to inspect the first generated record as a dict:
# marc_records[0].as_dict()

In [None]:
# NOTE(review): only the first six records (indexes 0-5) are written -
# presumably a sample limit; confirm before treating this file as a full export.
with open('marc_file.mrc', 'wb') as f:
    for record in marc_records[:6]:
        f.write(record.as_marc())

In [None]:
# Total number of records held in marc_records (not the number written to the file).
print(len(marc_records))
    