In [954]:
import csv

In [955]:
# Input: CSV export that the processing cell reads with csv.DictReader.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
export_file = '/Users/jdamerow/UpConsulting/files/latex/test_file.csv'
# Output: text file that receives the rendered LaTeX entries.
result_file_path = '/Users/jdamerow/UpConsulting/files/latex/result.txt'

In [956]:

# LaTeX fragment for one person; filled with (last name, first name).
# The last name is set in small caps.
author_template = "\\textsc{%s}, %s"
# LaTeX fragment for an ISBN; filled with the ISBN string.
isbn_template = "\\textsc{isbn}: %s."

In [957]:
# Separator placed between formatted names when several people are joined.
AUTHOR_SEPARATER = "; "

In [958]:
def get_last_first(name):
    """Split a personal name into a ``(last, first)`` tuple.

    Two layouts are recognised:

    * ``"Last, First"`` -- split at the first comma.
    * ``"First Middle Last"`` -- split at the last space.

    A name without comma or space (e.g. ``"Plato"``) is returned entirely
    as the last name with an empty first name. Both parts are stripped of
    surrounding whitespace.

    Args:
        name: The name string to split.

    Returns:
        tuple: ``(last, first)``; either part may be an empty string.
    """
    name = name.strip()
    if ',' in name:
        last, first = name.split(',', 1)
        return last.strip(), first.strip()
    if ' ' in name:
        first, last = name.rsplit(' ', 1)
        return last.strip(), first.strip()
    # Single token: slicing around rfind() == -1 would cut off the last
    # character, so treat the whole token as the last name instead.
    return name, ''

In [959]:
class Author:
    """A person attached to a record (used for both authors and editors)."""

    def __init__(self):
        # Full name as found in the export.
        self.name = ''
        # Display order of this person within the record.
        self.order = 0

    def get_last_name(self):
        """Return the last-name part of ``self.name``."""
        return get_last_first(self.name)[0]

    def get_first_name(self):
        """Return the first-name part of ``self.name``."""
        return get_last_first(self.name)[1]

    def get_name_for_sort(self):
        """Return the name as ``"<last>, <first>"`` for use as a sort key."""
        return self.get_last_name() + ", " + self.get_first_name()

    def get_order(self):
        """Return the display order of this person."""
        return self.order

In [960]:
class Document:
    """Base class holding the bookkeeping fields common to all record types."""

    def __init__(self):
        # All textual fields start out empty.
        self.category_number = self.print_date = self.isiscb_id = ''
        # Position of the document in the sorted output; 0 until assigned.
        self.index = 0

In [961]:
# LaTeX entry for a book. Book.render fills the placeholders in this order:
# counter, author/editor string, title, editors string, publisher, year,
# ISBN fragment, record ID (URL), record ID (link text).
latex_template_book = """
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s. \\textit{%s}%s. %s, %s. %s \\textsc{\\href{https://data.isiscb.org/isis/citation/%s}{%s}}.
\\vspace{2ex} \\end{footnotesize}
\\paragraph{}
\n
"""

# NOTE(review): not referenced anywhere in the visible code; the "(Eds.)" /
# "(Ed.)" suffix is built inline instead. Confirm before removing.
editors_template = ", %s (Eds.)"

class Book(Document):
    """A book record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.publisher = ''
        self.isbn = ''
        # Chapter documents belonging to this book.
        self.chapters = []
        # Record IDs of the related citations listed for this book.
        self.chapter_ids = []

    def get_sort_by_value(self):
        """Return the sort key: the authors' names, or the editors' if there are no authors."""
        people = self.authors if self.authors else self.editors
        return "; ".join([p.get_name_for_sort() for p in people])

    def get_editor_string(self):
        """Return the formatted editors followed by " (Ed.)" or " (Eds.)".

        Editors whose name does not yield both a last and a first name are
        skipped. Returns an empty string when no editor remains.
        """
        editors_latex = []
        for e in (self.editors or []):
            last, first = get_last_first(e.name)
            if last and first:
                editors_latex.append(author_template % (last, first))
        if not editors_latex:
            return ""
        eds_suffix = " (Ed.)" if len(editors_latex) == 1 else " (Eds.)"
        return AUTHOR_SEPARATER.join(editors_latex) + eds_suffix

    def render(self, counter):
        """Return the LaTeX entry for this book.

        Args:
            counter: Value printed in bold at the start of the entry.

        Returns:
            str: ``latex_template_book`` with all placeholders filled in.
        """
        authors_latex = []
        for a in (self.authors or []):
            last, first = get_last_first(a.name)
            authors_latex.append(author_template % (last, first))

        # Single source of truth for editor formatting; empty when there is
        # no usable editor.
        editors_with_suffix = self.get_editor_string()

        author_editor_string = ""
        editors_string = ""
        if authors_latex:
            # Authors lead the entry; editors (if any) follow the title.
            author_editor_string = AUTHOR_SEPARATER.join(authors_latex)
            if editors_with_suffix:
                editors_string = ", " + editors_with_suffix
        elif editors_with_suffix:
            # No authors: the editors take the leading position instead.
            author_editor_string = editors_with_suffix

        isbn = ''
        if self.isbn:
            isbn = isbn_template % (self.isbn)
        return latex_template_book % (
            counter, author_editor_string, self.title, editors_string,
            self.publisher, self.year, isbn, self.isiscb_id, self.isiscb_id)


In [962]:
# LaTeX entry for an article. Article.render fills the placeholders in this
# order: counter, authors, title, journal, volume fragment, issue fragment,
# year, pages fragment, record ID (URL), record ID (link text).
latex_template_article = """
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s.  ``%s.'' \\textit{%s} %s%s (%s)%s. \\textsc{\\href{https://data.isiscb.org/isis/citation/%s}{%s}}.
\\vspace{2ex} \\end{footnotesize}
\\paragraph{}
\n
"""
# Optional fragments; each is only inserted when the matching field is non-empty.
volume_template = " %s"
issue_template = ", %s"
pages_template = ": %s"
class Article(Document):
    """A journal article record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.title = ''
        self.year = 0
        self.journal = ''
        self.publisher = ''
        self.isbn = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''

    def get_sort_by_value(self):
        """Return the sort key: the authors' sortable names joined by "; "."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def render(self, counter):
        """Return the LaTeX entry for this article.

        Args:
            counter: Value printed in bold at the start of the entry.

        Returns:
            str: ``latex_template_article`` with all placeholders filled in.
        """
        authors_latex = []
        for a in self.authors:
            # Shared helper instead of a private copy of the splitting rules.
            last, first = get_last_first(a.name)
            authors_latex.append(author_template % (last, first))

        # Volume, issue and pages are optional; empty fields add nothing.
        vol = volume_template % (self.volume) if self.volume else ''
        issue = issue_template % (self.issue) if self.issue else ''
        pages = pages_template % (self.pages) if self.pages else ''

        return latex_template_article % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title,
            self.journal, vol, issue, self.year, pages,
            self.isiscb_id, self.isiscb_id)


In [963]:
# LaTeX entry for a chapter. Chapter.render fills the placeholders in this
# order: counter, authors, chapter title, book title, editor string, year,
# index of the containing book, pages, record ID (URL), record ID (link text).
# NOTE(review): "\\textit{%s.}," prints a period directly followed by a comma
# after the book title -- confirm this punctuation is intended.
latex_template_chapter = """
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s.  ``%s.'' In \\textit{%s.}, edited by %s (%s) [ref. %s], %s. \\textsc{\\href{https://data.isiscb.org/isis/citation/%s}{%s}}.
\\vspace{2ex} \\end{footnotesize}
\\paragraph{}
\n
"""

class Chapter(Document):
    """A book chapter record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.publisher = ''
        # The containing book; render() reads its title, index and editors.
        self.book = None
        self.pages = ''

    def get_sort_by_value(self):
        """Return the sort key: the authors' sortable names joined by "; "."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def get_editor_string(self):
        """Return the chapter's own editors followed by " (Ed.)" or " (Eds.)".

        Editors whose name does not yield both a last and a first name are
        skipped. Returns an empty string when no editor remains.
        """
        editors_latex = []
        for e in (self.editors or []):
            last, first = get_last_first(e.name)
            if last and first:
                editors_latex.append(author_template % (last, first))
        if not editors_latex:
            return ""
        eds_suffix = " (Ed.)" if len(editors_latex) == 1 else " (Eds.)"
        return AUTHOR_SEPARATER.join(editors_latex) + eds_suffix

    def render(self, counter):
        """Return the LaTeX entry for this chapter.

        The editors shown are those of the containing book; the chapter's
        own editors are used only when the book provides none.

        Args:
            counter: Value printed in bold at the start of the entry.

        Returns:
            str: ``latex_template_chapter`` with all placeholders filled in.
        """
        authors_latex = []
        for a in self.authors:
            # Shared helper instead of a private copy of the splitting rules.
            last, first = get_last_first(a.name)
            authors_latex.append(author_template % (last, first))

        # Evaluate the book's editor string once, falling back to our own.
        editor_string = self.book.get_editor_string() or self.get_editor_string()

        return latex_template_chapter % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title,
            self.book.title, editor_string, self.year, self.book.index,
            self.pages, self.isiscb_id, self.isiscb_id)


In [964]:
def create_author(author_string):
    """Build an ``Author`` from one ``||``-separated export entry.

    Recognised parts (after stripping whitespace):

    * ``AuthorityName <name>``   -> ``author.name``
    * ``ACRDisplayOrder <value>`` -> ``author.order``, stored as a number

    The display order is converted to ``float`` so that sorting by it is
    numeric ("10" after "2") and never mixes strings with the numeric
    default. A value that cannot be parsed leaves the default of 0.

    Args:
        author_string: The raw entry for a single person.

    Returns:
        Author: The populated author; unmatched fields keep their defaults.
    """
    author = Author()
    for part in author_string.split('||'):
        part = part.strip()
        if part.startswith('AuthorityName '):
            author.name = part[len('AuthorityName '):]
        elif part.startswith('ACRDisplayOrder '):
            raw_order = part[len('ACRDisplayOrder '):].strip()
            try:
                author.order = float(raw_order)
            except ValueError:
                # Unparsable order: keep the numeric default so sort keys
                # stay comparable with each other.
                pass
    return author

In [965]:
def get_publisher(publisher_string):
    """Extract the publisher name from a ``||``-separated export entry.

    Args:
        publisher_string: The raw entry to search.

    Returns:
        str: The text following the first ``AuthorityName `` prefix, or an
        empty string when no such part exists (instead of ``None``, which
        would be printed literally as "None" by the ``%s`` templates).
    """
    for part in publisher_string.split('||'):
        part = part.strip()
        if part.startswith('AuthorityName '):
            return part[len('AuthorityName '):]
    return ''

In [966]:
def build_book(row):
    """Build a ``Book`` from one CSV row.

    Args:
        row: Mapping of column name to value for a single record. Reads the
            columns 'Record ID', 'Author', 'Editor', 'Title',
            'Year of publication', 'Place Publisher', 'ISBN' and
            'Related Citations'.

    Returns:
        Book: The populated book. ``chapter_ids`` holds the first
        ``CitationID`` of every non-empty related citation.
    """
    book = Book()
    book.isiscb_id = row['Record ID']

    # People are separated by '//'; blank author entries are ignored.
    for author_string in row['Author'].split('//'):
        if author_string.strip():
            book.authors.append(create_author(author_string))
    book.authors.sort(key=lambda author: author.order)

    for editor_string in row['Editor'].split('//'):
        book.editors.append(create_author(editor_string))

    book.title = row['Title']
    book.year = row['Year of publication']
    book.publisher = get_publisher(row['Place Publisher'])
    book.isbn = row['ISBN']

    for rel_cit in row['Related Citations'].split('//'):
        if not rel_cit.strip():
            continue
        for info in rel_cit.split("||"):
            # Strip first: parts that follow a " || " separator start with a
            # space and would otherwise never match the prefix.
            info = info.strip()
            if info.startswith("CitationID "):
                book.chapter_ids.append(info[len("CitationID "):].strip())
                break

    return book

In [967]:
def build_chapter(row):
    """Build a ``Chapter`` from one CSV row.

    The containing book is taken from the first related citation that is
    not marked with a ``CCRType`` other than "Includes Chapter"; at this
    point it is only a placeholder ``Book`` carrying title and record ID.

    Args:
        row: Mapping of column name to value for a single record. Reads the
            columns 'Record ID', 'Author', 'Title', 'Year of publication',
            'Editor', 'Related Citations' and 'Pages Free Text'.

    Returns:
        Chapter: The populated chapter. ``chapter.book`` is always a
        ``Book`` (an empty one when no citation qualifies), so later code
        can dereference it safely.
    """
    chapter = Chapter()
    chapter.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        chapter.authors.append(create_author(author_string))
    chapter.authors.sort(key=lambda author: author.order)

    chapter.title = row['Title']
    chapter.year = row['Year of publication']

    for editor_string in row['Editor'].split('//'):
        chapter.editors.append(create_author(editor_string))

    for cit in row['Related Citations'].split("//"):
        book_title = ""
        book_id = ""
        # A citation counts as the containing book unless it declares a
        # different relation type.
        is_book_info = True
        for part in cit.split("||"):
            part = part.strip()
            if part.startswith("CCRType "):
                if part[len("CCRType "):].strip() != "Includes Chapter":
                    is_book_info = False
            if part.startswith("CitationTitle "):
                book_title = part[len("CitationTitle "):]
            if part.startswith("CitationID "):
                book_id = part[len("CitationID "):].strip()

        if is_book_info:
            chapter.book = Book()
            chapter.book.title = book_title
            chapter.book.isiscb_id = book_id
            break

    if chapter.book is None:
        # Every citation was of another type: fall back to the same empty
        # placeholder an empty 'Related Citations' cell already produces,
        # rather than leaving None to fail later with AttributeError.
        chapter.book = Book()

    pages = row['Pages Free Text']
    if pages:
        # Keep only the text in front of the "(From" marker.
        chapter.pages = pages[:pages.index("(From")].strip() if "(From" in pages else ""

    return chapter

In [968]:
def _display_order(author):
    """Sort key: the author's display order as a number (0 if missing or unparsable)."""
    try:
        return float(author.order)
    except (TypeError, ValueError):
        return 0.0


def _value_before_from(text):
    """Return the stripped text in front of "(From", or '' if the marker or text is absent."""
    if not text or "(From" not in text:
        return ''
    return text[:text.index("(From")].strip()


def build_article(row):
    """Build an ``Article`` from one CSV row.

    Args:
        row: Mapping of column name to value for a single record. Reads the
            columns 'Record ID', 'Author', 'Title', 'Year of publication',
            'Place Publisher', 'Journal Link', 'Journal Volume',
            'Journal Issue' and 'Pages Free Text'.

    Returns:
        Article: The populated article, with authors in display order as
        for books and chapters.
    """
    article = Article()

    article.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        article.authors.append(create_author(author_string))
    # Numeric key so the order is correct whether it was stored as text or
    # as a number; the sort is stable, so ties keep their CSV order.
    article.authors.sort(key=_display_order)

    article.title = row['Title']
    article.year = row['Year of publication']
    article.publisher = get_publisher(row['Place Publisher'])
    article.journal = row['Journal Link']

    # Each of these cells keeps only the text in front of "(From".
    article.volume = _value_before_from(row['Journal Volume'])
    article.issue = _value_before_from(row['Journal Issue'])
    article.pages = _value_before_from(row['Pages Free Text'])
    return article

In [969]:
# Read the CSV export, build one document object per supported row, number
# the documents in sort order and write the selected ones as LaTeX.
with open(export_file) as csvfile:
    reader = csv.DictReader(csvfile)
    docs = []
    # Lookup from record ID to document; used below to link chapters and books.
    docs_by_id = {}
    for row in reader:
        doc = None
        # Rows with any other 'Record Type' leave doc as None and are skipped.
        if row['Record Type'] == "Book":
            doc = build_book(row)
        elif row['Record Type'] == "Article":
            doc = build_article(row)
        elif row['Record Type'] == "Chapter":
            doc = build_chapter(row)

        if doc:
            doc.category_number = row['CategoryNumbers'].strip()
            doc.print_date = row['Published Print'].strip()
            docs.append(doc)
            docs_by_id[doc.isiscb_id] = doc
            
    def get_sort_by_value(doc):
        return doc.get_sort_by_value()

    # Order by each document's own sort key, then number the documents from 1.
    # Every document gets an index, including those that are not written out.
    docs.sort(key=get_sort_by_value)
    counter = 1
    for doc in docs:
        doc.index = counter
        counter += 1
        
    with open(result_file_path, 'w') as result_file:
        for doc in docs:
            # Swap the placeholder Book made by build_chapter for the full
            # Book record when that record is part of this export.
            # NOTE(review): doc.book is dereferenced unconditionally --
            # confirm build_chapter always sets it.
            if type(doc) == Chapter and doc.book.isiscb_id in docs_by_id:
                book = docs_by_id[doc.book.isiscb_id]
                if book:
                    doc.book = book
            # Attach the chapter documents that are present in this export.
            if type(doc) == Book and doc.chapter_ids:
                for chap_id in doc.chapter_ids:
                    if chap_id in docs_by_id:
                        doc.chapters.append(docs_by_id[chap_id])
            # Write only classified records (non-empty category number) that
            # have not been printed before (empty 'Published Print' value).
            # Entries are written back-to-back with no separator in between.
            if doc.category_number and not doc.print_date:
                print("Writing " + str(doc.isiscb_id))
                result_file.write(doc.render(doc.index).strip())


Writing CBB308395057
Writing CBB453708168
Writing CBB672417584
Writing CBB291156817
Writing CBB950706964
Writing CBB353594942
Writing CBB199416066
Writing CBB059160432
Writing CBB090740798
Writing CBB907943319
Writing CBB773087293
Writing CBB221441913
Writing CBB509643575
Writing CBB115450433
Writing CBB288354648
Writing CBB624645272
Writing CBB602324921
Writing CBB919798201
Writing CBB019390551
Writing CBB440677065
Writing CBB809567866
Writing CBB287870208
Writing CBB320944474
Writing CBB552773139
Writing CBB875455759
Writing CBB829647867
Writing CBB638968957
Writing CBB536128730
Writing CBB241308101
Writing CBB973206853
Writing CBB237482450
Writing CBB200080234
Writing CBB171736085
Writing CBB202856288
Writing CBB101471819
Writing CBB567572610
Writing CBB336239265
Writing CBB418039421
Writing CBB585946379
Writing CBB874060847
Writing CBB206197364
Writing CBB060346560
Writing CBB354002504
Writing CBB441805648
Writing CBB802103774
Writing CBB039822113
Writing CBB138957616
Writing CBB13