In [1968]:
import csv

In [1969]:
# CSV export that the final cell reads with csv.DictReader.
export_file = (
    '/Users/jdamerow/UpConsulting/files/latex/'
    'test_file.csv'
)
# Text file that the final cell writes the rendered LaTeX entries to.
result_file_path = (
    '/Users/jdamerow/UpConsulting/files/latex/'
    'result.txt'
)

In [1970]:

# One person: the first value is set in small caps, the second follows a comma.
# Callers pass (last name, first name).
author_template = r"\textsc{%s}, %s"
# ISBN fragment with a small-caps label, terminated by a period.
isbn_template = r"\textsc{isbn}: %s."

In [1971]:
# Both lists (persons within an entry, chapters within a book) are joined
# with the same "; " delimiter.
AUTHOR_SEPARATER = CHAPTER_SEPARATER = "; "

In [1972]:
def get_last_first(name):
    """Split a personal name into a ``(last, first)`` tuple.

    Two layouts are recognised:

    * ``"Last, First"`` -- split at the first comma.
    * ``"First Middle Last"`` -- the final whitespace-separated token is the
      last name, everything before it the first name(s).

    Both parts are returned without surrounding whitespace. A name made of a
    single token (e.g. ``"Plato"``) is returned as ``("Plato", "")`` and an
    empty or ``None`` name as ``("", "")``.
    """
    name = (name or '').strip()
    if ',' in name:
        last, first = name.split(',', 1)
        return last.strip(), first.strip()

    # rsplit(None, 1) splits on the last run of whitespace and yields a single
    # element when there is none; slicing with rfind() == -1 would instead cut
    # off the final character of a one-word name.
    parts = name.rsplit(None, 1)
    if len(parts) == 2:
        return parts[1], parts[0]
    return name, ''

In [1973]:
class Author:
    """A person attached to a record; used for both authors and editors.

    Attributes:
        name: Display name, split into last/first parts by ``get_last_first``.
        order: Sort position of the person; defaults to 0.
    """

    def __init__(self):
        self.name = ''
        self.order = 0

    def get_last_name(self):
        """Return the last-name part of ``name``."""
        return get_last_first(self.name)[0]

    def get_first_name(self):
        """Return the first-name part of ``name``."""
        return get_last_first(self.name)[1]

    def get_name_for_sort(self):
        """Return ``"Last, First"``, the key used to sort documents by person."""
        return self.get_last_name() + ", " + self.get_first_name()

    def get_order(self):
        """Return the person's sort position."""
        # ``self`` was missing from the signature, so every call raised TypeError.
        return self.order

In [1974]:
def build_persons_latex_list(persons):
    """Render each person as a LaTeX name fragment.

    ``persons`` may be ``None`` or empty, in which case an empty list is
    returned. A person is skipped when ``get_last_first`` yields an empty last
    or first name for them.
    """
    rendered = []
    for person in persons or []:
        surname, given = get_last_first(person.name)
        if not (surname and given):
            continue
        rendered.append(author_template % (surname, given))
    return rendered
  

In [1975]:
class Document:
    """Base class holding the fields shared by every record type."""

    def __init__(self):
        # (attribute, default) pairs, applied in declaration order.
        defaults = (
            ('category_number', ''),
            ('print_date', ''),
            ('index', 0),
            ('isiscb_id', ''),
            ('title', ''),
            ('year', 0),
        )
        for attribute, default in defaults:
            setattr(self, attribute, default)

In [1976]:
# Book entry. Placeholders, in order: counter, author/editor string, title,
# editors string, publisher, year, ISBN fragment, record id (URL), record id
# (link text), rendered chapter list.
# Each source line is spelled out as its own literal so that the
# whitespace-only lines of the template stay visible.
latex_template_book = (
    "\n"
    " \n"
    " \n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s. \textit{%s}%s. %s, %s. %s "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    "%s\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)

# One "Includes:" item of a book. Placeholders: chapter authors, chapter
# title, pages fragment, reference fragment.
latex_template_book_chapters = (
    "\n"
    r"\begin{isisdescription} Includes:  %s, " "\n"
    r"``\selectlanguage{frenchb}%s\selectlanguage{english}''%s%s.\end{isisdescription}" "\n"
)

# Small fragments that are only inserted when the corresponding value exists.
chapter_ref = " [ref.~%s]"
editors_template = ", %s (Eds.)"
pages_template = " %s"

class Book(Document):
    """A book record, optionally listing the chapters it includes.

    Attributes (in addition to those of ``Document``):
        authors, editors: Lists of ``Author`` objects.
        publisher, isbn: Strings used in the rendered entry.
        chapters: Chapter documents shown in the "Includes:" list.
        chapter_ids: Record ids of related chapters, resolved to ``chapters``
            by the calling code.
    """

    # Page fragment for the "Includes:" chapter list. It lives on the class
    # because the module-level ``pages_template`` is rebound to the article
    # form (": %s") by later cells; reading the global here made the chapter
    # list depend on which cells had already been executed.
    chapter_pages_template = " %s"

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.publisher = ''
        self.isbn = ''
        self.chapters = []
        self.chapter_ids = []

    @staticmethod
    def _editor_suffix(editors_latex):
        """Return "", " (Ed.)" or " (Eds.)" for zero, one or several editors."""
        if not editors_latex:
            return ""
        return " (Ed.)" if len(editors_latex) == 1 else " (Eds.)"

    def get_sort_by_value(self):
        """Return the sort key: author names, or editor names if there are no authors."""
        persons = self.authors if self.authors else self.editors
        return "; ".join([a.get_name_for_sort() for a in persons])

    def get_editor_string(self):
        """Return the joined editor names plus the (Ed.)/(Eds.) suffix, or ""."""
        editors_latex = build_persons_latex_list(self.editors)
        return AUTHOR_SEPARATER.join(editors_latex) + self._editor_suffix(editors_latex)

    def render(self, counter):
        """Return the LaTeX entry for this book, numbered with ``counter``."""
        authors_latex = build_persons_latex_list(self.authors)
        editors_latex = build_persons_latex_list(self.editors)
        editors_joined = (AUTHOR_SEPARATER.join(editors_latex)
                          + self._editor_suffix(editors_latex))

        # Authors take the leading position; editors then follow the title.
        # Without authors the editors move to the leading position instead.
        author_editor_string = ""
        editors_string = ""
        if authors_latex:
            author_editor_string = AUTHOR_SEPARATER.join(authors_latex)
            if editors_latex:
                editors_string = ", " + editors_joined
        elif editors_latex:
            author_editor_string = editors_joined

        isbn = isbn_template % (self.isbn,) if self.isbn else ''

        # Render the "Includes:" list.
        chapter_latex = []
        for chap in self.chapters or []:
            chapter_authors = build_persons_latex_list(chap.authors)
            chap_pages = self.chapter_pages_template % (chap.pages,) if chap.pages else ''
            # A falsy index (the default is 0) produces no reference.
            chap_ref = chapter_ref % (chap.index,) if chap.index else ""
            chapter_latex.append(latex_template_book_chapters % (
                AUTHOR_SEPARATER.join(chapter_authors), chap.title, chap_pages, chap_ref))

        return latex_template_book % (
            counter, author_editor_string, self.title, editors_string,
            # A missing publisher must not be printed as the text "None".
            self.publisher or '', self.year, isbn, self.isiscb_id, self.isiscb_id,
            CHAPTER_SEPARATER.join(chapter_latex))
                                    


In [1977]:
# Article entry. Placeholders, in order: counter, authors, title, journal,
# volume fragment, issue fragment, year, pages fragment, record id (URL),
# record id (link text).
latex_template_article = (
    "\n"
    " \n"
    " \n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s.  ``%s.'' \textit{%s} %s%s (%s)%s. "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)
# Optional fragments; each is inserted only when its value is present.
volume_template = " %s"
issue_template = ", %s"
pages_template = ": %s"
class Article(Document):
    """A journal article record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.title = ''
        self.year = 0
        self.journal = ''
        self.publisher = ''
        self.isbn = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''

    def get_sort_by_value(self):
        """Return the sort key built from the authors' "Last, First" names."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def render(self, counter):
        """Return the LaTeX entry for this article, numbered with ``counter``."""
        # Use the shared name splitter instead of a private copy of its logic,
        # so every record type formats names the same way.
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        # Volume, issue and pages are optional fragments.
        vol = volume_template % (self.volume,) if self.volume else ''
        issue = issue_template % (self.issue,) if self.issue else ''
        pages = pages_template % (self.pages,) if self.pages else ''

        return latex_template_article % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title, self.journal,
            vol, issue, self.year, pages, self.isiscb_id, self.isiscb_id)


In [1978]:
# Chapter entry. Placeholders, in order: counter, authors, chapter title,
# book title, editor string, year, book reference number, pages, record id
# (URL), record id (link text).
latex_template_chapter = (
    "\n"
    " \n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s.  ``%s.'' In \textit{%s.}, "
    r"edited by %s (%s) [ref. %s], %s. "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)

class Chapter(Document):
    """A book chapter record and its LaTeX rendering.

    ``book`` must reference a book-like object (with ``title``, ``index`` and
    ``get_editor_string()``) before ``render`` is called.
    """

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.publisher = ''
        self.book = None
        self.pages = ''

    def get_sort_by_value(self):
        """Return the sort key built from the authors' "Last, First" names."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def get_editor_string(self):
        """Return the joined editor names plus the (Ed.)/(Eds.) suffix, or ""."""
        # Same filtering and formatting as the shared helper, without a copy.
        editors_latex = build_persons_latex_list(self.editors)
        if not editors_latex:
            eds_suffix = ""
        elif len(editors_latex) == 1:
            eds_suffix = " (Ed.)"
        else:
            eds_suffix = " (Eds.)"
        return AUTHOR_SEPARATER.join(editors_latex) + eds_suffix

    def render(self, counter):
        """Return the LaTeX entry for this chapter, numbered with ``counter``."""
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        # Prefer the book's editors; fall back to the editors recorded on the
        # chapter itself. The book's string is computed once.
        editor_string = self.book.get_editor_string() or self.get_editor_string()

        return latex_template_chapter % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title, self.book.title,
            editor_string, self.year, self.book.index, self.pages,
            self.isiscb_id, self.isiscb_id)


In [1979]:
# Thesis entry. Placeholders, in order: counter, authors, title, school,
# year, record id (URL), record id (link text).
latex_template_thesis = (
    "\n"
    " \n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s. ``%s.'' Dissertation at %s  (%s). "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)
class Thesis(Document):
    """A dissertation record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.isbn = ''
        self.school = ''

    def get_sort_by_value(self):
        """Return the sort key built from the authors' "Last, First" names."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def render(self, counter):
        """Return the LaTeX entry for this thesis, numbered with ``counter``."""
        # Use the shared name splitter instead of a private copy of its logic.
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        return latex_template_thesis % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title, self.school,
            self.year, self.isiscb_id, self.isiscb_id)

    

In [1980]:
# Multimedia entry. Placeholders, in order: counter, authors, title, year,
# record id (URL), record id (link text).
latex_template_media = (
    "\n"
    "\n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s. ``%s.'' (%s). "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)

class Media(Document):
    """A multimedia record and its LaTeX rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []

    def get_sort_by_value(self):
        """Return the sort key built from the authors' "Last, First" names."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def render(self, counter):
        """Return the LaTeX entry for this item, numbered with ``counter``."""
        # Use the shared name splitter instead of a private copy of its logic.
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        return latex_template_media % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title, self.year,
            self.isiscb_id, self.isiscb_id)
    

In [1981]:
# Essay review entry. The first line takes the same ten placeholders as an
# article; the description line adds: reviewed book's author/editor string,
# book title, book year, reference fragment.
latex_template_essay_review = (
    "\n"
    "\n"
    r"\noindent\begin{footnotesize}\textbf{%s}\hspace{0.5em} %s.  ``%s.'' \textit{%s} %s%s (%s)%s. "
    r"\textsc{\href{https://data.isiscb.org/isis/citation/%s}{%s}}." "\n"
    r"\begin{isisdescription}Essay review of %s, \emph{%s}  (%s)%s.\end{isisdescription}" "\n"
    "\n"
    r"\vspace{2ex} \end{footnotesize}" "\n"
    r"\paragraph{}" "\n"
    "\n"
)

# Optional fragments; each is inserted only when its value is present.
volume_template = " %s"
issue_template = ", %s"
pages_template = ": %s"
ref_template = " [ref. %s]"

class EssayReview(Document):
    """An essay review record: an article-style entry plus the reviewed book.

    ``book`` must reference a book-like object (with ``authors``, ``editors``,
    ``title``, ``year`` and ``index``) before ``render`` is called.
    """

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.journal = ''
        self.publisher = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''
        self.book = None

    def get_sort_by_value(self):
        """Return the sort key built from the authors' "Last, First" names."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def render(self, counter):
        """Return the LaTeX entry for this review, numbered with ``counter``."""
        # Use the shared name splitter instead of a private copy of its logic.
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        # Volume, issue and pages are optional fragments.
        vol = volume_template % (self.volume,) if self.volume else ''
        issue = issue_template % (self.issue,) if self.issue else ''
        pages = pages_template % (self.pages,) if self.pages else ''

        book_authors_latex = build_persons_latex_list(self.book.authors)
        book_editors_latex = build_persons_latex_list(self.book.editors)

        # The reviewed book is credited to its authors; editors are used, with
        # an (Ed.)/(Eds.) suffix, only when it has no authors. The template
        # has no slot for editors alongside authors, so none is built.
        if book_authors_latex:
            book_author_editor_string = AUTHOR_SEPARATER.join(book_authors_latex)
        elif book_editors_latex:
            eds_suffix = " (Ed.)" if len(book_editors_latex) == 1 else " (Eds.)"
            book_author_editor_string = AUTHOR_SEPARATER.join(book_editors_latex) + eds_suffix
        else:
            book_author_editor_string = ""

        # A falsy index (the default is 0) produces no reference.
        book_ref_info = ref_template % (self.book.index,) if self.book.index else ""

        return latex_template_essay_review % (
            counter, AUTHOR_SEPARATER.join(authors_latex), self.title,
            self.journal, vol, issue, self.year, pages, self.isiscb_id,
            self.isiscb_id, book_author_editor_string, self.book.title,
            self.book.year, book_ref_info)

    

In [1982]:
def create_author(author_string):
    """Build an ``Author`` from one ``||``-separated export entry.

    Recognised fields are ``AuthorityName <name>`` and
    ``ACRDisplayOrder <number>``; all other fields are ignored.

    The display order is stored as a float so that authors sort numerically
    ("10" after "2") and remain comparable with the default order of 0. A
    value that is not a number leaves the default in place.
    """
    author = Author()
    for part in author_string.split('||'):
        part = part.strip()
        if part.startswith('AuthorityName '):
            author.name = part[len('AuthorityName '):]
        elif part.startswith('ACRDisplayOrder '):
            raw_order = part[len('ACRDisplayOrder '):].strip()
            try:
                author.order = float(raw_order)
            except ValueError:
                # Keep the default order rather than storing unsortable text.
                pass
    return author

In [1983]:
def get_publisher(publisher_string):
    """Return the value of the first ``AuthorityName`` field, or ``''``.

    ``publisher_string`` is a ``||``-separated export entry. An empty string
    is returned when the entry is empty/``None`` or has no ``AuthorityName``
    field, so that a missing publisher is never formatted as the text "None".
    """
    prefix = 'AuthorityName '
    for part in (publisher_string or '').split('||'):
        part = part.strip()
        if part.startswith(prefix):
            return part[len(prefix):]
    return ''

In [1984]:
def build_book(row):
    """Build a ``Book`` from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Editor', 'Title',
    'Year of publication', 'Place Publisher', 'ISBN' and 'Related Citations'.
    Related citations whose ``CCRType`` is "Includes Chapter" contribute their
    ``CitationID`` to ``book.chapter_ids``.
    """
    book = Book()
    book.isiscb_id = row['Record ID']

    for a in row['Author'].split('//'):
        if a.strip():
            book.authors.append(create_author(a))

    def get_order(author):
        # Compare numerically even when the order is still text, and treat a
        # missing or non-numeric order as 0.
        try:
            return float(author.order)
        except (TypeError, ValueError):
            return 0.0
    book.authors.sort(key=get_order)

    # Skip blank entries, as is done for authors, so that an empty 'Editor'
    # column does not produce a nameless editor.
    for e in row['Editor'].split('//'):
        if e.strip():
            book.editors.append(create_author(e))

    book.title = row['Title']
    book.year = row['Year of publication']
    book.publisher = get_publisher(row['Place Publisher'])
    book.isbn = row['ISBN']

    for rel_cit in row['Related Citations'].split('//'):
        if not rel_cit.strip():
            continue
        cit_id = ''
        type_is_chapter = False
        for info in rel_cit.split("||"):
            info = info.strip()
            if info.startswith("CitationID "):
                cit_id = info[len("CitationID "):].strip()
            # Single-space prefix, matching the other builders; the value is
            # stripped, so extra spaces after the field name still work. The
            # former two-space prefix never matched "CCRType <value>".
            if info.startswith("CCRType "):
                if info[len("CCRType "):].strip() == "Includes Chapter":
                    type_is_chapter = True

        if type_is_chapter and cit_id:
            book.chapter_ids.append(cit_id)

    return book

In [1985]:
def build_chapter(row):
    """Build a ``Chapter`` from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Title', 'Year of publication',
    'Editor', 'Related Citations' and 'Pages Free Text'. The first related
    citation that is not marked with a ``CCRType`` other than
    "Includes Chapter" becomes a placeholder ``Book`` carrying only the cited
    title and record id.
    """
    chapter = Chapter()
    chapter.isiscb_id = row['Record ID']

    # Skip blank entries so an empty column does not yield a nameless author.
    for a in row['Author'].split('//'):
        if a.strip():
            chapter.authors.append(create_author(a))

    def get_order(author):
        # Compare numerically even when the order is still text, and treat a
        # missing or non-numeric order as 0.
        try:
            return float(author.order)
        except (TypeError, ValueError):
            return 0.0
    chapter.authors.sort(key=get_order)

    chapter.title = row['Title']
    chapter.year = row['Year of publication']

    for e in row['Editor'].split('//'):
        if e.strip():
            chapter.editors.append(create_author(e))

    for cit in row['Related Citations'].split("//"):
        book_title = ""
        book_id = ""
        # A citation counts as the containing book unless it declares a
        # different relation type.
        is_book_info = True
        for part in cit.split("||"):
            part = part.strip()
            if part.startswith("CCRType "):
                if part[len("CCRType "):].strip() != "Includes Chapter":
                    is_book_info = False
            if part.startswith("CitationTitle "):
                book_title = part[len("CitationTitle "):]
            if part.startswith("CitationID "):
                book_id = part[len("CitationID "):].strip()

        if is_book_info:
            chapter.book = Book()
            chapter.book.title = book_title
            chapter.book.isiscb_id = book_id
            break

    pages = row['Pages Free Text']
    if pages:
        # NOTE(review): a value without "(From" is discarded entirely --
        # confirm that this is intended.
        chapter.pages = pages[:pages.index("(From")].strip() if "(From" in pages else ""

    return chapter

In [1986]:
def build_article(row):
    """Build an ``Article`` from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Title', 'Year of publication',
    'Place Publisher', 'Journal Link', 'Journal Volume', 'Journal Issue' and
    'Pages Free Text'.
    """
    def text_before_from(value):
        # Text preceding "(From", stripped; '' for an empty value or one
        # without the marker.
        # NOTE(review): a value without "(From" is discarded entirely --
        # confirm that this is intended.
        if value and "(From" in value:
            return value[:value.index("(From")].strip()
        return ""

    article = Article()
    article.isiscb_id = row['Record ID']

    # Skip blank entries so an empty 'Author' column does not yield a
    # nameless author that renders as "\textsc{}, ".
    for a in row['Author'].split('//'):
        if a.strip():
            article.authors.append(create_author(a))

    article.title = row['Title']
    article.year = row['Year of publication']
    article.publisher = get_publisher(row['Place Publisher'])
    article.journal = row['Journal Link']

    article.volume = text_before_from(row['Journal Volume'])
    article.issue = text_before_from(row['Journal Issue'])
    article.pages = text_before_from(row['Pages Free Text'])
    return article

In [1987]:
def build_thesis(row):
    """Build a ``Thesis`` from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Title', 'Year of publication'
    and 'School'. The school is taken from the ``AuthorityName`` field of the
    'School' entry; if several are present the last one wins.
    """
    thesis = Thesis()
    thesis.isiscb_id = row['Record ID']

    # Skip blank entries so an empty 'Author' column does not yield a
    # nameless author.
    for a in row['Author'].split('//'):
        if a.strip():
            thesis.authors.append(create_author(a))

    thesis.title = row['Title']
    thesis.year = row['Year of publication']

    for part in row['School'].split('||'):
        # Strip before slicing: the prefix was tested on the stripped text but
        # cut from the unstripped one, which is wrong when the field has
        # leading whitespace.
        part = part.strip()
        if part.startswith('AuthorityName'):
            thesis.school = part[len("AuthorityName "):].strip()

    return thesis

In [1988]:
def build_media(row):
    """Build a ``Media`` object from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Title' and 'Year of publication'.
    """
    media = Media()
    media.isiscb_id = row['Record ID']

    # Skip blank entries so an empty 'Author' column does not yield a
    # nameless author that renders as "\textsc{}, ".
    for a in row['Author'].split('//'):
        if a.strip():
            media.authors.append(create_author(a))

    media.title = row['Title']
    media.year = row['Year of publication']

    return media

In [1989]:
def build_essay_review(row):
    """Build an ``EssayReview`` from one CSV row (a mapping keyed by column name).

    Reads the columns 'Record ID', 'Author', 'Title', 'Year of publication',
    'Place Publisher', 'Journal Link', 'Journal Volume', 'Journal Issue',
    'Pages Free Text' and 'Related Citations'. The first related citation that
    is not marked with a ``CCRType`` other than "Is Reviewed By" becomes a
    placeholder ``Book`` carrying only the cited title and record id.
    """
    def text_before_from(value):
        # Text preceding "(From", stripped; '' for an empty value or one
        # without the marker.
        # NOTE(review): a value without "(From" is discarded entirely --
        # confirm that this is intended.
        if value and "(From" in value:
            return value[:value.index("(From")].strip()
        return ""

    essay_review = EssayReview()
    essay_review.isiscb_id = row['Record ID']

    # Skip blank entries so an empty 'Author' column does not yield a
    # nameless author.
    for a in row['Author'].split('//'):
        if a.strip():
            essay_review.authors.append(create_author(a))

    essay_review.title = row['Title']
    essay_review.year = row['Year of publication']
    essay_review.publisher = get_publisher(row['Place Publisher'])
    essay_review.journal = row['Journal Link']

    essay_review.volume = text_before_from(row['Journal Volume'])
    essay_review.issue = text_before_from(row['Journal Issue'])
    essay_review.pages = text_before_from(row['Pages Free Text'])

    for cit in row['Related Citations'].split("//"):
        book_title = ""
        book_id = ""
        # A citation counts as the reviewed book unless it declares a
        # different relation type.
        is_book_info = True
        for part in cit.split("||"):
            part = part.strip()
            if part.startswith("CCRType "):
                if part[len("CCRType "):].strip() != "Is Reviewed By":
                    is_book_info = False
            if part.startswith("CitationTitle "):
                book_title = part[len("CitationTitle "):]
            if part.startswith("CitationID "):
                book_id = part[len("CitationID "):].strip()

        if is_book_info:
            essay_review.book = Book()
            essay_review.book.title = book_title
            essay_review.book.isiscb_id = book_id
            break

    return essay_review

In [1990]:
# Maps the value of the 'Record Type' column to the function that builds the
# corresponding document; rows of any other type are ignored.
record_builders = {
    "Book": build_book,
    "Article": build_article,
    "Chapter": build_chapter,
    "Thesis": build_thesis,
    "Multimedia Object": build_media,
    "Essay Review": build_essay_review,
}

# Read every row first; the CSV handle is only needed for this step.
with open(export_file) as csvfile:
    reader = csv.DictReader(csvfile)
    docs = []
    docs_by_id = {}
    for row in reader:
        build = record_builders.get(row['Record Type'])
        doc = build(row) if build else None

        # Keep only the first row seen for each record id.
        if doc and doc.isiscb_id not in docs_by_id:
            doc.category_number = row['CategoryNumbers'].strip()
            doc.print_date = row['Published Print'].strip()
            docs.append(doc)
            docs_by_id[doc.isiscb_id] = doc


def get_sort_by_value(doc):
    return doc.get_sort_by_value()


docs.sort(key=get_sort_by_value)

# Number the records that will be printed, in sorted order. These numbers are
# also what chapters, books and reviews use to cross-reference each other.
counter = 1
for doc in docs:
    if doc.category_number and not doc.print_date:
        doc.index = counter
        counter += 1

with open(result_file_path, 'w') as result_file:
    for doc in docs:
        # Replace the placeholder book built from the related citation with
        # the full record when that record is part of this export. A chapter
        # or review without any related book is left untouched here.
        if (isinstance(doc, (Chapter, EssayReview)) and doc.book is not None
                and doc.book.isiscb_id in docs_by_id):
            doc.book = docs_by_id[doc.book.isiscb_id]
        if isinstance(doc, Book) and doc.chapter_ids:
            for chap_id in doc.chapter_ids:
                if chap_id in docs_by_id:
                    doc.chapters.append(docs_by_id[chap_id])
        # print all classified records (e.g., have Category Number)
        # that have not been previously printed (e.g., have Published Print data).
        if doc.category_number and not doc.print_date:
            print("Writing " + str(doc.isiscb_id))
            result_file.write(doc.render(doc.index).strip() + "\n")


Writing CBB777396056
Writing CBB308395057
Writing CBB453708168
Writing CBB176684710
Writing CBB672417584
Writing CBB263243235
Writing CBB291156817
Writing CBB950706964
Writing CBB353594942
Writing CBB199416066
Writing CBB059160432
Writing CBB351504062
Writing CBB144762770
Writing CBB363555597
Writing CBB333984588
Writing CBB090740798
Writing CBB200504751
Writing CBB907943319
Writing CBB773087293
Writing CBB221441913
Writing CBB509643575
Writing CBB614468631
Writing CBB115450433
Writing CBB288354648
Writing CBB624645272
Writing CBB602324921
Writing CBB919798201
Writing CBB621114449
Writing CBB161127975
Writing CBB019390551
Writing CBB398494498
Writing CBB470748088
Writing CBB440677065
Writing CBB568132797
Writing CBB809567866
Writing CBB287870208
Writing CBB320944474
Writing CBB334513367
Writing CBB552773139
Writing CBB875455759
Writing CBB918689629
Writing CBB709882403
Writing CBB829647867
Writing CBB638968957
Writing CBB536128730
Writing CBB241308101
Writing CBB973206853
Writing CBB23