In [1248]:
import csv

### Proof flag
Change the following to `IS_PROOF = False` if links should go to the public IsisCB site.

In [1249]:
# When true, generated links use the curation URL template and book-review
# output includes record links; when false, the public citation URL is used.
IS_PROOF = True

### File paths
Set the following paths to files that this notebook should read or write.
- `export_file`: path to file that contains the IsisCB export (should be a csv file)
- `result_file_path`: path to file that will contain all publications except book reviews
- `reviews_file_path`: path to file that will contain all book reviews

If you run this notebook on a Windows machine, make sure to escape all backslashes in path names (e.g. `\` should be written as `\\`).

In [1250]:
# Input: the csv export read by this notebook.
# Alternative inputs used previously:
#   '/Users/jdamerow/UpConsulting/files/latex/journal--export.csv'
#   full-test-two.csv
#   '/Users/jdamerow/UpConsulting/files/latex/test_file.csv'
export_file =  '/Users/jdamerow/UpConsulting/files/latex/printdata.csv'
# Output: all publications except book reviews.
result_file_path = '/Users/jdamerow/UpConsulting/files/latex/result.txt'
# Output: all book reviews.
reviews_file_path = '/Users/jdamerow/UpConsulting/files/latex/reviews.txt'
# Output csv for errors; its use is not visible in this part of the notebook --
# presumably it receives the collected `errors` entries (TODO confirm).
errors_file_path = '/Users/jdamerow/UpConsulting/files/latex/errors.csv'

### Code
The following cells contain the code that will turn the exported csv file into latex. Do not change unless you know what you are doing.

In [1251]:
# Person name: last name in small caps, followed by the first name.
author_template = "\\textsc{%s}, %s"
# Non-person name (e.g. an organization): the full name in small caps.
author_template_organization = "\\textsc{%s}"
# ISBN text inserted into book entries.
isbn_template = "\\textsc{isbn}: %s."
# Hyperlinked record id; both slots receive the same id (URL part and link text).
CBB_link_template = "\\textsc{\\href{https://data.isiscb.org/isis/citation/%s}{%s}}"
# Same as above but pointing at the curation URL; chosen when IS_PROOF is true.
CBB_link_template_proof = "\\textsc{\\href{https://data.isiscb.org/isis/curation/citation/%s}{%s}}"

In [1252]:
# Joins the rendered author/editor names of one entry.
AUTHOR_SEPARATER = "; "
# Joins the rendered chapter items listed under a book.
CHAPTER_SEPARATER = "; "

In [1253]:
class DocumentError:
    """A problem recorded for one document.

    Attributes:
        doc: the document the problem belongs to.
        error: human-readable description of the problem.
        error_type: label for the kind of problem (this notebook passes "ERROR").
    """

    def __init__(self, doc, error, error_type):
        self.doc, self.error, self.error_type = doc, error, error_type
        

In [1254]:
# Notebook-wide list of DocumentError objects; code further down appends to it.
errors = []

In [1255]:
def generate_link_latex(cbb_nr):
    """Return the LaTeX hyperlink for the record id ``cbb_nr``.

    The curation-URL template is used when ``IS_PROOF`` is true, otherwise the
    public citation-URL template. The id fills both the URL and the link text.
    """
    link_template = CBB_link_template_proof if IS_PROOF else CBB_link_template
    return link_template % (cbb_nr, cbb_nr)

In [1256]:
def get_last_first(name):
    """Split a personal name into a ``(last, first)`` tuple.

    * ``"Last, First"``  -> split at the first comma.
    * ``"First Last"``   -> split at the last space.
    * a single word      -> ``(word, "")``.

    Both parts are returned without surrounding whitespace.

    Args:
        name: the name as a string.

    Returns:
        Tuple ``(last, first)`` of strings; either may be empty.
    """
    # Strip first so a trailing space is not taken as the last-name separator.
    name = name.strip()
    if ',' in name:
        last, _, first = name.partition(',')
        return last.strip(), first.strip()

    first, separator, last = name.rpartition(' ')
    if not separator:
        # No space at all: slicing with rfind() == -1 would wrongly split off
        # the final character as the "last name".
        return name, ''
    return last.strip(), first.strip()

In [1257]:
class Author:
    """A contributor (author or editor) of a document.

    Attributes:
        name: display name as found in the export.
        order: display order; a number or a numeric string.
        type: authority type; ``"Person"`` names are split into last/first.
    """

    def __init__(self):
        self.name = ''
        self.order = 0
        self.type = ""

    def get_last_name(self):
        """Return the last name for persons, the full name otherwise."""
        # Must test the instance attribute; the bare name `type` is the builtin
        # and never equals "Person".
        if self.type == "Person":
            return get_last_first(self.name)[0]
        return self.name

    def get_first_name(self):
        """Return the first name for persons, an empty string otherwise."""
        if self.type == "Person":
            return get_last_first(self.name)[1]
        return ""

    def get_name_for_sort(self):
        """Return the lower-cased ``"last, first"`` key used for sorting."""
        return self.get_last_name().lower() + ", " + self.get_first_name().lower()

    def get_order(self):
        """Return ``order`` as a float, or 0 if it is not numeric."""
        try:
            return float(self.order)
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else should surface.
            print("Not a valid order value " + str(self.order))
            return 0

In [1258]:
def build_persons_latex_list(persons):
    """Render each contributor in ``persons`` to its LaTeX name string.

    Contributors whose ``type`` is ``'Person'`` are rendered as last/first name;
    all others are rendered with the organization template. A falsy
    ``persons`` yields an empty list.
    """
    def _to_latex(person):
        if person.type == 'Person':
            return author_template % get_last_first(person.name)
        return author_template_organization % (person.name)

    if not persons:
        return []
    return [_to_latex(person) for person in persons]
  

In [1259]:
def prepare_author_editor_string(author_editors_latex):
    """Join rendered names with ``AUTHOR_SEPARATER`` and terminate with ``". "``.

    Returns an empty string when the joined text is empty.
    """
    joined_names = AUTHOR_SEPARATER.join(author_editors_latex).strip()
    return (joined_names + ". ") if joined_names else ""

In [1260]:
class Document:
    """Base class holding the fields shared by all exported record types."""

    def __init__(self):
        # Identification
        self.isiscb_id = ''
        self.index = 0
        # Bibliographic basics
        self.title = ''
        self.year = 0
        # Classification / print bookkeeping
        self.category_number = ''
        self.print_date = ''

In [1261]:
# LaTeX skeleton for one book entry. Slots, in order: running number,
# author (or editor) string, title, editor string, publisher, year, ISBN text,
# record link, chapter list.
latex_template_book = """
 
 
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s\\textit{%s}%s. %s, %s. %s %s.
%s

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

# Wrapper around the joined chapter items of a book.
latex_template_book_chapters = """
\\begin{isisdescription} Includes:  %s.\\end{isisdescription}
"""

# One chapter item. Slots: chapter author string, title, pages, reference.
latex_template_book_chapter_item = """
%s, ``\\selectlanguage{frenchb}%s\\selectlanguage{english}''%s%s
"""

chapter_ref=" [ref.~%s]"
editors_template = ", %s (Eds.)"
# Page format for chapter items. It has its own name because later cells rebind
# `pages_template` to the article format (": %s"); looking that global up at
# render time would silently apply the article format to book chapters.
book_chapter_pages_template = " %s"
# Kept so the notebook-level name is still defined by this cell, as before.
pages_template = book_chapter_pages_template

class Book(Document):
    """A book record with its authors, editors and (optional) chapters.

    Attributes:
        authors, editors: lists of contributor objects (``name``, ``type``).
        publisher: publisher text.
        isbn: ISBN text; omitted from the output when empty.
        chapters: chapter objects rendered in the "Includes" list.
        chapter_ids: record ids of the chapters belonging to this book.
    """

    def __init__(self):
        Document.__init__(self)  # sets title, year and the other shared fields
        self.authors = []
        self.editors = []
        self.publisher = ''
        self.isbn = ''
        self.chapters = []
        self.chapter_ids = []

    def get_sort_by_value(self):
        """Return the sort key built from the authors, or the editors if there are no authors."""
        contributors = self.authors if self.authors else self.editors
        return "; ".join([a.get_name_for_sort() for a in contributors])

    def get_editor_string(self):
        """Return the editors as a LaTeX string with an "(Ed.)"/"(Eds.)" suffix.

        Editors whose name cannot be split into a non-empty last and first
        name are left out. Returns an empty string when no editor remains.
        """
        editors_latex = []
        for e in self.editors or []:
            last, first = get_last_first(e.name)
            if last and first:
                editors_latex.append(author_template%(last, first))

        eds_suffix = ""
        if editors_latex:
            eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "

        return prepare_author_editor_string(editors_latex) + eds_suffix

    def render(self, counter):
        """Return the LaTeX for this book.

        Args:
            counter: running number printed in bold at the start of the entry.
        """
        authors_latex = build_persons_latex_list(self.authors)
        editors_latex = build_persons_latex_list(self.editors)
        # Only used when editors_latex is non-empty.
        eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "

        author_editor_string = ""
        editors_string = ""
        if authors_latex:
            author_editor_string = prepare_author_editor_string(authors_latex)
            if editors_latex:
                editors_string = ", " + prepare_author_editor_string(editors_latex) + eds_suffix
        elif editors_latex:
            # No authors: the editors take the leading position.
            author_editor_string = prepare_author_editor_string(editors_latex) + eds_suffix

        isbn = isbn_template%(self.isbn) if self.isbn else ''

        # render chapters
        chapter_latex = []
        for chap in self.chapters or []:
            chapter_authors = build_persons_latex_list(chap.authors)
            chap_pages = book_chapter_pages_template%(chap.pages) if chap.pages else ''
            chap_ref = chapter_ref%(chap.index) if chap.index else ""
            chapter_latex.append((latex_template_book_chapter_item%(
                prepare_author_editor_string(chapter_authors), chap.title, chap_pages, chap_ref)).strip())

        chapter_string = ""
        if chapter_latex:
            chapter_string = latex_template_book_chapters%(CHAPTER_SEPARATER.join(chapter_latex))

        # A missing publisher must not be printed as the text "None".
        publisher = self.publisher or ''

        return latex_template_book%(counter, author_editor_string, self.title, editors_string,
                                    publisher, self.year, isbn, generate_link_latex(self.isiscb_id), chapter_string)
                                    


In [1262]:
# LaTeX skeleton for one article entry. Slots, in order: running number,
# author string, title, journal text, volume, issue, year, pages, record link.
latex_template_article = """
 
 
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s.'' \\textit{%s} %s%s (%s)%s. %s.

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
# Optional fragments; each is only formatted when the corresponding value is set.
volume_template = " %s"
issue_template = ", no. %s"
# NOTE: rebinds the notebook-level name `pages_template` that the book cell also assigns.
pages_template = ": %s"
class Article(Document):
    """A journal article record."""

    def __init__(self):
        super().__init__()
        self.authors = []
        self.title = ''
        self.year = 0
        # Journal information
        self.journal = ''
        self.journal_abbr = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''
        # Remaining fields
        self.publisher = ''
        self.isbn = ''

    def get_sort_by_value(self):
        """Return the sort key built from all authors."""
        return "; ".join(author.get_name_for_sort() for author in self.authors)

    def render(self, counter):
        """Return the LaTeX for this article; ``counter`` is the running number.

        Appends a DocumentError to ``errors`` when neither a journal
        abbreviation nor a journal title is available.
        """
        # Every author is rendered with the person template (last, first).
        authors_latex = [author_template % get_last_first(author.name)
                         for author in self.authors]

        # Optional fragments stay empty when the value is missing.
        vol = volume_template % (self.volume) if self.volume else ''
        issue = issue_template % (self.issue) if self.issue else ''
        pages = pages_template % (self.pages) if self.pages else ''

        # Prefer the abbreviation; fall back to the full journal title.
        if self.journal_abbr.strip():
            journal_text = self.journal_abbr
        else:
            journal_text = self.journal
        if not journal_text:
            errors.append(DocumentError(self, "No Journal abbreviation or title found.", "ERROR"))

        return latex_template_article % (
            counter,
            prepare_author_editor_string(authors_latex),
            self.title,
            journal_text,
            vol,
            issue,
            self.year,
            pages,
            generate_link_latex(self.isiscb_id),
        )


In [1263]:
# LaTeX skeleton for one chapter entry. Slots, in order: running number,
# author string, chapter title, book title, editor string, year,
# index of the containing book, pages, record link.
latex_template_chapter = """
 
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s.'' In \\textit{%s.}, edited by %s (%s) [ref. %s], %s. %s.

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

class Chapter(Document):
    """A book chapter record; ``book`` refers to the containing book."""

    def __init__(self):
        super().__init__()
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.publisher = ''
        self.pages = ''
        self.book = None

    def get_sort_by_value(self):
        """Return the sort key built from all authors."""
        return "; ".join(author.get_name_for_sort() for author in self.authors)

    def get_editor_string(self):
        """Return this chapter's editors as LaTeX with an "(Ed.)"/"(Eds.)" suffix.

        Editors without both a last and a first name are skipped; with no
        editors left the result is an empty string.
        """
        rendered = []
        for editor in self.editors or []:
            last, first = get_last_first(editor.name)
            if last and first:
                rendered.append(author_template % (last, first))

        if not rendered:
            suffix = ""
        elif len(rendered) == 1:
            suffix = " (Ed.) "
        else:
            suffix = " (Eds.) "
        return prepare_author_editor_string(rendered) + suffix

    def render(self, counter):
        """Return the LaTeX for this chapter; ``counter`` is the running number."""
        authors_latex = [author_template % get_last_first(author.name)
                         for author in self.authors]

        # The book's editors win; the chapter's own editors are the fallback.
        editor_string = self.book.get_editor_string() or self.get_editor_string()

        return latex_template_chapter % (
            counter,
            prepare_author_editor_string(authors_latex),
            self.title,
            self.book.title,
            editor_string,
            self.year,
            self.book.index,
            self.pages,
            generate_link_latex(self.isiscb_id),
        )


In [1264]:
# LaTeX skeleton for one thesis entry. Slots, in order: running number,
# author string, title, school, year, record link.
latex_template_thesis = """
 
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s.'' Dissertation at %s  (%s). %s.

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
class Thesis(Document):
    """A thesis/dissertation record."""

    def __init__(self):
        super().__init__()
        self.authors = []
        self.school = ''
        self.isbn = ''

    def get_sort_by_value(self):
        """Return the sort key built from all authors."""
        return "; ".join(author.get_name_for_sort() for author in self.authors)

    def render(self, counter):
        """Return the LaTeX for this thesis; ``counter`` is the running number."""
        authors_latex = [author_template % get_last_first(author.name)
                         for author in self.authors]

        return latex_template_thesis % (
            counter,
            prepare_author_editor_string(authors_latex),
            self.title,
            self.school,
            self.year,
            generate_link_latex(self.isiscb_id),
        )

    

In [1265]:
# LaTeX skeleton for one multimedia entry. Slots, in order: running number,
# author string, title, year, record link.
latex_template_media = """

\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s.'' (%s). %s.

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

class Media(Document):
    """A multimedia object record."""

    def __init__(self):
        super().__init__()
        self.authors = []

    def get_sort_by_value(self):
        """Return the sort key built from all authors."""
        return "; ".join(author.get_name_for_sort() for author in self.authors)

    def render(self, counter):
        """Return the LaTeX for this record; ``counter`` is the running number."""
        authors_latex = [author_template % get_last_first(author.name)
                         for author in self.authors]

        return latex_template_media % (
            counter,
            prepare_author_editor_string(authors_latex),
            self.title,
            self.year,
            generate_link_latex(self.isiscb_id),
        )
    

In [1266]:
# LaTeX skeleton for one essay review. Slots, in order: running number,
# author string, title, journal text, volume, issue, year, pages, record link,
# then for the reviewed book: author/editor string, title, year, reference.
latex_template_essay_review = """

\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s.'' \\textit{%s} %s%s (%s)%s. %s.
\\begin{isisdescription}Essay review of %s, \\emph{%s}  (%s)%s.\\end{isisdescription}

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

# Same values as assigned in the article cell; repeated here.
volume_template = " %s"
issue_template = ", no. %s"
pages_template = ": %s"
# Reference to the reviewed book's index.
ref_template = " [ref. %s]"

class EssayReview(Document):
    """An essay review record; ``book`` is the work being reviewed."""

    def __init__(self):
        super().__init__()
        self.authors = []
        # Journal information
        self.journal = ''
        self.journal_abbr = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''
        self.publisher = ''
        # Reviewed work
        self.book = None

    def get_sort_by_value(self):
        """Return the sort key built from all authors."""
        return "; ".join(author.get_name_for_sort() for author in self.authors)

    def render(self, counter):
        """Return the LaTeX for this essay review; ``counter`` is the running number.

        Appends a DocumentError to ``errors`` when neither a journal
        abbreviation nor a journal title is available.
        """
        authors_latex = [author_template % get_last_first(author.name)
                         for author in self.authors]

        # Optional fragments stay empty when the value is missing.
        vol = volume_template % (self.volume) if self.volume else ''
        issue = issue_template % (self.issue) if self.issue else ''
        pages = pages_template % (self.pages) if self.pages else ''

        # Reviewed work: its authors if present, otherwise its editors plus suffix.
        book_authors_latex = build_persons_latex_list(self.book.authors)
        if type(self.book) == Book:
            book_editors_latex = build_persons_latex_list(self.book.editors)
        else:
            book_editors_latex = ""

        if book_authors_latex:
            book_author_editor_string = prepare_author_editor_string(book_authors_latex)
        elif book_editors_latex:
            suffix = " (Ed.) " if len(book_editors_latex) == 1 else " (Eds.) "
            book_author_editor_string = prepare_author_editor_string(book_editors_latex) + suffix
        else:
            book_author_editor_string = ""

        book_ref_info = ref_template % (self.book.index) if self.book.index else ""

        # Prefer the abbreviation; fall back to the full journal title.
        if self.journal_abbr.strip():
            journal_text = self.journal_abbr
        else:
            journal_text = self.journal
        if not journal_text:
            errors.append(DocumentError(self, "No Journal abbreviation or title found.", "ERROR"))

        return latex_template_essay_review % (
            counter,
            prepare_author_editor_string(authors_latex),
            self.title,
            journal_text,
            vol,
            issue,
            self.year,
            pages,
            generate_link_latex(self.isiscb_id),
            book_author_editor_string,
            self.book.title,
            self.book.year,
            book_ref_info,
        )

    

In [1267]:
# LaTeX skeleton for a reviewed book with all of its reviews. Slots, in order:
# running number, book author/editor string, book title, book year,
# book reference, book record link, joined review strings.
latex_template_book_review="""
\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em}%s. \\textit{%s} %s.%s %s

\\begin{isisdescription}%s \\end{isisdescription}

 \\end{footnotesize}

\\vspace{0.75ex}

"""

# One review. Slots: reviewer string, journal text, volume, year, pages, record link.
latex_book_review = "%s \\textit{%s}  %s (%s)%s.%s"
# Reference to the reviewed book's index.
book_review_ref_template = " [ref. %s]"

class BookReview(Document):
    """A single book review; ``reviewed_book_id`` is the id of the reviewed book."""

    def __init__(self):
        super().__init__()
        self.authors = []
        # Journal information
        self.journal = ''
        self.journal_abbr = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''
        self.publisher = ''
        # Link to the reviewed work and record status
        self.reviewed_book_id = ''
        self.active = True
        
class BookReviews():
    """A reviewed book together with all reviews written about it.

    Attributes:
        book: the reviewed work.
        reviews: list of review objects for that work.
    """

    def __init__(self):
        self.book = None
        self.reviews = []

    def get_sort_by_value(self):
        """Return the sort key built from the reviewed book's authors."""
        return "; ".join([a.get_name_for_sort() for a in self.book.authors])

    def get_editor_string(self):
        """Return the book's editors as LaTeX with an "(Ed.)"/"(Eds.)" suffix.

        Editors without both a last and a first name are skipped; with no
        editors left the result is an empty string.
        """
        editors_latex = []
        for e in self.book.editors or []:
            last, first = get_last_first(e.name)
            if last and first:
                editors_latex.append(author_template%(last, first))

        eds_suffix = ""
        if editors_latex:
            eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "

        return prepare_author_editor_string(editors_latex) + eds_suffix

    def render(self, counter):
        """Return the LaTeX for the book heading followed by all its reviews.

        Record links are only included when ``IS_PROOF`` is true. A
        DocumentError is appended to ``errors`` for every review without a
        journal abbreviation or title.

        Args:
            counter: running number printed in bold at the start of the entry.
        """
        # Heading: the book's authors if present, otherwise its editors plus suffix.
        # (This used to be computed twice; the first copy was never used.)
        book_authors_latex = build_persons_latex_list(self.book.authors)
        book_editors_latex = build_persons_latex_list(self.book.editors) if type(self.book) == Book else ""

        book_author_editor_string = ""
        if book_authors_latex:
            book_author_editor_string = prepare_author_editor_string(book_authors_latex)
        elif book_editors_latex:
            eds_suffix = " (Ed.) " if len(book_editors_latex) == 1 else " (Eds.) "
            book_author_editor_string = prepare_author_editor_string(book_editors_latex) + eds_suffix

        review_latexes = []
        for review in self.reviews:
            # Reviewers are always rendered with the person template.
            review_authors_latex = []
            for a in review.authors:
                last, first = get_last_first(a.name)
                review_authors_latex.append(author_template%(last, first))

            vol = volume_template%(review.volume) if review.volume else ''
            pages = pages_template%(review.pages) if review.pages else ''

            # Prefer the abbreviation; fall back to the full journal title.
            journal_text = review.journal_abbr if review.journal_abbr.strip() else review.journal
            if not journal_text:
                errors.append(DocumentError(review, "No Journal abbreviation or title found.", "ERROR"))

            review_latexes.append(latex_book_review%(
                prepare_author_editor_string(review_authors_latex), journal_text, vol, review.year, pages,
                generate_link_latex(review.isiscb_id) if IS_PROOF else ""))

        book_ref_info = ""
        if self.book.index:
            book_ref_info = book_review_ref_template%(self.book.index)

        return latex_template_book_review%(counter, book_author_editor_string, self.book.title, self.book.year, book_ref_info,
                                           generate_link_latex(self.book.isiscb_id) if IS_PROOF else "",
                                           " ".join(review_latexes))


In [1268]:
def create_author(author_string):
    """Build an Author from one ``||``-separated contributor string.

    Recognised fields are ``AuthorityName``, ``ACRDisplayOrder`` and
    ``AuthorityType``. Returns ``None`` when the string contains no ``||``
    separator at all.
    """
    fields = [field.strip() for field in author_string.split('||')]
    if len(fields) == 1:
        return None

    author = Author()
    for field in fields:
        if field.startswith('AuthorityName '):
            author.name = field[len('AuthorityName '):]
        elif field.startswith('ACRDisplayOrder '):
            author.order = field[len('ACRDisplayOrder '):].strip()
        elif field.startswith('AuthorityType '):
            author.type = field[len('AuthorityType '):]
    return author

In [1269]:
def get_publisher(publisher_string):
    """Extract the LaTeX-escaped publisher name from a ``||``-separated string.

    Args:
        publisher_string: raw publisher field from the export.

    Returns:
        The value of the first ``AuthorityName`` field with ``&`` escaped, or
        an empty string when there is none (previously ``None``, which was
        then formatted into the output as the text "None").
    """
    for part in publisher_string.split('||'):
        part = part.strip()
        if part.startswith('AuthorityName '):
            return part[len('AuthorityName '):].strip().replace('&', '\\&')
    return ''

In [1270]:
def parse_journal(row, doc):
    """Fill ``doc.journal`` and ``doc.journal_abbr`` from ``row['Journal Link']``.

    The field is either a plain journal title or a ``" || "``-separated list
    of ``Key Value`` parts. ``AuthorityName`` provides the title and
    ``Abbreviation`` the abbreviation; ``&`` is escaped for LaTeX.

    Args:
        row: mapping with a ``'Journal Link'`` entry.
        doc: object whose ``journal`` / ``journal_abbr`` attributes are set.
    """
    journal_link = row['Journal Link']
    parts = journal_link.split(" || ")
    if len(parts) <= 1:
        # Plain title: escape it like the structured case does.
        doc.journal = journal_link.strip().replace('&', '\\&')
        return

    for part in parts:
        part = part.strip()
        if part.startswith('AuthorityName '):
            doc.journal = part[len('AuthorityName '):].strip().replace('&', '\\&')
        elif part.startswith('Abbreviation '):
            # The abbreviation has to come from its own part; taking it from the
            # AuthorityName part just duplicated the journal title.
            doc.journal_abbr = part[len('Abbreviation '):].strip().replace('&', '\\&')
    

In [1271]:
def build_book(row):
    """Create a Book from one export row.

    Reads the columns ``Record ID``, ``Author``, ``Editor``, ``Title``,
    ``Year of publication``, ``Place Publisher``, ``ISBN`` and
    ``Related Citations``. Authors are sorted by their display order. The ids
    of related citations of type "Includes Chapter" are collected in
    ``chapter_ids``.

    Args:
        row: mapping of column name to string value.

    Returns:
        The populated Book.
    """
    book = Book()
    book.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        if author_string.strip():
            author = create_author(author_string)
            if author:
                book.authors.append(author)
    book.authors.sort(key=lambda author: author.get_order())

    for editor_string in row['Editor'].split('//'):
        editor = create_author(editor_string)
        if editor:
            book.editors.append(editor)

    book.title = row['Title'].replace('&', '\\&')
    book.year = row['Year of publication']
    # get_publisher may find no name; keep the field a string in that case.
    book.publisher = get_publisher(row['Place Publisher']) or ''
    book.isbn = row['ISBN']

    for rel_cit in row['Related Citations'].split('//'):
        if not rel_cit.strip():
            continue
        cit_id = ''
        type_is_chapter = False
        for info in rel_cit.split("||"):
            info = info.strip()
            if info.startswith("CitationID "):
                cit_id = info[len("CitationID "):].strip()
            # Single space after the key, as everywhere else in this notebook;
            # the former two-space prefix could not match a stripped
            # "CCRType <value>" part, so no chapter was ever recorded.
            elif info.startswith("CCRType "):
                if info[len("CCRType "):].strip() == "Includes Chapter":
                    type_is_chapter = True

        if type_is_chapter and cit_id:
            book.chapter_ids.append(cit_id)

    return book

In [1272]:
def build_chapter(row):
    """Create a Chapter from one export row.

    Reads the columns ``Record ID``, ``Author``, ``Title``,
    ``Year of publication``, ``Editor``, ``Related Citations`` and
    ``Pages Free Text``. The first related citation that is not marked with a
    different ``CCRType`` than "Includes Chapter" becomes ``chapter.book``
    (a Book carrying only title and id).

    Args:
        row: mapping of column name to string value.

    Returns:
        The populated Chapter.
    """
    chapter = Chapter()
    chapter.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        author = create_author(author_string)
        if author:
            chapter.authors.append(author)

    # Sort on the numeric order. The raw attribute is the int 0 by default and
    # a string otherwise, which either fails to compare or sorts "10" before "2".
    chapter.authors.sort(key=lambda author: author.get_order())

    chapter.title = row['Title'].replace('&', '\\&')
    chapter.year = row['Year of publication']

    for editor_string in row['Editor'].split('//'):
        editor = create_author(editor_string)
        if editor:
            chapter.editors.append(editor)

    for cit in row['Related Citations'].split("//"):
        book_title = ""
        book_id = ""
        # A citation counts as the containing book unless its type says otherwise.
        is_book_info = True
        for part in cit.split("||"):
            part = part.strip()
            if part.startswith("CCRType "):
                if part[len("CCRType "):].strip() != "Includes Chapter":
                    is_book_info = False
            if part.startswith("CitationTitle "):
                book_title = part[len("CitationTitle "):]
            if part.startswith("CitationID "):
                book_id = part[len("CitationID "):].strip()

        if is_book_info:
            chapter.book = Book()
            chapter.book.title = book_title
            chapter.book.isiscb_id = book_id
            break

    pages = row['Pages Free Text']
    if pages:
        # Keeps only the text before "(From".
        # NOTE(review): a value without "(From" is discarded entirely -- confirm this is intended.
        chapter.pages = pages[:pages.index("(From")].strip() if "(From" in pages else ""

    return chapter

In [1273]:
def build_article(row):
    """Create an Article from one export row.

    Reads ``Record ID``, ``Author``, ``Title``, ``Year of publication``,
    ``Place Publisher``, ``Journal Link``, ``Journal Volume``,
    ``Journal Issue`` and ``Pages Free Text``.
    """
    article = Article()
    article.isiscb_id = row['Record ID']

    parsed_authors = (create_author(raw) for raw in row['Author'].split('//'))
    article.authors.extend(author for author in parsed_authors if author)

    article.title = row['Title'].replace('&', '\\&')
    article.year = row['Year of publication']
    article.publisher = get_publisher(row['Place Publisher'])
    parse_journal(row, article)

    # For each of these columns only the text before "(From" is kept; a
    # non-empty value without that marker results in an empty string.
    for column, attribute in (('Journal Volume', 'volume'),
                              ('Journal Issue', 'issue'),
                              ('Pages Free Text', 'pages')):
        raw_value = row[column]
        if raw_value:
            if "(From" in raw_value:
                setattr(article, attribute, raw_value.partition("(From")[0].strip())
            else:
                setattr(article, attribute, "")
    return article

In [1274]:
def build_thesis(row):
    """Create a Thesis from one export row.

    Reads ``Record ID``, ``Author``, ``Title``, ``Year of publication`` and
    ``School``. The school name is the ``AuthorityName`` value of the
    ``||``-separated ``School`` field, with ``&`` escaped for LaTeX.

    Args:
        row: mapping of column name to string value.

    Returns:
        The populated Thesis.
    """
    thesis = Thesis()

    thesis.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        author = create_author(author_string)
        if author:
            thesis.authors.append(author)

    thesis.title = row['Title'].replace('&', '\\&')
    thesis.year = row['Year of publication']

    for part in row['School'].split('||'):
        # Strip before slicing so surrounding whitespace cannot shift the cut.
        part = part.strip()
        if part.startswith('AuthorityName '):
            # Escape '&' like the title, publisher and journal fields do.
            thesis.school = part[len("AuthorityName "):].strip().replace('&', '\\&')

    return thesis

In [1275]:
def build_media(row):
    """Create a Media record from one export row.

    Reads ``Record ID``, ``Author``, ``Title`` and ``Year of publication``.
    """
    media = Media()
    media.isiscb_id = row['Record ID']

    parsed_authors = (create_author(raw) for raw in row['Author'].split('//'))
    media.authors.extend(author for author in parsed_authors if author)

    media.title = row['Title'].replace('&', '\\&')
    media.year = row['Year of publication']
    return media

In [1276]:
def build_essay_review(row):
    """Create an EssayReview from one export row.

    Reads ``Record ID``, ``Author``, ``Title``, ``Year of publication``,
    ``Place Publisher``, ``Journal Link``, ``Journal Volume``,
    ``Journal Issue``, ``Pages Free Text`` and ``Related Citations``. The
    first related citation not marked with a ``CCRType`` other than
    "Is Reviewed By" becomes ``essay_review.book`` (title and id only).
    """
    essay_review = EssayReview()
    essay_review.isiscb_id = row['Record ID']

    parsed_authors = (create_author(raw) for raw in row['Author'].split('//'))
    essay_review.authors.extend(author for author in parsed_authors if author)

    essay_review.title = row['Title'].replace('&', '\\&')
    essay_review.year = row['Year of publication']
    essay_review.publisher = get_publisher(row['Place Publisher'])
    parse_journal(row, essay_review)

    # For each of these columns only the text before "(From" is kept; a
    # non-empty value without that marker results in an empty string.
    for column, attribute in (('Journal Volume', 'volume'),
                              ('Journal Issue', 'issue'),
                              ('Pages Free Text', 'pages')):
        raw_value = row[column]
        if raw_value:
            if "(From" in raw_value:
                setattr(essay_review, attribute, raw_value.partition("(From")[0].strip())
            else:
                setattr(essay_review, attribute, "")

    for citation in row['Related Citations'].split("//"):
        book_title = ""
        book_id = ""
        is_book_info = True
        for field in (piece.strip() for piece in citation.split("||")):
            if field.startswith("CCRType "):
                if field[len("CCRType "):].strip() != "Is Reviewed By":
                    is_book_info = False
            if field.startswith("CitationTitle "):
                book_title = field[len("CitationTitle "):]
            if field.startswith("CitationID "):
                book_id = field[len("CitationID "):].strip()

        if not is_book_info:
            continue

        # First matching citation wins.
        reviewed_book = Book()
        reviewed_book.title = book_title
        reviewed_book.isiscb_id = book_id
        essay_review.book = reviewed_book
        break

    return essay_review

In [1277]:
def build_book_review(row):
    """Create a BookReview from one export row.

    Reads ``Record ID``, ``Author``, ``Year of publication``,
    ``Journal Link``, ``Journal Volume``, ``Journal Issue``,
    ``Pages Free Text``, ``Record Nature`` and ``Related Citations``. The id
    of the first related citation not marked with a ``CCRType`` other than
    "Is Reviewed By" is stored in ``reviewed_book_id``; ``active`` is cleared
    when ``Record Nature`` does not start with "Active".

    Args:
        row: mapping of column name to string value.

    Returns:
        The populated BookReview.
    """
    book_review = BookReview()

    book_review.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        if author_string.strip():
            author = create_author(author_string)
            if author:
                book_review.authors.append(author)

    # Sort on the numeric order. The raw attribute is the int 0 by default and
    # a string otherwise, which either fails to compare or sorts "10" before "2".
    book_review.authors.sort(key=lambda author: author.get_order())

    book_review.year = row['Year of publication']
    parse_journal(row, book_review)

    # Only the text before "(From" is kept in the three fields below.
    # NOTE(review): a value without "(From" is discarded entirely -- confirm this is intended.
    vol = row['Journal Volume']
    if vol:
        book_review.volume = vol[:vol.index("(From")].strip() if "(From" in vol else ""

    issue = row['Journal Issue']
    if issue:
        book_review.issue = issue[:issue.index("(From")].strip() if "(From" in issue else ""

    pages = row['Pages Free Text']
    if pages:
        book_review.pages = pages[:pages.index("(From")].strip() if "(From" in pages else ""

    status = row['Record Nature']
    if not status.startswith('Active'):
        book_review.active = False

    for cit in row['Related Citations'].split("//"):
        book_id = ""
        # A citation counts as the reviewed book unless its type says otherwise.
        is_book_info = True
        for part in cit.split("||"):
            part = part.strip()
            if part.startswith("CCRType "):
                if part[len("CCRType "):].strip() != "Is Reviewed By":
                    is_book_info = False
            if part.startswith("CitationID "):
                book_id = part[len("CitationID "):].strip()

        if is_book_info:
            book_review.reviewed_book_id = book_id
            break

    return book_review

In [1278]:
class Classification:
    """Plain record holding one classification attached to a document.

    Attributes:
        code: classification code text, '' by default.
        code_print: printable form of the code, '' by default.
        code_for_sorting: numeric sort key, -1 by default.
        name: classification name, '' by default.
    """

    def __init__(self):
        # All text fields share the same empty default.
        self.code = self.code_print = self.name = ''
        self.code_for_sorting = -1

In [1279]:
# Read the export file row by row, build one document object per supported
# record type and file it into the lookup structures used for rendering.
with open(export_file, encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    # docs: every document that is not a BookReview, in file order.
    docs = []
    # docs_by_id: isiscb_id -> document (BookReviews are not stored here).
    docs_by_id = {}
    # reviews_by_book_id: reviewed_book_id -> list of BookReview objects.
    reviews_by_book_id = {}
    # classifications_by_nr: code_for_sorting -> first Classification seen with that key.
    classifications_by_nr = {}
    # docs_by_classification: code_for_sorting -> list of documents.
    docs_by_classification = {}
    
    for row in reader:
        # Rows with any other record type leave doc as None and are skipped.
        doc = None
        if row['Record Type'] == "Book":
            doc = build_book(row)
        elif row['Record Type'] == "Article":
            doc = build_article(row)
        elif row['Record Type'] == "Chapter":
            doc = build_chapter(row)
        elif row['Record Type'] == "Thesis":
            doc = build_thesis(row)
        elif row['Record Type'] == "Multimedia Object":
            doc = build_media(row)
        elif row['Record Type'] == "Essay Review":
            doc = build_essay_review(row)
        elif row['Record Type'] == "Review":
            doc = build_book_review(row)

        # Skip ids that were already registered.
        # NOTE(review): BookReviews are never added to docs_by_id, so this
        # check does not filter repeated review rows - confirm that is intended.
        if doc and not doc.isiscb_id in docs_by_id:
            doc.category_number = row['CategoryNumbers'].strip()
            doc.print_date = row['Published Print'].strip()
            if type(doc) != BookReview:
                docs.append(doc)
                docs_by_id[doc.isiscb_id] = doc
                
                # classification
                # Classifications are separated by '//' and their fields by '||'.
                class_codes = row['CategoryNumbers'].split('//')
                # NOTE(review): the condition fires for more than ONE
                # classification although the message says "more than two" - confirm.
                if len(class_codes) > 1:
                    errors.append(DocumentError(doc, "There are more than two classifications.", "ERROR"))
                doc.classifications = []
                for class_code in class_codes:
                    classi = Classification()
                    for part in class_code.split('||'):
                        # NOTE(review): the prefix test uses the stripped part but the
                        # slices below use the unstripped one; more than one leading
                        # whitespace character would shift the slice - confirm input format.
                        if part.strip().startswith('ClassificationCode'):
                            code_parts = part[len("ClassificationCode "):].strip().split('-')
                            classi.code = part[len("ClassificationCode "):].strip()
                            # Drop empty pieces produced by leading, trailing or doubled '-'.
                            while "" in code_parts: code_parts.remove("")
                            try:
                                # One piece: sort key is the number / 1000, printed as is.
                                if len(code_parts) == 1:
                                    classi.code_for_sorting = int(code_parts[0].strip())/1000
                                    classi.code_print = code_parts[0].strip()
                                # Two pieces: the second piece is the integral part of the
                                # sort key, the first adds piece / 1000; printed "second-first".
                                if len(code_parts) == 2:
                                    classi.code_for_sorting = float(code_parts[1].strip()) + float(code_parts[0].strip())/1000
                                    classi.code_print = code_parts[1].strip() + "-" + code_parts[0].strip()
                            except Exception:
                                # Non-numeric code: report it and clear the category number,
                                # which excludes the document from the numbering below.
                                print("Category code not valid " + classi.code)
                                doc.category_number = ""
                                
                        if part.strip().startswith('AuthorityName'):
                            classi.name = part[len("AuthorityName "):].strip()
                    # Only classifications whose code could be parsed are recorded.
                    if classi.code_print.strip():
                        doc.classifications.append(classi)
                        if classi.code_for_sorting not in classifications_by_nr:
                            classifications_by_nr[classi.code_for_sorting] = classi
                        if classi.code_for_sorting not in docs_by_classification:
                            docs_by_classification[classi.code_for_sorting] = []
                        docs_by_classification[classi.code_for_sorting].append(doc)
                    
            else:
                # BookReview: make sure an entry exists for the reviewed book, then
                # keep the review only if it is active and has no print date.
                reviews_list = reviews_by_book_id.get(doc.reviewed_book_id, None)
                if not reviews_list:
                    reviews_by_book_id[doc.reviewed_book_id] = []
                
                if doc.active and not doc.print_date:
                    reviews_by_book_id[doc.reviewed_book_id].append(doc)
            
    # Sort key: delegates to the object's own get_sort_by_value().
    def get_sort_by_value(doc):
        return doc.get_sort_by_value()

    #docs.sort(key=get_sort_by_value)
    # Number the documents: classifications in ascending key order, documents
    # within a classification in sort-value order. Only documents with a
    # category number and without a print date receive an index.
    # NOTE(review): a document listed under several classifications is
    # renumbered on every pass and keeps the last index assigned.
    counter = 1
    for classi_code in sorted(docs_by_classification):
        sorted_docs = docs_by_classification[classi_code]
        sorted_docs.sort(key=get_sort_by_value)
        for doc in sorted_docs:
            if doc.category_number and not doc.print_date:
                doc.index = counter
                counter += 1
        # The list was sorted in place; this stores the same list object again.
        docs_by_classification[classi_code] = sorted_docs
                
                
# LaTeX emitted before the records of each classification: an unnumbered
# section heading, the running page marks and a table-of-contents entry.
# The six %s slots are filled, in order, with: heading number, name, name,
# name, heading number, name.
heading_latex_template = """
\\section*{%s.\\hspace{0.5em}%s}
\\markboth{%s}{%s}
\\addcontentsline{toc}{section}{\\protect\\numberline{%s}%s}

\\paragraph{}
    """
        
# Write the result file: for every classification (ascending sort key) a
# LaTeX heading followed by the rendered records of that classification.
with open(result_file_path, 'w', encoding='utf-8') as result_file:
    for heading_idx, classi_code in enumerate(sorted(docs_by_classification), start=1):
        classification = classifications_by_nr[classi_code]
        heading = heading_latex_template%(str(heading_idx), classification.name, classification.name, classification.name,str(heading_idx) ,classification.name)
        print("Writing " + classification.name)
        print(classi_code)
        result_file.write(heading + "\n")

        for doc in docs_by_classification[classi_code]:
            # Replace the placeholder book of a chapter / essay review with the
            # full record when that book is part of this export.
            if (type(doc) in [Chapter, EssayReview]) and doc.book.isiscb_id in docs_by_id:
                book = docs_by_id[doc.book.isiscb_id]
                if book:
                    doc.book = book
            # Attach the chapters of a book that are part of this export.
            if type(doc) == Book and doc.chapter_ids:
                for chap_id in doc.chapter_ids:
                    if chap_id not in docs_by_id:
                        continue
                    chapter = docs_by_id[chap_id]
                    # A book can be visited more than once (it may be listed
                    # under several classifications); attach each chapter
                    # object only once so it is not rendered repeatedly.
                    if not any(existing is chapter for existing in doc.chapters):
                        doc.chapters.append(chapter)
            # print all classified records (e.g., have Category Number) 
            # that have not been previously printed (e.g., have Published Print data).
            if doc.category_number and not doc.print_date:
                result_file.write(doc.render(doc.index).strip() + "\n")

# Pair every reviewed book that is part of this export with the reviews
# collected for it; reviews of books that are not in docs_by_id are dropped.
all_book_reviews = []
for reviewed_id, collected_reviews in reviews_by_book_id.items():
    reviewed_book = docs_by_id.get(reviewed_id)
    if not reviewed_book:
        continue
    entry = BookReviews()
    entry.book = reviewed_book
    entry.reviews = collected_reviews
    all_book_reviews.append(entry)


def get_sort_by_value(doc):
    """Return the sort key the given object reports for itself."""
    return doc.get_sort_by_value()


# Order the entries and label them R1, R2, ... in that order.
all_book_reviews.sort(key=get_sort_by_value)
for position, entry in enumerate(all_book_reviews, start=1):
    entry.index = "R" + str(position)

# Write one rendered entry per reviewed book, one per line, and report
# each book id on stdout while doing so.
with open(reviews_file_path, 'w', encoding='utf-8') as reviews_file:
    for entry in all_book_reviews:
        print("Writing Reviews for", entry.book.isiscb_id, sep="")
        rendered = entry.render(entry.index).strip()
        reviews_file.write(rendered + "\n")
        
# Record every collected problem as "<record id>, <message>", one per line.
with open(errors_file_path, 'w', encoding='utf-8') as error_file:
    error_file.writelines(
        problem.doc.isiscb_id + ", " + problem.error + "\n"
        for problem in errors
    )
        


Category code not valid OM
Category code not valid wg
Category code not valid D 37.2 a
Category code not valid CW
Category code not valid D 38 g
Category code not valid OM
Category code not valid wg
Category code not valid op
Category code not valid wd
Category code not valid xg
Category code not valid dj
Category code not valid j
Category code not valid D 36 g
Category code not valid FY
Category code not valid N
Category code not valid we
Category code not valid SA
Category code not valid we
Category code not valid D 39 e
Category code not valid DY
Category code not valid C 28
Category code not valid FY
Category code not valid we
Category code not valid D 36 i
Category code not valid FY
Category code not valid D 36 i
Category code not valid op
Category code not valid D 38 c
Category code not valid D
Category code not valid wf
Category code not valid D
Category code not valid we
Category code not valid D 38 c
Category code not valid D 38 c
Category code not valid D
Category code not va

Writing Reviews forCBB182640253
Writing Reviews forCBB221633675
Writing Reviews forCBB755993162
Writing Reviews forCBB897786905
Writing Reviews forCBB757502558
Writing Reviews forCBB777412720
Writing Reviews forCBB792039626
Writing Reviews forCBB001213160
Writing Reviews forCBB001551068
Writing Reviews forCBB448244889
Writing Reviews forCBB001553344
Writing Reviews forCBB003859647
Writing Reviews forCBB432687357
Writing Reviews forCBB000880371
Writing Reviews forCBB604486246
Writing Reviews forCBB335557089
Writing Reviews forCBB220506803
Writing Reviews forCBB246245023
Writing Reviews forCBB580544384
Writing Reviews forCBB697656713
Writing Reviews forCBB718574285
Writing Reviews forCBB001213244
Writing Reviews forCBB308469579
Writing Reviews forCBB949955189
Writing Reviews forCBB409677268
Writing Reviews forCBB905423062
Writing Reviews forCBB578391917
Writing Reviews forCBB011700651
Writing Reviews forCBB858209500
Writing Reviews forCBB186453605
Writing Reviews forCBB917713034
Writing 