# Publications markdown generator for academicpages

Takes a set of BibTeX files of publications and converts them into markdown files for use with [academicpages.github.io](https://academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)).

The core python code is also in `pubsFromBibs.py`. 
Run either one from the `markdown_generator` folder after updating the `publist` dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

The notebook (or script) must be run from the folder that contains the bib files, because the bib file names are opened as relative paths.

In [5]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from datetime import date
from time import strptime, strftime
import numpy
import string
import html
import os
import re

In [6]:
# TODO: support distinct collection types instead of one catch-all
# "publications" collection (requires matching changes to the template).
#
# Maps each publication category to the settings used when generating its
# pages. Each row below is:
#   (category, bib file, bibtex field that holds the venue, venue pre-text)
publist = {
    category: {
        "file": bib_file,
        "venuekey": venue_key,
        "venue-pretext": venue_pretext,
        "collection": {"name": "publications", "permalink": "/publication/"},
    }
    for category, bib_file, venue_key, venue_pretext in (
        ("preprint", "preprint.bib", "note", ""),
        ("journal", "journal.bib", "journal", ""),
        ("proceeding", "proceedings.bib", "booktitle", ""),
        ("abstract", "abstract.bib", "booktitle", ""),
    )
}

In [7]:
# Characters that are swapped for HTML entities when text is written out.
# The apostrophe maps to a right single quote entity (alternatives that were
# considered: "&apos;" and "&#39;").
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&rsquo;",
}


def html_escape(text):
    """Return *text* with every character listed in ``html_escape_table``
    replaced by its entity; all other characters pass through unchanged."""
    escaped = []
    for ch in text:
        escaped.append(html_escape_table.get(ch, ch))
    return "".join(escaped)

In [8]:
# English month names used to resolve bibtex "month" fields. A fixed table
# keeps the result independent of the process locale.
_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)


def _parse_month(raw):
    """Resolve a bibtex ``month`` field.

    Accepts a month number ("5", "05"), a full or abbreviated English month
    name ("June", "jun", "Sept."), or a month name followed by extra text
    such as days ("June 12-14").

    Returns a ``(month_number, citation_text)`` tuple of strings:
    ``month_number`` is zero-padded to two digits, or "" when the month could
    not be resolved; ``citation_text`` is what should appear in the citation
    (the full month name for numeric input, otherwise the field as written).
    """
    text = str(raw).strip()
    if not text:
        return "", ""
    if text.isdecimal():
        number = int(text)
        if 1 <= number <= 12:
            return f"{number:02d}", _MONTH_NAMES[number - 1]
        return "", ""  # out-of-range number: nothing sensible to show
    # Only the first word identifies the month; the rest may be day numbers.
    key = text.split()[0].rstrip(".,").lower()
    for number, name in enumerate(_MONTH_NAMES, start=1):
        lowered = name.lower()
        if key == lowered or (len(key) >= 3 and lowered.startswith(key)):
            return f"{number:02d}", text
    # Unrecognised text: keep it for the citation, leave the date to default.
    return "", text


# For every configured bib file, write one markdown page per entry into
# ../_publications/. Entries missing a required field (year, title, author,
# venue) are reported and skipped.
for idx, pubsource in enumerate(publist):
    source_cfg = publist[pubsource]
    parser = bibtex.Parser()
    bibdata = parser.parse_file(source_cfg["file"])

    # loop through the individual references in a given bibtex file
    for idx_in, bib_id in enumerate(bibdata.entries):
        entry = bibdata.entries[bib_id]
        b = entry.fields

        try:
            pub_year = f'{b["year"]}'

            pub_mn = ""
            citation_month = ""
            if "month" in b and b["month"]:
                pub_mn, citation_month = _parse_month(b["month"])

            pub_vol = str(b["volume"]) if "volume" in b else ""
            pub_numb = str(b["number"]) if "number" in b else ""
            pub_pages = str(b["pages"]) if "pages" in b else ""

            # Day is always the 1st; month falls back to January when unknown.
            pub_date = pub_year + "-" + (pub_mn or "01") + "-01"

            # strip out {} and backslashes (some bibtex entries keep formatting)
            plain_title = b["title"].replace("{", "").replace("}", "").replace("\\", "")
            clean_title = plain_title.replace(" ", "-")

            url_slug = re.sub(r"\[.*\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--", "-")

            md_filename = (str(idx) + "-" + str(idx_in) + "-" + str(pub_date) + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--", "-")

            # Build citation from text
            citation = ""

            # citation authors - todo - add highlighting for primary author?
            for author in entry.persons["author"]:
                # Single-name authors (e.g. organisations) may lack a first
                # or last name, so only use the parts that exist.
                name_parts = []
                if author.first_names:
                    name_parts.append(author.first_names[0])
                if author.last_names:
                    name_parts.append(author.last_names[0])
                citation = citation + " " + " ".join(name_parts) + ", "

            # citation title
            citation = citation + "\"" + html_escape(plain_title) + ".\""

            # add venue logic depending on citation type
            venue = source_cfg["venue-pretext"] + b[source_cfg["venuekey"]].replace("{", "").replace("}", "").replace("\\", "")

            citation = citation + " " + "<i>" + html_escape(venue) + "</i>"
            if pub_vol:
                citation = citation + ", " + pub_vol
            if pub_numb:
                citation = citation + ", " + pub_numb
            if pub_pages:
                citation = citation + ", " + pub_pages
            if citation_month:
                citation = citation + ", " + citation_month
            else:
                citation = citation + ","
            citation = citation + " " + pub_year + "."

            # The note of a proceeding typically carries an award; it is
            # optional, so a paper without one is still generated.
            if pubsource == "proceeding" and "note" in b and b["note"]:
                citation = citation + " <b>" + html_escape(b["note"]) + "</b>"

            ## YAML variables
            front_matter = [
                "---",
                'title: "' + html_escape(plain_title) + '"',
                "collection: " + source_cfg["collection"]["name"],
                "permalink: " + source_cfg["collection"]["permalink"] + html_filename,
                "category: '" + pubsource + "'",
                "date: " + str(pub_date),
            ]

            # Optional: expose the note as an excerpt.
            #if "note" in b and len(str(b["note"])) > 5:
            #    front_matter.append("excerpt: '" + html_escape(b["note"]) + "'")

            # Preprint venues are written verbatim; all others are escaped.
            if pubsource == "preprint":
                front_matter.append("venue: '" + venue + "'")
            else:
                front_matter.append("venue: '" + html_escape(venue) + "'")

            if "url" in b and len(str(b["url"])) > 5:
                front_matter.append("paperurl: '" + b["url"] + "'")

            front_matter.append("citation: '" + citation + "'")
            front_matter.append("---")
            md = "\n".join(front_matter)

            ## Markdown description for individual page
            # Optional: append a Google Scholar search link.
            #md += "\n[Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"}"
            print(md)
            md_filename = os.path.basename(md_filename)

            # Explicit encoding so non-ASCII titles/authors are written the
            # same way regardless of the platform default.
            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # field may not exist for a reference
        except KeyError as e:
            # The title itself may be the missing field, so never index it
            # unguarded inside the handler.
            title = b["title"] if "title" in b else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title[:30],"..."*(len(title)>30),"\"")
            continue


NameError: name 'pub_day' is not defined

'S&rsquo;S'