# Publications markdown generator for academicpages

Takes a set of bibtex of publications and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). 

The core python code is also in `pubsFromBibs.py`. 
Run either from the `markdown_generator` folder after replacing updating the publist dictionary with:
* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

TODO: Make this work with other databases of citations, 
TODO: Merge this with the existing TSV parsing solution

In [21]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from time import strptime
import string
import html
import os
import re

In [22]:
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
publist = {
    "proceeding": {
        "file" : "proceedings.bib",
        "venuekey": "booktitle",
        "venue-pretext": "In the proceedings of ",
        "collection" : {"name":"publications",
                        "permalink":"/publication/"}
        
    },
    "journal":{
        "file": "pubs.bib",
        "venuekey" : "journal",
        "venue-pretext" : "",
        "collection" : {"name":"publications",
                        "permalink":"/publication/"}
    } 
}

In [23]:
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;"
    }

def html_escape(text):
    """Produce entities within text."""
    return "".join(html_escape_table.get(c,c) for c in text)

In [None]:
from pybtex.database.input import bibtex
from time import strptime
import html
import os
import re

for pubsource in publist:
    bibfile = publist[pubsource]["file"]
    if not os.path.isfile(bibfile):
        print(f"WARNING: File '{bibfile}' not found. Skipping.")
        continue
    parser = bibtex.Parser()
    bibdata = parser.parse_file(bibfile)

    # loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        # reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        b = bibdata.entries[bib_id].fields

        try:
            pub_year = f'{b["year"]}'

            # todo: this hack for month and day needs some cleanup
            if "month" in b.keys():
                if len(b["month"]) < 3:
                    pub_month = "0" + b["month"]
                    pub_month = pub_month[-2:]
                elif not b["month"].isdigit():
                    tmnth = strptime(b["month"][:3], '%b').tm_mon
                    pub_month = "{:02d}".format(tmnth)
                else:
                    pub_month = str(b["month"])
            if "day" in b.keys():
                pub_day = str(b["day"])

            pub_date = pub_year + "-" + pub_month + "-" + pub_day

            # strip out {} as needed (some bibtex entries that maintain formatting)
            clean_title = b["title"].replace("{", "").replace("}", "").replace("\\", "").replace(" ", "-")

            url_slug = re.sub(r"\[.*\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--", "-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--", "-")

            # Build Citation from text
            citation = ""

            # citation authors - todo - add highlighting for primary author?
            for author in bibdata.entries[bib_id].persons["author"]:
                first = author.first_names[0] if author.first_names else ""
                last = author.last_names[0] if author.last_names else ""
                citation = citation + " " + first + " " + last + ", "

            # citation title
            citation = citation + "\"" + html_escape(b["title"].replace("{", "").replace("}", "").replace("\\", "")) + ".\""

            # add venue logic depending on citation type
            venue = ""
            try:
                venue = publist[pubsource]["venue-pretext"] + b[publist[pubsource]["venuekey"]].replace("{", "").replace("}", "").replace("\\", "")
            except KeyError:
                venue = ""

            citation = citation + " " + html_escape(venue)
            citation = citation + ", " + pub_year + "."

            ## YAML variables
            md = "---\ntitle: \"" + html_escape(b["title"].replace("{", "").replace("}", "").replace("\\", "")) + '\"\n'
            md += "collection: " + publist[pubsource]["collection"]["name"]
            md += "\npermalink: " + publist[pubsource]["collection"]["permalink"] + html_filename

            note = False
            if "note" in b.keys():
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)
            md += "\nvenue: '" + html_escape(venue) + "'"

            url = False
            if "url" in b.keys():
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            md += "\ncitation: '" + html_escape(citation) + "'"
            md += "\n---"

            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            if url:
                md += f"\n[Access paper here]({b['url']})" + "{:target=\"_blank\"}\n"
            else:
                md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q=" + html.escape(clean_title.replace("-", "+")) + "){:target=\"_blank\"} for full citation"

            md_filename = os.path.basename(md_filename)

            output_dir = "../_publications/"
            os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists

            with open(os.path.join(output_dir, md_filename), 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60], "..."*(len(b['title'])>60), "\"")
        except KeyError as e:
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', b.get("title", "NO TITLE")[:30], "..."*(len(b.get('title', ""))>30), "\"")
            continue


SUCESSFULLY PARSED key0: " Simulator for modeling, analysis, and visualizations of ther ... "
SUCESSFULLY PARSED key1: " Parallel delay multiply and sum algorithm for microwave medi ... "
SUCESSFULLY PARSED key2: " Measurement of whole-brain atrophy progression using microwa ... "
SUCESSFULLY PARSED key4: " Experimental radar data for monitoring brain atrophy progres ... "
SUCESSFULLY PARSED key5: " Multi-skills resource constrained and personality traits bas ... "
SUCESSFULLY PARSED key6: " An integrated methodology for bibliometric analysis: a case  ... "
SUCESSFULLY PARSED key7: " Multistatic radar-based imaging in layered and dispersive me ... "
SUCESSFULLY PARSED key8: " Microwave sensing dataset for noninvasive monitoring of vent ... "
SUCESSFULLY PARSED key9: " A machine learning-based classification method for monitorin ... "
SUCESSFULLY PARSED key11: " Weighted multi-skill resource constrained project scheduling ... "


FileNotFoundError: [Errno 2] No such file or directory: '../_publications/2024-01-01-Classification-of-skin-lesion-with-features-extraction-using-quantum-chebyshev-polynomials-and-autoencoder-from-wavelet-transformed-images.md'