# Orcid to bib

In [1]:
# ORCID iD whose works are exported; it is interpolated into the ORCID API URLs below.
orcid = '0000-0001-5336-3284' # Fill your orcid here

In [2]:
import requests

We use the /works api to list all works related to the orcid. This gives a summary of all works, so citation information is not included. We collect the put-code of all works to retrieve the citation information later.

In [78]:
# List every work attached to the ORCID record (summaries only, no citation text).
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get('https://pub.orcid.org/v3.0/{}/works'.format(orcid),
                        headers={"Accept": "application/orcid+json" },
                        timeout=30)
# Stop here with a clear HTTP error (unknown ORCID iD, rate limiting, outage)
# instead of failing later with a confusing KeyError on the parsed body.
response.raise_for_status()
record = response.json()

In [79]:
# Collect the put-code of the first summary in every work group; the put-codes
# are used afterwards to fetch each work's full record.
put_codes = [group['work-summary'][0]['put-code'] for group in record['group']]
# Keep `put_code` defined as before, but do not raise IndexError when the
# ORCID record has no works at all.
put_code = put_codes[0] if put_codes else None

We use the /<orcid>/work/<put-code> endpoint to retrieve the citation information for each record.

In [80]:
# Fetch the full record of every work and keep the citation text of those
# that provide one.
citations = []
for put_code in put_codes:
    response = requests.get('https://pub.orcid.org/v3.0/{}/work/{}'.format(orcid, put_code),
                            headers={"Accept": "application/orcid+json" },
                            timeout=30)  # never block indefinitely on one work
    # Surface HTTP failures directly rather than as a KeyError on the error body.
    response.raise_for_status()
    work = response.json()
    # Works without citation data carry no usable entry; skip them whether the
    # key is missing or explicitly null.
    work_citation = work.get('citation')
    if work_citation is not None:
        citations.append(work_citation['citation-value'])

In [81]:
# Show the raw citation strings collected from the ORCID work records.
print(citations)

['@article{Tran2021,title = {Leading from Behind: Sovereign Credit Ratings during COVID-19 Pandemic},journal = {SSRN},year = {2021},author = {Tran, Y. and Vu, H. and Klusak, P. and Kramer, M. and Hoang, T.}}', '@article{Tran2021,title = {Sovereign credit ratings during the COVID-19 pandemic},journal = {International Review of Financial Analysis},year = {2021},volume = {78},author = {Tran, Y. and Vu, H. and Klusak, P. and Kraemer, M. and Hoang, T.}}', '@book{Nguyen2018,title = {Doanh nhân Việt Nam - Các yếu tố ảnh hưởng[Vietnamese entrepreneurs – Factors affecting performance]},publisher = {National University Press},year = {2018},author = {Nguyen, V. H. and Tran, Y. and Bui, T. H. A. and Tran, M. and Le, T. H. Y.}, address   = "Hanoi"}']


In [82]:
# Persist the collected citations to pubs.bib, one citation per line.
with open('pubs.bib', 'w', encoding='utf-8') as bibfile:
    for citation in citations:
        bibfile.write(citation + '\n')

In [83]:
# Read pubs.bib back in and print it to check what was written.
with open('pubs.bib', 'r', encoding='utf-8') as bibfile:
    bib_content = bibfile.read()
print(bib_content)

@article{Tran2021,title = {Leading from Behind: Sovereign Credit Ratings during COVID-19 Pandemic},journal = {SSRN},year = {2021},author = {Tran, Y. and Vu, H. and Klusak, P. and Kramer, M. and Hoang, T.}}
@article{Tran2021,title = {Sovereign credit ratings during the COVID-19 pandemic},journal = {International Review of Financial Analysis},year = {2021},volume = {78},author = {Tran, Y. and Vu, H. and Klusak, P. and Kraemer, M. and Hoang, T.}}
@book{Nguyen2018,title = {Doanh nhân Việt Nam - Các yếu tố ảnh hưởng[Vietnamese entrepreneurs – Factors affecting performance]},publisher = {National University Press},year = {2018},author = {Nguyen, V. H. and Tran, Y. and Bui, T. H. A. and Tran, M. and Le, T. H. Y.}, address   = "Hanoi"}



In [None]:
## Modify .bib entries

In [84]:
import bibtexparser

def _duplicate_key_suffix(index):
    """Return the letter suffix for the ``index``-th occurrence of a key (0-based).

    The sequence runs a, b, ..., z, aa, ab, ... so the suffix always consists
    of lowercase letters, however many duplicates there are.
    """
    letters = ""
    position = index + 1  # bijective base-26 works on 1-based positions
    while position > 0:
        position, remainder = divmod(position - 1, 26)
        letters = chr(97 + remainder) + letters
    return letters


def modify_bib_keys(bibfile_path, output_path):
    """Rewrite a BibTeX file so that every entry has a unique citation key.

    The file at ``bibfile_path`` is parsed with bibtexparser. The first entry
    using a given key keeps it unchanged; each later entry with the same key
    gets a letter suffix starting at 'b' (e.g. 'Tran2021', 'Tran2021b',
    'Tran2021c'). A suffixed key that is already taken by another entry is
    skipped, and the suffix continues with 'aa', 'ab', ... after 'z'.

    Args:
        bibfile_path: Path of the BibTeX file to read (UTF-8).
        output_path: Path the updated BibTeX data is written to (UTF-8).

    Returns:
        None. A confirmation message is printed once the file is written.
    """
    # Read and parse the BibTeX file
    with open(bibfile_path, 'r', encoding='utf-8') as bibfile:
        bibdata = bibtexparser.load(bibfile)

    # Every key present in the input; generated keys must avoid all of them.
    used_keys = {entry['ID'] for entry in bibdata.entries}

    # Number of suffixes already tried for each original citation key
    key_count = {}

    for entry in bibdata.entries:
        citation_key = entry['ID']  # The citation key (e.g., 'Tran2021')

        if citation_key not in key_count:
            # First occurrence keeps its key as is.
            key_count[citation_key] = 0
            continue

        # Repeated key: advance the suffix until the new key is unused.
        while True:
            key_count[citation_key] += 1
            candidate = citation_key + _duplicate_key_suffix(key_count[citation_key])
            if candidate not in used_keys:
                break
        used_keys.add(candidate)
        entry['ID'] = candidate

    # Save the updated BibTeX data to a new file
    with open(output_path, 'w', encoding='utf-8') as output_file:
        bibtexparser.dump(bibdata, output_file)

    print(f"Updated BibTeX file saved to: {output_path}")

# Example usage: read pubs.bib, make its citation keys unique, and write the result to pubs_fixed.bib
modify_bib_keys('pubs.bib', 'pubs_fixed.bib')


Updated BibTeX file saved to: pubs_fixed.bib


In [85]:
# Print the original file followed by the key-fixed file so they can be compared.
for bib_path in ('pubs.bib', 'pubs_fixed.bib'):
    with open(bib_path, 'r', encoding='utf-8') as bibfile:
        bib_content = bibfile.read()
    print(bib_content)

@article{Tran2021,title = {Leading from Behind: Sovereign Credit Ratings during COVID-19 Pandemic},journal = {SSRN},year = {2021},author = {Tran, Y. and Vu, H. and Klusak, P. and Kramer, M. and Hoang, T.}}
@article{Tran2021,title = {Sovereign credit ratings during the COVID-19 pandemic},journal = {International Review of Financial Analysis},year = {2021},volume = {78},author = {Tran, Y. and Vu, H. and Klusak, P. and Kraemer, M. and Hoang, T.}}
@book{Nguyen2018,title = {Doanh nhân Việt Nam - Các yếu tố ảnh hưởng[Vietnamese entrepreneurs – Factors affecting performance]},publisher = {National University Press},year = {2018},author = {Nguyen, V. H. and Tran, Y. and Bui, T. H. A. and Tran, M. and Le, T. H. Y.}, address   = "Hanoi"}

@book{Nguyen2018,
 address = {Hanoi},
 author = {Nguyen, V. H. and Tran, Y. and Bui, T. H. A. and Tran, M. and Le, T. H. Y.},
 publisher = {National University Press},
 title = {Doanh nhân Việt Nam - Các yếu tố ảnh hưởng[Vietnamese entrepreneurs – Factors affec

# Publications from Bib

Takes a set of bibtex of publications and converts them for use with academicpages.github.io. This is an interactive Jupyter notebook (see more info here).

The core Python code is also in pubsFromBibs.py. Run either one from the markdown_generator folder after updating the publist dictionary with:

* bib file names
* specific venue keys based on your bib file preferences
* any specific pre-text for specific files
* Collection Name (future feature)

In [27]:
#pip install pybtex

In [28]:
from pybtex.database.input import bibtex
import pybtex.database.input.bibtex 
from time import strptime
import string
import html
import os
import re

In [90]:
#todo: incorporate different collection types rather than a catch all publications, requires other changes to template
# One entry per publication source. The generation loop reads these settings:
#   "file"          - BibTeX file handed to the pybtex parser
#   "venuekey"      - entry field whose value is used as the venue
#   "venue-pretext" - text placed in front of the venue
#   "collection"    - "name" and "permalink" written into each page's YAML front matter
publist = {
    "journal":{
        "file": "pubs_fixed.bib",
        "venuekey" : "journal",
        "venue-pretext" : "",
        "collection" : {"name":"publications",
                        "permalink":"/publication/"}
    }, 
    "book":{
        "file": "pubs_fixed.bib",
        "venuekey" : "publisher",
        "venue-pretext" : "",
        "collection" : {"name":"publications",
                        "permalink":"/publication/"}
    } 
}

In [None]:
    "proceeding": {
        "file" : "pubs_fixed.bib",
        "venuekey": "booktitle",
        "venue-pretext": "In the proceedings of ",
        "collection" : {"name":"publications",
                        "permalink":"/publication/"}
        
    },

In [None]:
# Modify publist

## Escaping HTML special characters

YAML is very picky about what it accepts as a valid string, so we replace single and double quotes (and ampersands) with their HTML-encoded equivalents. This makes them less readable in raw form, but they are parsed and rendered nicely.

To replace:
* The ampersand (&) has a special meaning in HTML (it starts an entity), so it needs to be escaped.
* Double quote ("): This is useful when dealing with attribute values in HTML.
* Single quote ('): It’s similar to the double quote entity but used in different contexts, such as in single-quoted HTML attributes.

The html_escape function takes a string of text and escapes any characters that need to be represented as HTML entities. The function does this by iterating over the string and replacing special characters with their corresponding HTML entities.

Explanation of the function:
* text: This is the input string that the function will process.
* The function uses a generator expression inside "".join() to loop through each character c in the text.
* html_escape_table.get(c, c):
    * For each character c, it tries to lookup the character c in the html_escape_table.
    * If the character c exists in the table, it gets replaced with the corresponding HTML entity (e.g., & becomes &amp;).
    * If the character c does not exist in the table, it returns the character c unchanged (i.e., no escaping is applied).
* "".join() combines all the characters back together into a single string.

In [76]:
# Lookup table: character that must be escaped -> its HTML entity.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return ``text`` with &, double quotes and single quotes replaced by HTML entities.

    Characters that are not in ``html_escape_table`` are copied through unchanged.
    """
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)

## Generating markdown

In [None]:
import bibtexparser
# Parse pubs.bib (the file written before the citation keys were fixed) with
# bibtexparser and print the parsed object for inspection.
with open('pubs.bib', 'r', encoding='utf-8') as bibfile:
    bibdata = bibtexparser.load(bibfile)
print(bibdata)

In [92]:
# Generate one markdown page (YAML front matter plus a short body) per BibTeX
# entry, for every source configured in `publist`. Pages are written to
# ../_publications/.
for pubsource in publist:
    # Instantiate a pybtex BibTeX parser for this source and parse the file
    # named by the source's "file" setting.
    parser = bibtex.Parser()
    bibdata = parser.parse_file(publist[pubsource]["file"])

    # NOTE(review): every entry of the parsed file is processed for every source,
    # whatever its entry type; an entry without this source's "venuekey" field
    # ends up in the KeyError warning branch below. Confirm this is intended when
    # several sources point at the same file.

    #loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        #reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        b = bibdata.entries[bib_id].fields

        try:
            pub_year = f'{b["year"]}'

            if "month" in b.keys():
                month_raw = str(b["month"]).strip()
                if month_raw.isdecimal():
                    # Numeric month such as "3" or "11"
                    month_number = int(month_raw)
                else:
                    # Month name or abbreviation such as "March" or "mar"
                    try:
                        month_number = strptime(month_raw[:3], '%b').tm_mon
                    except ValueError:
                        month_number = None
                if month_number is not None and 1 <= month_number <= 12:
                    pub_month = "{:02d}".format(month_number)
                else:
                    # Keep the default month instead of aborting the whole run.
                    print(f'WARNING Unrecognised month {month_raw!r} in entry {bib_id}; using {pub_month}')
            if "day" in b.keys():
                day_raw = str(b["day"]).strip()
                # Zero-pad numeric days so the date stays in YYYY-MM-DD form.
                pub_day = day_raw.zfill(2) if day_raw.isdecimal() else day_raw

            pub_date = pub_year+"-"+pub_month+"-"+pub_day

            #strip out {} as needed (some bibtex entries that maintain formatting)
            plain_title = b["title"].replace("{", "").replace("}","").replace("\\","")
            clean_title = plain_title.replace(" ","-")

            url_slug = re.sub("\\[.*\\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--","-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--","-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--","-")

            #Build Citation from text
            citation = ""

            #citation authors - todo - add highlighting for primary author?
            for author in bibdata.entries[bib_id].persons["author"]:
                # An author may lack a first name (e.g. an institution), so only
                # the name parts that exist are used.
                first_name = author.first_names[0] if author.first_names else ""
                last_name = author.last_names[0] if author.last_names else ""
                author_name = " ".join(part for part in (first_name, last_name) if part)
                citation = citation + " " + author_name + ", "

            #citation title (left unescaped here: the whole citation is escaped
            #exactly once when it is written to the YAML block below)
            citation = citation + "\"" + plain_title + ".\""

            #add venue logic depending on citation type
            venue = publist[pubsource]["venue-pretext"]+b[publist[pubsource]["venuekey"]].replace("{", "").replace("}","").replace("\\","")

            citation = citation + " " + venue
            citation = citation + ", " + pub_year + "."


            ## YAML variables
            md = "---\ntitle: \""   + html_escape(plain_title) + '"\n'

            md += """collection: """ +  publist[pubsource]["collection"]["name"]

            md += """\npermalink: """ + publist[pubsource]["collection"]["permalink"]  + html_filename

            note = False
            if "note" in b.keys():
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)

            md += "\nvenue: '" + html_escape(venue) + "'"

            url = False
            if "url" in b.keys():
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            md += "\ncitation: '" + html_escape(citation) + "'"

            md += "\n---"


            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            if url:
                md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n"
            else:
                md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"

            md_filename = os.path.basename(md_filename)

            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # field may not exist for a reference
        except KeyError as e:
            # The missing field may be the title itself, so it is read defensively.
            title_preview = b["title"] if "title" in b else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title_preview[:30],"..."*(len(title_preview)>30),"\"")
            continue

SUCESSFULLY PARSED Tran2021: " Leading from Behind: Sovereign Credit Ratings during COVID-1 ... "
SUCESSFULLY PARSED Tran2021b: " Sovereign credit ratings during the COVID-19 pandemic  "
SUCESSFULLY PARSED Nguyen2018: " Doanh nhân Việt Nam - Các yếu tố ảnh hưởng[Vietnamese entrep ... "
