In [3]:
#imports
import os

try:
    import bibtexparser
except:
    ! conda install -c conda-forge bibtexparse

In [13]:
#PATHs
in_bib_path="../2_chapter_intro/ref/ref.bib"
in_tex_file_path = "../2_chapter_intro/inc/introduction.tex"

out_dir = "./fixed_ref"
if(not os.path.exists(out_dir)): os.mkdir(out_dir)
out_bib_path = out_dir+"/cleanManuscript.bib"

print("in Path:", in_bib_path)
print("in Path:", out_bib_path)

in Path: ../2_chapter_intro/ref/ref.bib
in Path: ./fixed_ref/cleanManuscript.bib


# Parsing and fixing of general things

## RULES

In [14]:
from reffix.entry_rules import titelcasing_fields, journal_iso4, double_minus, capitalize_keys
from reffix.entry_rules import check_misc_fields, check_incollection_fields, check_inproceedings_fields, check_inbook_fields, check_article_fields, check_book_fields
from reffix.entry_rules import homogenize_latex_encoding
from bibtexparser.customization import type as bibtexType

In [15]:
from bibtexparser.bparser import BibTexParser

def build_standardParser():
    myParser = BibTexParser()

    def rulez(record):
        record = bibtexType(record)
        record = capitalize_keys(record)

        #record = homogenize_latex_encoding(record)
        record = check_article_fields(record)
        record = check_book_fields(record)
        record = check_incollection_fields(record)
        record = check_inproceedings_fields(record)
        record = check_inbook_fields(record)


        record = double_minus(record)
        record = titelcasing_fields(record)
        record = journal_iso4(record)

        return record

    myParser.customization = rulez
    return myParser


## Load bib and Clean entries

the rules are stored in the build_standardParser function

In [16]:
with open(in_bib_path, "r", encoding="utf-8") as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file, parser=build_standardParser())


In [17]:
print(bib_database.entries_dict)

{'Agard2014': {'title': 'Chapter Two - Single-Particle Cryo-Electron Microscopy (Cryo-Em): Progress, Challenges, and Perspectives for Further Improvement', 'author': 'Agard, David and Cheng, Yifan and Glaeser, Robert M. and Subramaniam, Sriram', 'year': '2014', 'ENTRYTYPE': 'incollection', 'ID': 'Agard2014', 'publisher': 'Elsevier', 'pages': '113--137', 'booktitle': 'Advances in Imaging and Electron Physics'}, 'Aldeghi2016': {'title': 'Accurate Calculation of the Absolute Free Energy of Binding for Drug Molecules', 'author': 'Aldeghi, Matteo and Heifetz, Alexander and Bodkin, Michael J. and Knapp, Stefan and Biggin, Philip C.', 'year': '2016', 'journal': 'Chem. Sci.', 'volume': '7', 'pages': '207--218', 'ENTRYTYPE': 'article', 'ID': 'Aldeghi2016'}, 'Armacost2020': {'title': 'Novel Directions in Free Energy Methods and Applications', 'author': 'Armacost, Kira A. and Riniker, Sereina and Cournia, Zoe', 'year': '2020', 'journal': 'J. Chem. Inf. Model.', 'volume': '60', 'pages': '1--5', 'E

# Clean for Duplicates!

In [18]:
from reffix.database_functions import remove_duplicates

bib_database = remove_duplicates(bib_database)

Filter DB for duplicates
	Before Elements in DB:  142
	After Elements in DB:  142


# Check for required!

In [19]:
from reffix.database_functions import get_used_citations, filter_database_for_required_citations

#get all used citations
unique_citations = get_used_citations(in_tex_file_path)

#remove not used citations from DB
bib_database = filter_database_for_required_citations(bib_database=bib_database, used_citation_keys=unique_citations)

Filter DB for used in TEX
	Before Elements in DB:  142
	After Elements in DB:  141


# Write out nice Latex

In [20]:
print("Number of Entries: ", len(bib_database.entries))
with open(out_bib_path, 'w', encoding="utf-8") as bibtex_file:
    bibtexparser.dump(bib_database, bibtex_file)


Number of Entries:  141
