In [29]:
from bibutils import load_bib, write_bib
import bibtexparser
from bibtexparser.customization import author, page_double_hyphen
from copy import deepcopy
from titlecase import titlecase
import calendar
import re
from StringIO import StringIO
import pyperclip
from datetime import datetime

In [30]:
def change_journal_to_journaltitle(entry):
    """Rename the ``journal`` field of ``entry`` to ``journaltitle`` in place.

    The ``journal`` key is always removed from ``entry``.  When its value is
    non-empty the value is stored under ``journaltitle`` and the change is
    logged together with ``entry['ID']``; an empty value is simply dropped.

    Parameters
    ----------
    entry : dict
        A single bibliography entry; must contain ``'ID'`` whenever a
        non-empty ``'journal'`` is present.
    """
    journal_name = entry.pop('journal', None)
    if not journal_name:
        # Nothing to rename (field absent or empty).
        return
    message = u"Changing 'journal={}' to 'journaltitle' from ID:{}"
    print(message.format(journal_name, entry['ID']))
    entry['journaltitle'] = journal_name

def remove_curly_brackets(string):
    """Return ``string`` with every ``{`` and ``}`` character removed."""
    for bracket in ('{', '}'):
        string = string.replace(bracket, '')
    return string

def change_month(entry):
    """Replace a month name in ``entry['month']`` with its month number.

    The field value is stripped, lower-cased and cut to its first three
    characters, then looked up among the abbreviations in
    ``calendar.month_abbr``.  On a match the field is overwritten with the
    month number as a string (``'January'`` -> ``'1'``).  Values that do not
    match (for example an already numeric month) are left untouched.

    An empty or whitespace-only month carries no information and is removed
    from ``entry``.

    Parameters
    ----------
    entry : dict
        A single bibliography entry; modified in place.

    Returns
    -------
    None
    """
    try:
        month = entry['month']
    except KeyError:
        print("no month")
        return

    month = month.strip().lower()[:3]
    if not month:
        # calendar.month_abbr[0] is the empty string, so an empty field would
        # otherwise "match" it and be rewritten as the bogus month '0'.
        print("removing empty month")
        del entry['month']
        return

    print("Month", month)
    # Map each abbreviation to its 1-based number, skipping the empty
    # placeholder at index 0.
    month_numbers = {
        month_name.lower(): str(number)
        for number, month_name in enumerate(calendar.month_abbr)
        if month_name
    }
    if month in month_numbers:
        month_i = month_numbers[month]
        print(u"Changing month from", month, u"to", month_i)
        entry['month'] = month_i
    
def lower_case_id(entry):
    """Lower-case the first character of ``entry['ID']`` in place.

    The change is applied only when the ID has at least two characters and
    its second character is lowercase; otherwise the ID is left as it is.
    """
    key = entry['ID']
    if len(key) < 2 or not key[1].islower():
        return
    entry['ID'] = key[:1].lower() + key[1:]
        
def fix_IEEE_journals(s):
    """Move a trailing ``", IEEE ..."`` qualifier to the front of a title.

    When the text after the *last* comma contains ``'IEEE'`` that part is
    moved in front of everything before it, e.g.
    ``"Smart Grid, IEEE Transactions on"`` becomes
    ``"IEEE Transactions on Smart Grid"``.  Commas inside the leading part
    are preserved.  Any other string is returned unchanged.

    Parameters
    ----------
    s : str
        Journal or book title.

    Returns
    -------
    str
        The reordered title, or ``s`` itself when no reordering applies.
    """
    # Split on the last comma only, so titles that themselves contain commas
    # ("Systems, Man, and Cybernetics, IEEE Transactions on") stay intact.
    head, comma, tail = s.rpartition(',')
    if comma and 'IEEE' in tail:
        print(u"replacing '{}' ".format(s), end='')
        s = tail.strip() + ' ' + head.strip()
        print(u"with '{}'".format(s))
    return s

def numbering(s):
    """Replace spelled-out ordinals ('first' .. 'twentieth') with numerals.

    ``s`` is split on single spaces and every token that, ignoring case, is
    exactly one of the known ordinal words is replaced by its numeric form
    (``'Second'`` -> ``'2nd'``).  Only whole tokens are replaced, so words
    that merely contain an ordinal (``'Secondary'``) are left alone.
    Spacing between tokens is preserved.

    Parameters
    ----------
    s : str
        Text to convert, e.g. a conference title.

    Returns
    -------
    str
        ``s`` with ordinal words replaced.
    """
    ordinals = {
        'first': '1st',
        'second': '2nd',
        'third': '3rd',
        'fourth': '4th',
        'fifth': '5th',
        'sixth': '6th',
        'seventh': '7th',
        'eighth': '8th',
        'ninth': '9th',
        'tenth': '10th',
        'eleventh': '11th',
        'twelfth': '12th',
        'thirteenth': '13th',
        'fourteenth': '14th',
        'fifteenth': '15th',
        'sixteenth': '16th',
        'seventeenth': '17th',
        'eighteenth': '18th',
        'nineteenth': '19th',
        'twentieth': '20th'}
    # Substitute token by token; a plain str.replace on the whole string
    # would also rewrite the ordinal where it occurs inside longer words.
    return ' '.join(ordinals.get(word.lower(), word) for word in s.split(' '))

In [31]:
def process(bib_database):
    """Clean up every entry of ``bib_database`` in place and return it.

    For each entry in ``bib_database.entries`` the following steps run in
    order (progress is reported with ``print``):

    * ``ID`` is rebuilt from the lower-cased surname of the first author
      followed by the year (taken from ``year``, else from the first four
      characters of ``date``, else left out).
    * ``journal`` is renamed to ``journaltitle``; ``journaltitle`` and
      ``booktitle`` get IEEE titles reordered and `` &`` escaped as `` \\&``.
    * ``booktitle``: the first parenthesised part is removed (and stored as
      ``series`` if that field is absent), ``'Proceedings of the '`` and a
      leading four-digit year are removed, and spelled-out ordinals are
      converted to numerals.
    * ``series`` is cut at its first apostrophe.
    * ``keywords``, ``file`` and ``mendeley-tags`` are dropped.
    * ``month`` is converted to a number; ``publisher`` is set to ``ACM`` or
      ``IEEE`` when one of them occurs in ``journaltitle``/``booktitle`` and
      no publisher is set (the name is also removed from ``booktitle``).
    * When there is no ``date``, ``year`` (and a valid ``month``) are folded
      into ``date``; suspicious dates and months are reported, not fixed.
    * Pages, ``doi``, ``title`` and ``abstract`` are normalised and
      ``owner``/``timestamp`` are stamped.

    Parameters
    ----------
    bib_database : object
        Must expose ``entries``, a list of dict entries.

    Returns
    -------
    object
        The same ``bib_database``, with its entries modified in place.

    Raises
    ------
    KeyError
        If an entry has no ``title`` (and, presumably, if it has no
        ``author`` -- verify against bibtexparser's ``author``).
    """
    for entry in bib_database.entries:
        # Change ID
        # NOTE(review): assumes bibtexparser's author() rewrites each name as
        # "Surname, Given names" -- confirm against the library docs.
        surname_of_first_author = author(deepcopy(entry))['author'][0].split(',')[0].strip().lower()
        # Entries may carry 'date' instead of 'year'; use its leading four
        # characters rather than failing with a KeyError.
        year = entry.get('year') or entry.get('date', '')[:4]
        # Unicode format string so non-ASCII surnames do not raise
        # UnicodeEncodeError on Python 2.
        entry['ID'] = u'{}{}'.format(surname_of_first_author, year)

        # journaltitle and booktitle
        change_journal_to_journaltitle(entry)
        for key in ['journaltitle', 'booktitle']:
            if key in entry:
                entry[key] = fix_IEEE_journals(entry[key])
                if ' &' in entry[key]:
                    # Escape the ampersand for LaTeX.
                    entry[key] = entry[key].replace(' &', ' \\&')

        if 'booktitle' in entry:
            booktitle = entry['booktitle']

            # Extract 'series' from 'booktitle'
            # Pair the first '(' with the first ')' that follows it, so a
            # stray ')' earlier in the title cannot produce a bogus slice.
            start = booktitle.find('(')
            end = booktitle.find(')', start + 1) if start != -1 else -1
            if end != -1:
                if 'series' not in entry:
                    entry['series'] = booktitle[start+1:end]
                    print("setting 'series'")
                print("removing series from booktitle")
                before = booktitle[:start].strip()
                after = booktitle[end+1:].strip()
                # Keep the words on either side of the removed parenthesis
                # apart, but do not put a space in front of punctuation.
                if before and after and after[0] not in ',.;:':
                    booktitle = before + ' ' + after
                else:
                    booktitle = before + after

            # Extract 'Proceedings of the ' from 'booktitle'
            booktitle = booktitle.replace('Proceedings of the ', '')

            # Remove date from 'booktitle':
            if re.match(r"""\d{4}""", booktitle):
                print("removing date from booktitle")
                # Drops the four digits plus the character after them.
                booktitle = booktitle[5:]

            # Convert 'first' to '1st' etc:
            booktitle = numbering(booktitle)

            entry['booktitle'] = booktitle

        if 'series' in entry:
            if "'" in entry['series']:
                print("removing ' from series")
                entry['series'] = entry['series'].split("'")[0].strip()

        for key_to_delete in ['keywords', 'file', 'mendeley-tags']:
            entry.pop(key_to_delete, None)
        change_month(entry)
        lower_case_id(entry)

        # Set 'publisher'
        for publisher in ['ACM', 'IEEE']:
            for key in ['journaltitle', 'booktitle']:
                if publisher in entry.get(key, ''):
                    if key == 'booktitle':
                        print("removing", publisher, "from", key)
                        entry[key] = entry[key].replace(publisher + ' ', '')

                    if 'publisher' not in entry:
                        entry['publisher'] = publisher
                        print("setting publisher to", publisher, "for ID", entry['ID'])

        # Remove 'year' and 'month' and use 'date' instead
        if 'date' not in entry:
            if 'year' in entry:
                entry['date'] = entry.pop('year')
            if 'month' in entry and 'date' in entry:
                month = entry['month']
                try:
                    month_number = int(month)
                except ValueError:
                    month_number = None
                if month_number is not None and 1 <= month_number <= 12:
                    entry.pop('month')
                    entry['date'] += '-{:02d}'.format(month_number)
                else:
                    # Leave the field in place and flag it instead of
                    # crashing or writing an invalid date such as 'YYYY-00'.
                    print("********##### BAD MONTH", month, entry['ID'])

        if 'date' in entry:
            date = entry['date']
            def bad_date():
                print("********##### BAD DATE", entry['date'], entry['ID'])

            # The expected pattern depends on the length of the value; the
            # patterns are prefix matches, so trailing text is tolerated.
            if len(date) > 7 and not re.match(r"""\d{4}-\d{2}-\d{2}""", date):
                bad_date()
            elif len(date) > 4 and not re.match(r"""\d{4}-\d{2}""", date):
                bad_date()
            elif not re.match(r"""\d{4}""", date):
                bad_date()

        page_double_hyphen(entry)

        if 'doi' in entry:
            # Strip the resolver prefix (http or https, dx.doi.org or doi.org)
            # so only the bare DOI remains.
            entry['doi'] = re.sub(r'https?://(?:dx\.)?doi\.org/', '', entry['doi'])

            # With a DOI present these fields are dropped.
            for key in ['url', 'eprint', 'eprinttype']:
                if key in entry:
                    entry.pop(key)
                    print("removing", key)

        # Format title
        entry['title'] = remove_curly_brackets(entry['title'])
        if entry.get('publisher', '') == 'IEEE':
            entry['title'] = titlecase(entry['title'])

        # Format abstract
        if 'abstract' in entry:
            # Remove backslash-escaped braces.
            entry['abstract'] = entry['abstract'].replace('\\{', '')
            entry['abstract'] = entry['abstract'].replace('\\}', '')

        entry['owner'] = 'jack'
        entry['timestamp'] = datetime.now().strftime('%Y.%m.%d')

    return bib_database

In [32]:
def _repair_clipboard_bibtex(raw):
    """Apply the textual repairs needed before the BibTeX can be parsed.

    A trailing ``'},}'`` is rewritten so the entry ends with ``'}\\n}'``,
    double-quote-delimited abstracts (optionally starting with the word
    "Abstract") are given an opening ``{``, and every ``' "'`` at the end
    of a line becomes ``'}'``.
    """
    # Fix broken IEEE end of string:
    if raw.endswith('},}'):
        raw = raw[:-2] + '\n}'

    # Applied in this order: the more specific abstract prefix goes first.
    substitutions = [
        ('abstract = "Abstract ', 'abstract = {'),
        ('abstract = "', 'abstract = {'),
        (' "\n', '}\n'),
    ]
    for old, new in substitutions:
        raw = raw.replace(old, new)
    return raw


bibtex = _repair_clipboard_bibtex(pyperclip.paste())
stringio = StringIO(bibtex)
bib_database = load_bib(stringio=stringio)

In [33]:
# Run the clean-up, show the log, the original text and the processed
# result, then put the processed BibTeX back on the clipboard.
print("----------------LOG----------------\n")
bib_database = process(bib_database)
# Indexing entries[0] raises IndexError when nothing could be parsed from
# the clipboard, so only show the first entry if there is one.
if bib_database.entries:
    print(bib_database.entries[0])
else:
    print("no entries parsed from clipboard")
print("\n\n")

print("----------------ORIGINAL-----------\n")
print(bibtex)

print("\n\n")
print("----------------PROCESSED----------\n")
bibstr = bibtexparser.dumps(bib_database)
print(bibstr)

# Do not replace the clipboard contents with an empty dump.
if bib_database.entries:
    pyperclip.copy(bibstr)

----------------LOG----------------

Changing 'journal=Energy and Buildings' to 'journaltitle' from ID:chiang2014
no month
removing url
{u'note': '', u'doi': u'10.1016/j.enbuild.2013.10.035', u'author': u'Teresa Chiang and Gokhan Mevlevioglu and Sukumar Natarajan and Julian Padget and Ian Walker', u'title': u'Inducing [sub]conscious energy behaviour through visually displayed energy information: A case study in university accommodation', 'journaltitle': u'Energy and Buildings', u'issn': u'0378-7788', u'number': '', u'abstract': u'Direct feedback on energy use presented by in-home displays (IHDs) has been found to be useful in helping people learn about their energy use and make a reduction. However, it is not yet clear what is the best form in which to present energy information. Two six-week experiments were carried out in student residences at the University of Bath, UK, to investigate how visually displayed energy information presented in different ways could encourage reductions in

In [34]:
# Scratch check: titlecase() on an ALL-CAPS title capitalises the main words
# and keeps small words such as "for" and "and" lowercase (see output below).
titlecase("LOW COST FRAMEWORK FOR NON-INTRUSIVE HOME ENERGY MONITORING AND RESEARCH")

'Low Cost Framework for Non-Intrusive Home Energy Monitoring and Research'

In [35]:
# For comparison with titlecase(): plain str.lower() lower-cases every word.
"LOW COST FRAMEWORK FOR NON-INTRUSIVE HOME ENERGY MONITORING AND RESEARCH".lower()

'low cost framework for non-intrusive home energy monitoring and research'