In [1]:
import pandas as pd
import numpy as np
import pybtex.database as pbx # https://docs.pybtex.org/api/parsing.html#pybtex.database.BibliographyData

In [None]:
# User-adjustable inputs: edit the values in this cell before running.

# Category labels; each one becomes a 0/1 column in the generated CSV.
# NOTE(review): 'Disease Identificaiton' looks misspelled, but it is compared
# verbatim against the group names in the .bib file -- confirm the spelling
# used there before changing it.
categories = pd.Index([
    'AI',
    'Disease Identificaiton',
    'Early Season Weed',
    'Edge Computing',
    'Image Processing',
    'Precision Agriculture',
    'Robotics',
    'UAV',
    'Weed Control',
])

# Path of the BibTeX file to read.
bib_input_file = 'export.bib'

In [2]:
# Bookkeeping for entries that do not end up in the CSV. Re-running this cell
# resets the counters and truncates the CSV file.
no_category_count = 0      # entries whose groups match none of `categories`
no_category_list = []      # keys of those entries
missing_group_count = 0    # entries without a 'mendeley-groups' field
missing_group_list = []    # keys of those entries


# create header for csv: first column 'title', then one column per category
header = ['title']
header.extend(list(categories))

# create csv file
with open('bib_keys.csv', 'w') as filehandle:
    filehandle.write(','.join(map(str, header)))
    # Terminate the header line; rows are appended later in 'a' mode, so
    # without this newline the first data row would be glued onto the header.
    filehandle.write('\n')

In [3]:
# read bibtex
def read_bib():
    """Parse the BibTeX file named by the module-level ``bib_input_file``.

    Returns
    -------
    tuple
        ``(number_of_entries, list_of_entry_keys, bib_data)`` where
        ``bib_data`` is the object returned by ``pbx.parse_file``.
    """
    bib_data = pbx.parse_file(bib_input_file, "bibtex")
    entry_keys = list(bib_data.entries.keys())
    return len(entry_keys), entry_keys, bib_data

In [4]:
# add row in csv for every entry
def create_csv(key,groups):
    """Append one row for ``key`` to ``bib_keys.csv``.

    The row holds the key followed by a 0/1 flag per entry of the
    module-level ``categories`` index, 1 meaning the category occurs in
    ``groups``. When no category matches, nothing is written; the key is
    recorded in ``no_category_list`` and ``no_category_count`` is incremented.

    Parameters
    ----------
    key
        Identifier written in the first column.
    groups
        Collection of group names to test against ``categories``.
    """
    global no_category_count, no_category_list
    flags = categories.isin(groups).astype(int)

    # Guard clause: entries matching no category are only tallied.
    if flags.sum() == 0:
        no_category_count += 1
        no_category_list.append(key)
        return

    row = ','.join([str(key), *flags.astype(str)])
    with open('bib_keys.csv', 'a') as filehandle:
        filehandle.write(row + '\n')

In [5]:
# read each entry in bibtex
def iter_bibtex(bib_data):
    """Write one CSV row per bibliography entry via ``create_csv``.

    For every entry in ``bib_data.entries`` the comma-separated
    ``mendeley-groups`` field is split into group names and handed to
    ``create_csv`` together with the entry key. Entries lacking that field
    are skipped and recorded in the module-level ``missing_group_count`` and
    ``missing_group_list``.

    Parameters
    ----------
    bib_data
        Parsed bibliography whose ``entries`` mapping yields objects exposing
        ``key`` and ``fields``.
    """
    global missing_group_count, missing_group_list
    for entry in bib_data.entries.values():
        # Only the field lookup is expected to raise KeyError.
        try:
            raw_groups = entry.fields['mendeley-groups']
        except KeyError:
            missing_group_count += 1
            missing_group_list.append(entry.key)
            continue
        # Strip surrounding whitespace so "AI, UAV" matches the same
        # categories as "AI,UAV"; drop empty fragments (e.g. trailing comma).
        groups = [name.strip() for name in raw_groups.split(",") if name.strip()]
        create_csv(str(entry.key), groups)

In [6]:
total_keys, key_list, bib_data = read_bib()
print("Total Keys Read: ", total_keys)
print('\n')
iter_bibtex(bib_data)
print('Entries with Missing Group: ', missing_group_count)
print(missing_group_list)
print('\n')
print('Entries with No Category: ', no_category_count)
print(no_category_list)

Total Keys Read:  56


Entries with Missing Group:  4
['Camacho2018', 'Sabour2017', 'Hinton', 'Hinton2011']


Entries with No Category:  12
['Huang2016', 'Barrero2018', 'Sanders2019', 'Reddy2014', 'ranganathan2018sustainably', 'golijan2018global', 'GRV:2017', 'silva2018feeding', 'van2019management', 'van2016survey', 'legleiter2013palmer', 'GrandViewMarket2017']
