In [1]:
import pandas as pd
import os
from semanticscholar import SemanticScholar
from get_biblatex import GetBiblatex


In [None]:
def remove_blacklist_items(df_new_items, blacklist_path="./script_data/blacklist.csv"):
    """Drop newly found items whose DOI or Semantic Scholar id is blacklisted.

    Parameters
    ----------
    df_new_items : pandas.DataFrame
        Candidate items; must contain the columns 'ss_doi' and 'ss_id'.
    blacklist_path : str, optional
        CSV file with the columns 'doi' and 'ss_id'. Defaults to the
        blacklist file used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df_new_items`` that match neither a blacklisted DOI nor
        a blacklisted ss_id. The number of removed rows is printed.
    """
    blacklisted_items = pd.read_csv(blacklist_path)

    # Drop missing values before matching: Series.isin treats NaN as equal to
    # NaN, so a single blacklist row without a DOI would otherwise remove
    # every new item that has no DOI either.
    blacklisted_dois = blacklisted_items['doi'].dropna().unique()
    blacklisted_ss_ids = blacklisted_items['ss_id'].dropna().unique()

    is_blacklisted = (
        df_new_items['ss_doi'].isin(blacklisted_dois)          # blacklisted DOIs
        | df_new_items['ss_id'].isin(blacklisted_ss_ids)       # blacklisted ss_ids
    )
    df_kept = df_new_items[~is_blacklisted]

    print(f"{len(df_new_items) - len(df_kept)} items removed from newly found items.")
    return df_kept

In [None]:
def add_item_to_blacklist(item, blacklist_path='./script_data/blacklist.csv'):
    """Append one manually checked item to the blacklist CSV.

    Parameters
    ----------
    item : mapping or pandas.Series
        Row describing the item. The keys 'staff_id', 'staff_name', 'ss_year',
        'ss_title', 'ss_doi' and 'Blacklist reason' are optional (missing ones
        are stored as empty); 'ss_id' is required for the returned message.
    blacklist_path : str, optional
        CSV file that is read, extended by one row and written back.

    Returns
    -------
    str
        Status message naming the blacklisted ss_id.

    Raises
    ------
    KeyError
        If ``item`` has no 'ss_id' entry.
    """
    # Read blacklist.csv
    blacklist_csv = pd.read_csv(blacklist_path)

    # Map the manual-check columns onto the blacklist columns
    move_to_blacklist = {
        'staff_id': item.get('staff_id', None),
        'staff_name': item.get('staff_name', None),  # was read from 'staff_id' by mistake
        'ss_year': item.get('ss_year', None),
        'ss_id': item.get('ss_id', None),
        'title': item.get('ss_title', None),
        'doi': item.get('ss_doi', None),
        'Should be in diag.bib': 'no',
        'Reason': item.get('Blacklist reason', None)
    }

    blacklist_csv = pd.concat([blacklist_csv, pd.DataFrame([move_to_blacklist])], ignore_index=True)

    # Save blacklist.csv
    blacklist_csv.to_csv(blacklist_path, index=False)

    return f"{item['ss_id']}: added to blacklist.csv"

In [None]:
def get_citations(schemantic_scholar_id):
    """Return how many entries Semantic Scholar lists under a paper's 'citations'.

    When an item has several ss_ids, the caller is expected to call this once
    per id and sum the returned values.

    Parameters
    ----------
    schemantic_scholar_id : str
        Semantic Scholar identifier of the paper to look up.

    Returns
    -------
    int
        Length of the 'citations' field of the fetched paper.
    """
    client = SemanticScholar()
    citing_papers = client.get_paper(schemantic_scholar_id)['citations']
    return len(citing_papers)

In [None]:
def add_item_to_diag_bib(item):
    """Generate a bib entry for ``item`` and append it to ``../diag.bib``.

    Parameters
    ----------
    item : mapping or pandas.Series
        Must provide 'ss_id' (a single id or a list of ids) and 'ss_doi'.

    Returns
    -------
    str
        Status message saying whether an entry was appended to diag.bib.
    """
    # diag.bib is expected one directory above the current working directory
    diag_bib_path = os.path.abspath(os.path.join(os.getcwd(), "..", 'diag.bib'))
    with open(diag_bib_path, encoding="utf8") as bibtex_file:
        diag_bib = bibtex_file.read()

    # Sum the citation counts over all Semantic Scholar ids of this item
    ss_ids = item['ss_id'] if isinstance(item['ss_id'], list) else [item['ss_id']]
    citations = sum(get_citations(ss_id) for ss_id in ss_ids)

    # Build the bib entry from the DOI; the current diag.bib content and the
    # citation count are handed to GetBiblatex as well
    reader = GetBiblatex(doi=item['ss_doi'], diag_bib=diag_bib, num_citations=citations)
    bibtext = reader.get_bib_text()

    # 'empty' is treated as "nothing to add"
    if bibtext == 'empty':
        return f"{item} not added to diag.bib: no bib entry was generated"

    # Persist the new entry. Appending is equivalent to writing back
    # diag_bib + bibtext, but leaves the existing file content untouched.
    with open(diag_bib_path, 'a', encoding="utf8") as bibtex_file:
        bibtex_file.write(bibtext)

    return f"{item} added to diag.bib"

In [None]:
def add_ss_id_to_existing_bibkey(item):
    """Placeholder for attaching an ss_id to an existing diag.bib entry.

    The steps listed below are not implemented yet: the function touches no
    file and only builds the status message.

    Parameters
    ----------
    item : object
        Value interpolated into the returned message.

    Returns
    -------
    str
        ``"<item> added to diag.bib"``.
    """
    # TODO: load diag.bib
    # TODO: update bibkey with ss_id
    # TODO: save diag.bib
    status = f"{item} added to diag.bib"
    return status

In [None]:
def update_citation_count(item):
    """Placeholder for refreshing an item's citation count.

    Nothing is updated yet; the function only builds the status message.

    Parameters
    ----------
    item : object
        Value interpolated into the returned message.

    Returns
    -------
    str
        ``"Citation count updated for <item>"``.
    """
    status = f"Citation count updated for {item}"
    return status

In [3]:
# Apply the manually chosen action of every row in manual_check.csv.
manually_checked = pd.read_csv("./script_data/manual_check.csv")
manually_checked = remove_blacklist_items(manually_checked)     # This should be done before actually manually checking

for index, bib_item in manually_checked.iterrows():
    # iterrows() yields each row as a Series, so the cell value is a plain
    # Python object: use `in` on the string, not the Series-only `.str` accessor.
    action = bib_item['action']

    # A missing action is read from the CSV as NaN (a float), not a string
    if not isinstance(action, str):
        print(f"{bib_item['ss_id']} has no action set, skipping")

    # Make sure item is manually checked
    elif "," in action:
        print(f"{bib_item['ss_id']} has not been checked yet, make sure only 1 action is mentioned")

    # Add new item to diag.bib (the function needs the whole row: it reads
    # both 'ss_id' and 'ss_doi')
    elif "add new item" in action:
        add_item_to_diag_bib(bib_item)

    # Add ss_id to already existing doi in diag.bib
    elif "add ss_id" in action:
        assert bib_item['ss_doi'] == bib_item['bib_doi'], f"ss_doi and bib_doi should be equal for {bib_item['ss_id']}"
        # The function takes a single argument, so pass the whole row
        add_ss_id_to_existing_bibkey(bib_item)

    elif "blacklist" in action:
        add_item_to_blacklist(bib_item)

print("DONE")

Unnamed: 0,bibkey,ss_id,reason,url,match score,bib_doi,ss_doi,bib_title,ss_title,bib_authors,...,bib_journal,ss_journal,bib_year,ss_year,bib_type,ss_pmid,action,assigned to,Blacklist reason,Unnamed: 20
0,Leeu22,00744ccc38761d1fec5542d0bc9bdf929fd855ca,new item,semantischolar.org/paper/00744ccc38761d1fec554...,0.530612,,10.1016/j.media.2022.102605,The rise of artificial intelligence solutions ...,Rapid artificial intelligence solutions in a p...,"van Leeuwen, Kicky G. and de Rooij, Maarten an...",...,,Medical Image Analysis,2022,2022,conference,36156419.0,[add new item],dre,,
1,Litj14e,0075a6b4a31f64f027ee1d300803d00317547557,new item,semantischolar.org/paper/0075a6b4a31f64f027ee1...,0.517073,,10.1177/0271678X18756218,Computer-aided Detection of Prostate Cancer in...,Quantitative blood flow measurement in rat bra...,G. Litjens and N. Karssemeijer and J. O. Baren...,...,,Journal of Cerebral Blood Flow & Metabolism,2014,2018,conference,29498562.0,[add new item],dre,,
2,Hadd20,00797964da6917e94ed3de51882fc7f6e297b747,new item,semantischolar.org/paper/00797964da6917e94ed3d...,0.486486,,10.1016/j.ejso.2022.11.378,Characterisation of the tumour-host interface ...,Uniform Noting for International application o...,Tariq Haddad and John Melle Bokhorst and Luuk ...,...,,European Journal of Surgical Oncology,2020,2023,conference,,[add new item],dre,,
3,Venh17,01df4624e9578fd597b0f0ee6d81ac508363e1eb,title match,semantischolar.org/paper/01df4624e9578fd597b0f...,1.000000,,,Fully automated detection of hyperreflective f...,Fully automated detection of hyperreflective f...,Freerk G. Venhuizen and Samuel Schaffhauser an...,...,,Investigative Ophthalmology & Visual Science,2017,2017,conference,,[blacklist ss_id],dre,ARVO Annual Meeting Abstract,
4,Hoss19,03ad8d7078805db6fbd4993b881045b462b4e028,new item,semantischolar.org/paper/03ad8d7078805db6fbd49...,0.508876,,10.1038/s41585-020-0324-x,Effect of Adding Probabilistic Zonal Prior in ...,Introducing PIONEER: a project to harness big ...,Matin Hosseinzadeh and Patrick Brand and Henkj...,...,,Nature Reviews Urology,2019,2020,inproceedings,32461687.0,[add new item],dre,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,Less19,fbdd91432a4cd3077d88a4ce20a1e7c1bdcbab05,new item,semantischolar.org/paper/fbdd91432a4cd3077d88a...,0.389381,10.1016/j.jcmg.2018.10.026,10.1093/cid/ciaa1855,Sex Differences in Coronary Artery and Thoraci...,Guidance on Imaging for Invasive Pulmonary Asp...,"Lessmann, Nikolas and de Jong, Pim A and Celen...",...,JACCCI,Clinical infectious diseases : an official pub...,2019,2021,article,33709131.0,"[add ss_id, blacklist ss_id, add new item, None]",,,
276,Vent20,fc12f80e0fe56243c26f628d311577507f34b39c,title match,semantischolar.org/paper/fc12f80e0fe56243c26f6...,1.000000,,,Estimating Uncertainty of Deep Neural Networks...,Estimating Uncertainty of Deep Neural Networks...,"de Vente, Coen and van Grinsven, Mark and De Z...",...,,Investigative Ophthalmology & Visual Science,2020,2020,conference,,"[add ss_id, blacklist ss_id, add new item]",,,
277,Pinc22,fc34292163822dca66f4f284ceb0e5cb689727f5,new item,semantischolar.org/paper/fc34292163822dca66f4f...,0.512821,10.1038/s43856-022-00126-3,10.1186/s13058-022-01541-z,Predicting biochemical recurrence of prostate ...,Towards defining morphologic parameters of nor...,"Pinckaers, Hans and van Ipenburg, Jolique and ...",...,COMMMED,Breast Cancer Research,2022,2022,article,,"[add ss_id, blacklist ss_id, add new item, None]",,,
278,Scho14b,fdac595273fd6b71aa7ee5a231b26df69b41eac5,new item,semantischolar.org/paper/fdac595273fd6b71aa7ee...,0.681818,,10.21037/TLCR-20-924,SubSolid Nodules in lung cancer screening,The radiologist’s role in lung cancer screening,Ernst Th. Scholten,...,PhD thesis,Translational Lung Cancer Research,2014,2020,phdthesis,34164283.0,"[add ss_id, blacklist ss_id, add new item, None]",,,
