## Import the needed libraries

In [1]:
# import re
from pymongo import MongoClient
# from elasticsearch import Elasticsearch
# from habanero import Crossref, counts, cn
# import pandas as pd
# import matplotlib.pyplot as plt
# from matplotlib.ticker import FuncFormatter

## Get the mongo collection

In [2]:
# connect to the MongoDB server, passing 'mongo' as the host
client = MongoClient('mongo')

# select the 'narcis' database on that server
metadatadb = client['narcis']

# select the 'data' collection inside that database
metacollection = metadatadb['data']

## The metadata retriever

In [3]:
def metadata_retriever(collection, DOI):
    """
    Looks up the metadata record that belongs to a DOI.

    @param  Collection   The metadata collection
    @param  string       The DOI
    @return dict         The corresponding metadata, or None when there is no hit
    """

    # ask for at most one document whose 'doi' field equals the given DOI
    cursor = collection.find({'doi': DOI}).limit(1)

    # hand back the first document, or None when the cursor yields nothing
    return next(iter(cursor), None)
    

## Get the metadata that belongs to the DOI

In [4]:
# look up the record of one example DOI in the 'doiboost2017' collection of
# the database; metadata_retriever gives None when the DOI has no hit
metadata = metadata_retriever(metadatadb.doiboost2017, '10.1007/s00422-017-0730-1')
print(metadata)

{'_id': ObjectId('5d01137ba1ae159641df3c08'), 'publisher': None, 'issn': [{'type': 'print', 'value': '0340-1200'}, {'type': 'electronic', 'value': '1432-0770'}], 'doi': '10.1007/s00422-017-0730-1', 'license': [{'url': 'http://creativecommons.org/licenses/by/4.0', 'content-version': 'unspecified', '"delay-in-days': None, 'date-time': '2017-09-14T00:00:00Z'}], 'published-print': '2017-12-1', 'title': ['Affective–associative two-process theory: a neurocomputational account of partial reinforcement extinction effects'], 'issued': '2017-9-14', 'abstract': [{'provenance': 'MAG', 'value': 'The partial reinforcement extinction effect (PREE) is an experimentally established phenomenon: behavioural response to a given stimulus is more persistent when previously inconsistently rewarded than when consistently rewarded. This phenomenon is, however, controversial in animal/human learning theory. Contradictory findings exist regarding when the PREE occurs. One body of research has found a within-subj

## The GRID id retriever

In [5]:
def grid_ids_retriever(metadata):
    """
    Collects the GRID ids of all author affiliations in a metadata record.

    The ids are returned in author/affiliation order and are not
    de-duplicated. A missing record, and authors or affiliations that lack
    the needed information, are skipped instead of raising an error.

    @param  dict         The metadata (may be None when no record was found)
    @return list         The GRID ids found in the metadata
    """

    # no record (e.g. the DOI lookup had no hit) means there are no GRID ids
    if not metadata:
        return []

    # initial list
    gridIDs = []

    # loop over the authors; 'or []' covers both a missing and an empty entry
    for author in metadata.get('authors') or []:

        # loop over the affiliation(s) of the author, if there are any
        for affiliation in author.get('affiliations') or []:

            # get the identifiers
            identifiers = affiliation.get('identifiers') or []

            # the GRID id is expected to be the second identifier, so skip
            # the affiliation when that entry is not there
            if len(identifiers) < 2:
                continue

            # only use the second identifier when it carries a value
            second = identifiers[1]
            if 'value' in second:
                gridIDs.append(second['value'])

    # return the list of GRID ids
    return gridIDs

## Get the GRID ids that belong to the metadata

In [6]:
# collect the GRID ids of the affiliations in the record retrieved above
gridIDs = grid_ids_retriever(metadata)
print(gridIDs)

['grid.8761.8', 'grid.8761.8', 'grid.412798.1', 'grid.5801.c', 'grid.4514.4']
