# Setup

Run this cell once at the start

In [2]:
# redcap_api.ipynb, a Python script for getting data from ORCID and loading it into a RedCap form.
version = '0.1'
created = '2022-02-25'

# (c) 2022 Vanderbilt University. This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0
# Author: Steve Baskauf

# -------------
# Configuration
# -------------

# Module imports

import hashlib
import json
import secrets
from pathlib import Path

import requests

# Global variables
# Base URL of the REDCap API; retrieve_api_data() sends every request to this address.
endpoint_url = 'https://redcap.vanderbilt.edu/api/'

# ---------------------

# Function definitions

# value of directory should be either 'home' or 'working'
def load_credential(filename, directory):
    """Retrieve the raw token text from a plain text file.

    filename: name of the credential file (or a path to it when `directory`
        is not 'home').
    directory: 'home' to look in the user's home directory; any other value
        is treated as 'working' and `filename` is opened as given.

    Returns the complete, unmodified text of the file (including any trailing
    newline it may contain).

    Raises SystemExit with a non-zero status, after printing an explanation,
    if the file cannot be opened or read.
    """
    if directory == 'home':
        # Path.home() resolves the home directory on Windows, Mac and Linux.
        credential_path = Path.home() / filename
    else:
        # Normalize the label so the error message below always names a real choice.
        directory = 'working'
        credential_path = Path(filename)
    try:
        with open(credential_path, 'rt', encoding='utf-8') as file_object:
            return file_object.read()
    except FileNotFoundError:
        print(filename + ' file not found - is it in your ' + directory + ' directory?')
    except OSError as err:
        # Permission problems etc. are reported as what they are, not as "not found".
        print('Could not read ' + str(credential_path) + ': ' + str(err))
    # Raise directly rather than calling exit(), which is only an interactive
    # convenience installed by the site module; use a failing exit status.
    raise SystemExit(1)

def retrieve_api_data(token, fields):
    """Send a POST request to the API and convert the JSON response to a data structure.

    token: API token, sent as the 'token' form field.
    fields: dictionary of additional form fields for the request; its entries
        are merged over the defaults ('token', 'format', 'returnFormat').

    Prints the HTTP status code, and the raw response text on any failure.

    Returns a (status, data) tuple:
        (200, parsed JSON)  when the request succeeds and the body is valid JSON,
        (0, {})             when the status is 200 but the body is not valid JSON,
        (status, {})        for any other HTTP status code.
    """
    # Kept separate from the parsed result so the request and response are not confused.
    payload = {
        'token': token,
        'format': 'json',
        'returnFormat': 'json'
    }
    payload.update(fields)
    response = requests.post(endpoint_url, data=payload)
    status = response.status_code
    print('status:', status)
    if status != 200:
        print(response.text)
        return status, {}
    try:
        return status, response.json()
    except ValueError:
        # The JSON decoding errors raised by response.json() are ValueError subclasses.
        print(response.text)
        return 0, {}

# ---------------------

# Load token from file

# load_credential() returns the file's raw text; strip surrounding whitespace so
# a trailing newline in the token file is not sent as part of the token.
token = load_credential('redcap_test_token.txt', 'home').strip()


# Retrieve some data from ORCID

This code is modified from https://github.com/HeardLibrary/linked-data/blob/master/publications/orcid/orcid-get-json.ipynb

In [6]:
orcid_iri = 'https://orcid.org/0000-0002-0061-5182'
# The Accept header requests the JSON form of the record.
response = requests.get(orcid_iri, headers={'Accept' : 'application/json'})
# Stop here with a clear HTTP error instead of failing later while decoding
# or indexing an error response.
response.raise_for_status()
data = response.json()
print(json.dumps(data, indent = 2))

{
  "orcid-identifier": {
    "uri": "https://orcid.org/0000-0002-0061-5182",
    "path": "0000-0002-0061-5182",
    "host": "orcid.org"
  },
  "preferences": {
    "locale": "en"
  },
  "history": {
    "creation-method": "WEBSITE",
    "completion-date": null,
    "submission-date": {
      "value": 1384177076903
    },
    "last-modified-date": {
      "value": 1539183360388
    },
    "claimed": true,
    "source": null,
    "deactivation-date": null,
    "verified-email": true,
    "verified-primary-email": true
  },
  "person": {
    "last-modified-date": {
      "value": 1456773714763
    },
    "name": {
      "created-date": {
        "value": 1460756524649
      },
      "last-modified-date": {
        "value": 1460756524649
      },
      "given-names": {
        "value": "Andrew"
      },
      "family-name": {
        "value": "Wesolek"
      },
      "credit-name": null,
      "source": null,
      "visibility": "public",
      "path": "0000-0002-0061-5182"
    },
    "ot

In [7]:
def _value_or_empty(node):
    """Return node['value'], or an empty string when the node is null/empty."""
    return node['value'] if node else ''


def _format_partial_date(date_node):
    """Build 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD' from an ORCID date node.

    Returns an empty string when the node or its year is null. The month is
    appended only when a year is present, and the day only when a month is.
    """
    if not date_node or not date_node['year']:
        return ''
    formatted = date_node['year']['value']
    month = date_node['month']
    if month:
        formatted += '-' + month['value']
        day = date_node['day']
        if day:
            formatted += '-' + day['value']
    return formatted


# Extract the ORCID
orcidId = data['orcid-identifier']['path']

# Extract the names; a null name part becomes an empty string
givenNames = _value_or_empty(data['person']['name']['given-names'])
familyName = _value_or_empty(data['person']['name']['family-name'])

# Check the affiliations and print the data from the Vanderbilt ones
affiliations = data['activities-summary']['employments']['affiliation-group']
for affiliation in affiliations:
    for summary in affiliation['summaries']:
        employment = summary['employment-summary']
        startDate = _format_partial_date(employment['start-date'])
        endDate = _format_partial_date(employment['end-date'])
        # if there is no value for department, set it to empty string
        department = employment['department-name'] or ''
        # Always rebind organization so a record without one neither raises
        # NameError nor reuses the value left over from the previous summary.
        if employment['organization']:
            organization = employment['organization']['name']
        else:
            organization = ''
        if 'Vanderbilt University' in organization:
            print(orcidId, givenNames, familyName, startDate, endDate, department, organization)

0000-0002-0061-5182 Andrew Wesolek 2018-05-15  Libraries Vanderbilt University


In [20]:
def _stripped_value(node):
    """Return node['value'] without surrounding whitespace, or '' for a null node."""
    return node['value'].strip() if node else ''


# Bind citation up front so the cell that builds the REDCap record still finds
# it when the ORCID record lists no works.
citation = ''
works = data['activities-summary']['works']['group']
# Only the first work is used for this test.
for work_container in works[:1]:
    work = work_container['work-summary'][0]
    print(json.dumps(work, indent = 2))
    # Absent fields show up as null in the ORCID JSON (e.g. "subtitle", "url",
    # "month" in the sample output), so guard each level before indexing into it.
    title_node = work['title']
    title = _stripped_value(title_node['title'] if title_node else None)
    date_node = work['publication-date']
    pub_date = _stripped_value(date_node['year'] if date_node else None)
    journal = _stripped_value(work['journal-title'])
    # Same "title. year. journal" layout as before; missing parts are skipped.
    citation = '. '.join(part for part in (title, pub_date, journal) if part)
    print(citation)


{
  "put-code": 49189303,
  "created-date": {
    "value": 1539182715316
  },
  "last-modified-date": {
    "value": 1539182715316
  },
  "source": {
    "source-orcid": {
      "uri": "https://orcid.org/0000-0002-0061-5182",
      "path": "0000-0002-0061-5182",
      "host": "orcid.org"
    },
    "source-client-id": null,
    "source-name": {
      "value": "Andrew Wesolek"
    },
    "assertion-origin-orcid": null,
    "assertion-origin-client-id": null,
    "assertion-origin-name": null
  },
  "title": {
    "title": {
      "value": "What\u2019s in a Container? The Future of the Scholarly Journal "
    },
    "subtitle": null,
    "translated-title": null
  },
  "external-ids": {
    "external-id": []
  },
  "url": null,
  "type": "journal-issue",
  "publication-date": {
    "year": {
      "value": "2016"
    },
    "month": null,
    "day": null
  },
  "journal-title": {
    "value": "Against the Grain: Linking Publisher, Librarians, and Vendors"
  },
  "visibility": "public",
 

# Set up specific data fields

The following cells can be run individually to do different kinds of API interactions.
See https://redcap.vanderbilt.edu/api/help/ for more info. Clicking on the `API Playground` link will let you interactively build the request JSON for different languages.

In [None]:
# Export request: selected fields of record 1 from form_1, as flat raw values.
_export_field_names = (
    'record_id',
    'orcid',
    'name',
    'doi',
    'citation',
    'type',
    'form_1_complete',
)

data_fields = {
    'content': 'record',
    'action': 'export',
    'type': 'flat',
    'csvDelimiter': '',
    'records[0]': '1',
    # The API takes list parameters as indexed keys: fields[0], fields[1], ...
    **{
        'fields[' + str(position) + ']': field_name
        for position, field_name in enumerate(_export_field_names)
    },
    'forms[0]': 'form_1',
    'rawOrLabel': 'raw',
    'rawOrLabelHeaders': 'raw',
    'exportCheckboxLabel': 'false',
    'exportSurveyFields': 'false',
    'exportDataAccessGroups': 'false',
}


In [None]:
# Successful test of writing
# You can send multiple records at once, but each new record needs its own record_id.
# hashlib.sha1().hexdigest()[:16] hashes empty input, so it always produces the
# same value ('da39a3ee5e6b4b0d') and every record created that way collides.
# secrets.token_hex(8) returns 16 random hexadecimal characters instead.

fields =  [
    {
    'record_id': secrets.token_hex(8),
    "orcid": "1234-2222-7777-2678",
    "name": "Jing Ping",
    "doi": "10.1234/j.mol.stuff.123",
    "citation": "J. Zhang. 2021. My better article. J.Whatever 2:12-13",
    "type": " Journal article",
    "form_1_complete": "2"  
},
{
    'record_id': '1000',
    "orcid": "1234-2222-333-2678",
    "name": "Xiang Zeng",
    "doi": "10.1234/j.mol.stuff.365",
    "citation": "J. Zhang. 2021. My best article. J.Whatever 2:12-13",
    "type": " Journal article",
    "form_1_complete": "2"  
}
]

# The records are sent to the API as a JSON string in the 'data' form field.
data_fields = {
    'content': 'record',
    'type': 'flat',
    'data': json.dumps(fields)
}


In [21]:
# Test with data from ORCID
# Uses orcidId, givenNames, familyName and citation from the ORCID cells above.
# record_id comes from secrets.token_hex(8) (16 random hex characters);
# hashlib.sha1().hexdigest()[:16] hashes empty input and is therefore the same
# constant on every run, which made each upload reuse one record_id.
fields =  [
    {
    'record_id': secrets.token_hex(8),
    "orcid": orcidId,
    "name": givenNames + ' ' + familyName,
    "doi": "10.1234/j.mol.stuff.123",
    "citation": givenNames + ' ' + familyName + ' ' + citation,
    "type": " Journal article",
    "form_1_complete": "2"  
}
]

# The records are sent to the API as a JSON string in the 'data' form field.
data_fields = {
    'content': 'record',
    'type': 'flat',
    'data': json.dumps(fields)
}

In [None]:
# Request the project's information
data_fields = dict(content='project')


In [None]:
# Request the export field names of the project's forms
data_fields = dict(content='exportFieldNames')


# Main script

Run after defining `data_fields` in one of the preceding cells.

In [22]:
# Send the request described by data_fields and show the outcome.
# NOTE: this rebinds `data`, the same name the ORCID cells use for the ORCID
# record, so re-run the ORCID request before re-running the extraction cells.
status, data = retrieve_api_data(token, data_fields)
# Any status other than 200 (including 0 for an undecodable body) means no usable data.
print(json.dumps(data, indent = 2) if status == 200 else 'no data retrieved')


status: 200
{
  "count": 1
}
