# SDG query execution
## Configuration

In [None]:
import os
import requests

from model.AllResponses import AllResponses
from model.Unpaywall import Unpaywall
from utilities import utils

from elasticsearch import Elasticsearch
# Elasticsearch client created with the library defaults; used later to index
# the collected documents.
es = Elasticsearch()

# Base directory for all output; an empty string means the current working
# directory.
location = ''
# Base URLs and credentials of the external services queried in this notebook.
elsevier_url = 'https://api.elsevier.com'
altmetric_url = 'https://api.altmetric.com/v1'
altmetric_key = ''
scopus_api_key = ''
# NOTE(review): '/my/request' looks like a placeholder path - confirm the real
# Unpaywall endpoint before running.
unpaywall_api_url = 'https://api.unpaywall.org/my/request'
libintel_user_email = 'john.smith@example.com'
# Page size used when paging through the Scopus search results.
results_per_page = 100


# Prepare the location for saving data. An empty location is skipped because
# os.makedirs('') raises FileNotFoundError; exist_ok makes re-running the cell
# harmless.
if location:
    os.makedirs(location, exist_ok=True)



## Define the search

In [None]:
# Search definition. Empty strings mark criteria that are not used for this
# search; 'identifier' names the search (it is used for the output directory
# and the index name further down).
search = {
    'identifier': 'search1',
    'author_name': '',
    'topic': '',
    'startyear': '2010',
    'endyear': '2017',
    'title': '',
    'subject': '',
    'author_id': '16643594900',
    'affiliation_id': '',
}


### Create directory to save data

In [None]:
# Output directory for this search: "<location>/<identifier>-out/".
# os.path.join and os.sep replace the hard-coded backslash, which is only a
# path separator on Windows (elsewhere it became part of the directory name),
# and they insert the separator that plain concatenation with `location`
# left out. The trailing separator is kept so that file names can still be
# appended directly to out_dir.
out_dir = os.path.join(location, search['identifier'] + '-out') + os.sep
print(out_dir)
# exist_ok makes re-running the cell harmless.
os.makedirs(out_dir, exist_ok=True)

## Define Function to transform a search object into a Scopus query:

In [None]:
def convert_search_to_scopus_search_string(search):
    """Build a Scopus query string from a search definition.

    Every non-empty criterion in ``search`` is turned into one Scopus query
    clause; the clauses are combined with `` AND ``.

    Args:
        search: Mapping with the optional keys ``author_name``, ``topic``,
            ``startyear``, ``endyear``, ``title``, ``subject``, ``author_id``
            and ``affiliation_id``. Missing or empty values are ignored.

    Returns:
        The combined query string, or an empty string when no criterion
        is set.
    """
    # (search key, clause template), listed in the order the clauses appear
    # in the resulting query.
    clause_templates = (
        ("author_name", "AUTH({})"),
        ("topic", "TITLE-ABS-KEY({})"),
        ("startyear", "PUBYEAR AFT {}"),
        ("endyear", "PUBYEAR BEF {}"),
        ("title", "TITLE({})"),
        ("subject", "SUBJAREA({})"),
        ("author_id", "AU-ID({})"),
        ("affiliation_id", "AF-ID({})"),
    )
    # The author clause is filled from 'author_name' - the key that is also
    # tested - instead of the non-existent 'author' key, which raised KeyError.
    clauses = [
        template.format(search[key])
        for key, template in clause_templates
        if search.get(key)
    ]
    return " AND ".join(clauses)

## Create search string
The conversion function defined above is used to create a valid search string for Scopus.

In [None]:
# Turn the search definition into a Scopus query string and print it so it
# can be checked before any request is sent.
search_string = convert_search_to_scopus_search_string(search)
print(search_string)

The request URL is built by replacing the whitespace in the search string with '+' and appending the Scopus API key. The number of results is set to 1 because this call is only used to retrieve the total number of results.

In [None]:
# Count-only request URL: blanks in the query are replaced by '+', and
# count=1 keeps the response small because only the total is read from it.
url = '{}/content/search/scopus?count=1&query={}&apiKey={}'.format(
    elsevier_url,
    search_string.replace(" ", "+"),
    scopus_api_key,
)
print("querying URL: {}".format(url))

In [None]:
# Perform the HTTP GET for the count-only query URL built above.
r = requests.get(url)

In [None]:
 if r.status_code == 200:
        scopus_first_response = r.json()
        number_of_results = int(scopus_first_response['search-results']['opensearch:totalResults'])
        print('total number of publications for this query: ' + str(number_of_results))

## Collect the individual data
For the number of publications, make N calls to the API, where N is given by the total number of results divided by the number of results per page (parameter results_per_page, as defined above).

For each call, the start number is constructed and the query URL is composed. The HTTP call is performed and, if the call was successful (response code 200), the individual documents are added as JSON objects to the list 'publication_set'.

__If this list becomes too large, we need to do the extension, processing, and storage within this for loop!__

In [None]:
# Page through the Scopus search results and collect every entry.
publication_set = []
if number_of_results != 0:
    # Ceiling division: `// results_per_page + 1` issued one extra,
    # out-of-range request whenever the total was an exact multiple of the
    # page size.
    number_of_calls = (number_of_results + results_per_page - 1) // results_per_page
    # Encode blanks the same way as in the count-only request.
    query_param = search_string.replace(" ", "+")
    for i in range(number_of_calls):
        start = i * results_per_page
        url = elsevier_url + '/content/search/scopus?start=' + str(start) + '&count=' + str(results_per_page) + '&query=' + query_param + '&apiKey=' + scopus_api_key
        r = requests.get(url)

        # On success, add the documents of this page; a page without an
        # 'entry' list contributes nothing instead of raising KeyError.
        if r.status_code == 200:
            publication_set.extend(r.json()['search-results'].get('entry', []))
        else:
            # Report failed pages so missing results do not go unnoticed.
            print('page starting at ' + str(start) + ' failed with status code ' + str(r.status_code))

## Extension of data
For each of these publications several steps are performed to extend the data.

### Call the whole Scopus record
The extended Scopus record is retrieved by calling the corresponding retrieval API. The obtained full record is attached as a new node 'extended_data' to the JSON object.

In [None]:
# Attach the full Scopus abstract record to every document that has a DOI.
for document in publication_set:
    # Entries without a 'prism:doi' key are skipped; indexing the key
    # directly raised KeyError for them.
    doi = document.get('prism:doi')
    if not doi:
        continue
    url = elsevier_url + '/content/abstract/doi/' + doi + '?apiKey=' + scopus_api_key + '&httpAccept=application%2Fjson'
    r = requests.get(url)
    print("queryied URL: " + url + " with status code " + str(r.status_code))
    if r.status_code == 200:
        document['extended_data'] = r.json()

## Call the Unpaywall-API
Similarly, the Unpaywall API is called to retrieve the Open Access information. The information is stored in the node 'unpaywall_response'.

In [None]:
# Attach the Unpaywall response to every document that has a DOI.
for document in publication_set:
    # Entries without a 'prism:doi' key are skipped; indexing the key
    # directly raised KeyError for them.
    doi = document.get('prism:doi')
    if not doi:
        continue
    url = unpaywall_api_url + '/' + doi + "?email=" + libintel_user_email
    r = requests.get(url)
    print("queryied URL: " + url + " with status code " + str(r.status_code))
    if r.status_code == 200:
        document['unpaywall_response'] = r.json()

## Call the Scopus citation API
Similarly, the Scopus citation API is called to retrieve the extended citation information. The information is stored in the node 'citation_response'.

In [None]:
# Attach the Scopus citation overview to every document that has a DOI.
for document in publication_set:
    # Entries without a 'prism:doi' key are skipped; indexing the key
    # directly raised KeyError for them.
    doi = document.get('prism:doi')
    if not doi:
        continue
    url = elsevier_url + '/content/abstract/citations?doi=' + doi + '&apiKey=' + scopus_api_key
    r = requests.get(url)
    print("queryied URL: " + url + " with status code " + str(r.status_code))
    if r.status_code == 200:
        document['citation_response'] = r.json()

## Call the altmetric API
Similarly, the Altmetric API is called to retrieve the web impact information. The information is stored in the node 'altmetric_response'.

In [None]:
# Attach the Altmetric response to every document that has a DOI.
for document in publication_set:
    # Entries without a 'prism:doi' key are skipped; indexing the key
    # directly raised KeyError for them.
    doi = document.get('prism:doi')
    if not doi:
        continue
    url = altmetric_url + '/doi/' + doi + '?key=' + altmetric_key
    r = requests.get(url)
    print("queryied URL: " + url + " with status code " + str(r.status_code))
    if r.status_code == 200:
        document['altmetric_response'] = r.json()

## Send To Index
The individual documents are stored within an index to allow for queries on this index

In [None]:
# Store every collected document in the Elasticsearch index named after the
# search identifier.
index_name = search['identifier']
for document in publication_set:
    # The original call was missing the closing bracket after
    # search['identifier'] (a syntax error) and printed the undefined name
    # query_id.
    # NOTE(review): the index/doc_type/body keywords match the elasticsearch-py
    # 6.x/7.x clients; 8.x dropped doc_type and takes document= instead -
    # confirm the installed client version.
    res = es.index(index=index_name, doc_type='full-data', body=document)
    print('saved to index ' + index_name)
    print(res['result'])