In [32]:
# Loading Packages

import requests
import pandas as pd
import json
import urllib.parse

#### Elsevier API (Scopus Search)

In [2]:
# API Key and token
elsevier_apiKey= '4fec016525bc7cf8febea7c81db45976'
elsevier_insttoken = '03a9e22c2f85af0fcdd7f9415998446b'

# Define the base URL of the API endpoint
search_url = 'http://api.elsevier.com/content/search/scopus'

# Define your search query
query = 'TITLE-ABS-KEY(GIS)'  # returns documents where the terms appear in the title, keywords, or abstract.

# Cursor for an intial search
cursor = "*" # Start with "*" to indicate the beginning


In [10]:
def elsevier_api(search_url, query, cursor):
    headers={"Accept":"application/json", "X-ELS-APIKey": elsevier_apiKey,"X-ELS-Insttoken": elsevier_insttoken}
    response = requests.get(f"{search_url}?query={query}&date=2013-2023&cursor={cursor}&view=complete",headers=headers)
    # Check if the response status code is 200 (OK)
    if response.status_code == 200:
        try:
            # Attempt to decode the JSON response
            return response.json()
        except json.JSONDecodeError:
            print("Error: Unable to decode JSON response")
            return None
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None

In [11]:
# Test elsevier_api function

elsevier_api(search_url, query, cursor)

{'search-results': {'opensearch:totalResults': '84826',
  'opensearch:itemsPerPage': '25',
  'opensearch:Query': {'@role': 'request',
   '@searchTerms': 'TITLE-ABS-KEY(GIS)',
   '@startPage': ''},
  'cursor': {'@current': '*', '@next': 'AoJWw5ZNMjItczIuMC04NTE4MTU3MDc3NA=='},
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/search/scopus?cursor=*&count=25&query=TITLE-ABS-KEY%28GIS%29&date=2013-2023&view=complete',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'first',
    '@href': 'https://api.elsevier.com/content/search/scopus?cursor=*&count=25&query=TITLE-ABS-KEY%28GIS%29&date=2013-2023&view=complete',
    '@type': 'application/json'},
   {'@_fa': 'true',
    '@ref': 'next',
    '@href': 'https://api.elsevier.com/content/search/scopus?cursor=AoJWw5ZNMjItczIuMC04NTE4MTU3MDc3NA%3D%3D&count=25&query=TITLE-ABS-KEY%28GIS%29&date=2013-2023&view=complete',
    '@type': 'application/json'}],
  'entry': [{'@_fa': 'true',
    'l

In [33]:
# Code to extract relevant information from search results

def elsevier_scroll(search_url, query):
    all_results = []
    cursor = "AoJR/oJNMjItczIuMC04NTA1ODM4MjQwMQ==" # Start with "*" to indicate the beginning
    iteration_count = 0
    
    while cursor:
        iteration_count += 1
        response = elsevier_api(search_url, query, cursor)
        
        if response is None:
            break

        entries = response.get('search-results', {}).get('entry', [])

        if not entries:
            break  # Exit the loop if there are no more results

        cursor = response.get('search-results', {}).get('cursor', {}).get('@next') # Get next cursor to move to next page
        # decoded_cursor = urllib.parse.unquote(cursor)


        print(f"Iteration: {iteration_count}, Cursor: {cursor}")  # Print cursor value

        for entry in entries:
            extracted_info = extract_info(entry)
            all_results.append(extracted_info)

        print(f"Progress: {len(all_results)} items retrieved")
    
    return all_results

def extract_info(entry):
    affiliation_country = entry.get("affiliation", [{}])[0].get("affiliation-country", "")
    return {
        "ISSN": entry.get("prism:issn", ""), # ISSN
        "Online ISSN": entry.get("prism:eIssn", ""), # Electronic ISSN
        "DOI": entry.get("prism:doi", ""), # DOI
        "Title": entry.get("dc:title", ""), # Article Title
        "First Author": entry.get("dc:creator", ""), # Name of first author
        "Location": affiliation_country,  # Affiliation country of first author
        "Publication Date": entry.get("prism:coverDate", ""), # Publication date 
        "Publication Name": entry.get("prism:publicationName", "") # Name of publisher
    }

In [34]:
elsevier_results = elsevier_scroll(search_url, query)

Iteration: 1, Cursor: AoJX+YJNMjItczIuMC04NTA3Nzc5MDY5MQ==
Progress: 25 items retrieved
Error: 400, {"service-error":{"status":{"statusCode":"INVALID_INPUT","statusText":"Invalid Input (cursor)"}}}


In [35]:
# Converting all_results to a dataframe

elsevier = pd.DataFrame(elsevier_results)
elsevier.head(5)

Unnamed: 0,ISSN,Online ISSN,DOI,Title,First Author,Location,Publication Date,Publication Name
0,11109823,20902476,10.1016/j.ejrs.2018.01.008,Spatial assessment of urban sprawl in Arua Mun...,Abudu D.,Uganda,2019-12-01,Egyptian Journal of Remote Sensing and Space S...
1,11109823,20902476,10.1016/j.ejrs.2018.03.002,Assessment of land degradation using comprehen...,AbdelRahman M.A.E.,India,2019-12-01,Egyptian Journal of Remote Sensing and Space S...
2,03432521,15729893,10.1007/s10708-018-9938-4,Diffusion of GIS in Uganda: the role of change...,Eria S.,United States,2019-12-01,GeoJournal
3,03432521,15729893,10.1007/s10708-018-9933-9,Identification of suitable landfill site based...,Dar S.N.,India,2019-12-01,GeoJournal
4,1874463X,18744621,10.1007/s12061-018-9272-8,"Geographical Relationship between Ungulates, H...",Riccioli F.,Lebanon,2019-12-01,Applied Spatial Analysis and Policy


In [36]:
# Export dataframe as a json file

elsevier.to_json('elsevier_data.json', orient='records')


In [40]:
# Read Elsevier .json file

with open('elsevier_data.json', 'r') as f:
    elsevier_data = json.load(f)

# Converting json to dataframe

elsevier_df = pd.DataFrame(elsevier_data)
elsevier_df.head(5)

Unnamed: 0,ISSN,Online ISSN,DOI,Title,First Author,Location,Publication Date,Publication Name
0,18553362.0,22326022.0,,Assessment of Coastal Water Quality by Testing...,Ezzghari Y.,Morocco,2023-12-31,International Journal of Euro-Mediterranean St...
1,,,10.1515/9783111317779-004,Menocchio mapped: Italian microhistory and the...,Madden A.,United States,2023-12-31,Zoomland: Exploring Scale in Digital History a...
2,9706011.0,,10.21077/ijf.2023.70.4.134234-07,Assessing land suitability for sustainable aqu...,Kumar T.,India,2023-12-31,Indian Journal of Fisheries
3,813834.0,,10.23858/SA/75.2023.2.3374,Attempts at spatial analyses of data from the ...,Solecki R.,Poland,2023-12-31,Sprawozdania Archeologiczne
4,,15785157.0,10.21138/GF.825,MEASUREMENT OF SPATIAL INEQUITY IN THE ACCESSI...,Bosisio A.,Spain,2023-12-31,GeoFocus


In [41]:
# Unique Values

unique_issn = elsevier_df['ISSN'].unique()
print(f"The total number of unique ISSN values is {len(unique_issn)}")

unique_doi = elsevier_df['DOI'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_title = elsevier_df['Title'].unique()
print(f"The total number of unique Title values is {len(unique_title)}")

The total number of unique ISSN values is 4015
The total number of unique DOI values is 34669
The total number of unique Title values is 36804
