In [None]:
pip install pybliometrics

In [1]:
# library configuration: https://pybliometrics.readthedocs.io/en/latest/configuration.html
# The first time init() is run there will be a prompt to input the key and instoken
# API Key: 4fec016525bc7cf8febea7c81db45976
# Instoken: 03a9e22c2f85af0fcdd7f9415998446b


import pybliometrics

pybliometrics.scopus.init()


In [2]:
# Line of code to check where the configuration file is saved

pybliometrics.scopus.utils.constants.CONFIG_FILE

WindowsPath('C:/Users/FSzeliga/.config/pybliometrics.cfg')

In [69]:
# Import pacakages

from pybliometrics.scopus import ScopusSearch
import pandas as pd
import json

In [4]:
# set of parameters

min_year = 2013
max_year = 2023

#### Applied Geography (APG)

In [5]:
# Query set up and Scopus search

apg_query = f'EXACTSRCTITLE(Applied+AND+Geography) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s1 = ScopusSearch(apg_query, verbose=True, download=True)


In [6]:
# Checking length of results

s1.get_results_size()

2193

In [7]:
# Conversion to dataframe

apg_df = pd.DataFrame(s1.results)

In [8]:
# Unique Values

unique_doi = apg_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = apg_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 2193
The total number of unique journal titles 3


In [9]:
# Print Journal Unique Values
print(unique_journal)

['Applied Geography' 'Papers in Applied Geography'
 'The Black Sea from Paleogeography to Modern Navigation: Applied Maritime Geography and Oceanography']


In [10]:
# Filter the DataFrame to keep only rows where 'journal_title' is 'Applied Geography'
apg_df = apg_df[apg_df['publicationName'] == 'Applied Geography']

#### Cartography and Geographic Information Science (CaGIS)

In [11]:
# Query set up and Scopus search

cagis_query = f'EXACTSRCTITLE(Cartography+AND+Geographic+AND+Information+AND+Science) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s2 = ScopusSearch(cagis_query, verbose=True, download=True)

In [12]:
# Checking length of results

s2.get_results_size()

448

In [13]:
# Conversion to dataframe

cagis_df = pd.DataFrame(s2.results)

In [14]:
# Unique Values

unique_doi = cagis_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = cagis_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 448
The total number of unique journal titles 1


In [15]:
# Print Journal Unique Values
print(unique_journal)

['Cartography and Geographic Information Science']


#### GIScience and Remote Sensing (G&RS)

In [16]:
# Query set up and Scopus search

gandrs_query = f'EXACTSRCTITLE(GIScience+AND+Remote+AND+Sensing) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s3 = ScopusSearch(gandrs_query, verbose=True, download=True)

In [17]:
# Checking length of results

s3.get_results_size()

700

In [18]:
# Conversion to dataframe

gandrs_df = pd.DataFrame(s3.results)

In [19]:
# Unique Values

unique_doi = gandrs_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = gandrs_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 700
The total number of unique journal titles 2


In [20]:
# Print Journal Unique Values
print(unique_journal)

['GIScience and Remote Sensing'
 'Remote Sensing and GIScience: Challenges and Future Directions']


In [21]:
# Filter the DataFrame to keep only rows where 'journal_title' is 'GIScience and Remote Sensing'
gandrs_df = gandrs_df[gandrs_df['publicationName'] == 'GIScience and Remote Sensing']

#### International Journal of Digital Earth (IJDE)

In [22]:
# Query set up and Scopus search

ijde_query = f'EXACTSRCTITLE(International+AND+Journal+AND+of+AND+Digital+AND+Earth) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s4 = ScopusSearch(ijde_query, verbose=True, download=True)

In [23]:
# Checking length of results

s4.get_results_size()

992

In [24]:
# Conversion to dataframe

ijde_df = pd.DataFrame(s4.results)

In [25]:
# Unique Values

unique_doi = ijde_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = ijde_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 992
The total number of unique journal titles 1


In [26]:
# Print Journal Unique Values
print(unique_journal)

['International Journal of Digital Earth']


#### ISPRS International Journal of Geo-information (IJGI)

In [27]:
# Query set up and Scopus search

ijgi_query = f'EXACTSRCTITLE(ISPRS+AND+International+AND+Journal+AND+of+AND+Geo-information) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s5 = ScopusSearch(ijgi_query, verbose=True, download=True)

In [28]:
# Checking length of results

s5.get_results_size()

4690

In [29]:
# Conversion to dataframe

ijgi_df = pd.DataFrame(s5.results)

In [30]:
# Unique Values

unique_doi = ijgi_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = ijgi_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 4690
The total number of unique journal titles 1


In [31]:
# Print Journal Unique Values
print(unique_journal)

['ISPRS International Journal of Geo-Information']


#### International Journal of Geographical Information Science (IJGIS)

In [32]:
# Query set up and Scopus search

ijgis_query = f'EXACTSRCTITLE(International+AND+Journal+AND+of+AND+Geographical+AND+Information+AND+Science) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s6 = ScopusSearch(ijgis_query, verbose=True, download=True)

In [33]:
# Checking length of results

s6.get_results_size()

1264

In [34]:
# Conversion to dataframe

ijgis_df = pd.DataFrame(s6.results)

In [35]:
# Unique Values

unique_doi = ijgis_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = ijgis_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 1264
The total number of unique journal titles 1


In [36]:
# Print Journal Unique Values
print(unique_journal)

['International Journal of Geographical Information Science']


#### Journal of Spatial Science (JSS)

In [37]:
# Query set up and Scopus search

jss_query = f'EXACTSRCTITLE(Journal+AND+of+AND+Spatial+AND+Science) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s7 = ScopusSearch(jss_query, verbose=True, download=True)

In [38]:
# Checking length of results

s7.get_results_size()

493

In [39]:
# Conversion to dataframe

jss_df = pd.DataFrame(s7.results)

In [40]:
# Unique Values

unique_doi = jss_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = jss_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 493
The total number of unique journal titles 2


In [41]:
# Print Journal Unique Values
print(unique_journal)

['Journal of Spatial Information Science' 'Journal of Spatial Science']


In [42]:
# Filter the DataFrame to keep only rows where 'journal_title' is 'Journal of Spatial Science'
jss_df = jss_df[jss_df['publicationName'] == 'Journal of Spatial Science']

#### ISPRS Journal of Photogrammetry and Remote Sensing (P&RS)

In [43]:
# Query set up and Scopus search

pandrs_query = f'EXACTSRCTITLE(ISPRS+AND+Journal+AND+of+AND+Photogrammetry+AND+Remote+AND+Sensing) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s8 = ScopusSearch(pandrs_query, verbose=True, download=True)

In [44]:
# Checking length of results

s8.get_results_size()

2481

In [45]:
# Conversion to dataframe

pandrs_df = pd.DataFrame(s8.results)

In [46]:
# Unique Values

unique_doi = pandrs_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = pandrs_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 2481
The total number of unique journal titles 2


In [47]:
# Print Journal Unique Values
print(unique_journal)

['ISPRS Open Journal of Photogrammetry and Remote Sensing'
 'ISPRS Journal of Photogrammetry and Remote Sensing']


In [48]:
# Filter the DataFrame to keep only rows where 'journal_title' is 'ISPRS Journal of Photogrammetry and Remote Sensing'
pandrs_df = pandrs_df[pandrs_df['publicationName'] == 'ISPRS Journal of Photogrammetry and Remote Sensing']

#### Transactions in GIS (TGIS)

In [49]:
# Query set up and Scopus search

tgis_query = f'EXACTSRCTITLE(Transactions+AND+in+AND+GIS) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s9 = ScopusSearch(tgis_query, verbose=True, download=True)

In [50]:
# Checking length of results

s9.get_results_size()

962

In [51]:
# Conversion to dataframe

tgis_df = pd.DataFrame(s9.results)

In [52]:
# Unique Values

unique_doi = tgis_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = tgis_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 962
The total number of unique journal titles 1


In [53]:
# Print Journal Unique Values
print(unique_journal)

['Transactions in GIS']


#### ACM Transactions on Spatial Algorithms and Systems (TSAS)

In [54]:
# Query set up and Scopus search

tsas_query = f'EXACTSRCTITLE(ACM+AND+Transactions+AND+on+AND+Spatial+AND+Algorithms+AND+Systems) AND PUBYEAR > {min_year-1} AND PUBYEAR < {max_year+1}' 

s10 = ScopusSearch(tsas_query, verbose=True, download=True)

In [55]:
# Checking length of results

s10.get_results_size()

190

In [56]:
# Conversion to dataframe

tsas_df = pd.DataFrame(s10.results)

In [57]:
# Unique Values

unique_doi = tsas_df['doi'].unique()
print(f"The total number of unique DOI values is {len(unique_doi)}")

unique_journal = tsas_df['publicationName'].unique()
print(f"The total number of unique journal titles {len(unique_journal)}")

The total number of unique DOI values is 190
The total number of unique journal titles 1


In [58]:
# Print Journal Unique Values
print(unique_journal)

['ACM Transactions on Spatial Algorithms and Systems']


#### Combination of dataframes and export to json

In [59]:
# Concatenation of journals dataframes
scopus_api_df = pd.concat([apg_df, cagis_df, gandrs_df, ijde_df, ijgi_df, ijgis_df, jss_df, pandrs_df, tgis_df, tsas_df], ignore_index=True)
drop_columns = ['eid', 'pii', 'pubmed_id', 'afid', 'subtype',
           'author_ids', 'author_afids', 'source_id', 'eIssn', 'aggregationType','volume', 
           'issueIdentifier', 'article_number', 'pageRange', 'citedby_count', 'openaccess', 'freetoread', 
           'freetoreadLabel', 'fund_acr', 'fund_no', 'fund_sponsor']

scopus_api_df = scopus_api_df.drop(columns = drop_columns)

In [60]:
scopus_api_df.head(5)

Unnamed: 0,doi,title,subtypeDescription,creator,affilname,affiliation_city,affiliation_country,author_count,author_names,coverDate,coverDisplayDate,publicationName,issn,description,authkeywords
0,10.1016/j.apgeog.2023.103145,Trapped in dilemma: Inverted N-shaped EKC evid...,Article,Huang J.,Renmin University of China;The Hong Kong Polyt...,Beijing;Hong Kong,China;Hong Kong,6,"Huang, Jing;Zhang, Dong;Zhang, Zhengfeng;Kong,...",2023-12-01,December 2023,Applied Geography,1436228,The environmental Kuznets relationship between...,Ecological land | Economic growth | Inverted N...
1,10.1016/j.apgeog.2023.103136,Finding home: Participatory geospatial mapping...,Article,Al-haddad R.E.,Independent Researcher;DevRA International,Ukhiya;Tucson,Bangladesh;United States,2,"Al-haddad, Robin Elizabeth;Rakshit, Pradipto V...",2023-12-01,December 2023,Applied Geography,1436228,Despite decades of using community-based parti...,Geo-positioning | Geographic oppression | Part...
2,10.1016/j.apgeog.2023.103122,Diffusion of the Internet-of-Things (IoT): A f...,Article,Jamme H.T.,Arizona State University,Tempe,United States,2,"Jamme, Huê Tâm;Connor, Dylan S.",2023-12-01,December 2023,Applied Geography,1436228,Internet-of-Things (IoT) innovations are recon...,Community change | France | Internet of Things...
3,10.1016/j.apgeog.2023.103142,A multi-level framework for assessing the spat...,Article,Dong J.,Capital Normal University,Beijing,China,5,"Dong, Junwu;Wang, Yanhui;Yang, Yang;Luo, Xiaoy...",2023-12-01,December 2023,Applied Geography,1436228,Rapid urbanization is reducing the limited urb...,Accessibility | Public spaces | SDG 11.7.1 | S...
4,10.1016/j.apgeog.2023.103140,Urbanization shifts freshwater service flows i...,Article,Shen W.,Sun Yat-Sen University,Guangzhou,China,2,"Shen, Wenting;Liu, Zhenhuan",2023-12-01,December 2023,Applied Geography,1436228,Freshwater ecosystem services supply and deman...,Freshwater ecosystem service flow | Social-eco...


In [61]:
# Checking lenght of df

len(scopus_api_df)

13932

In [62]:
# Check for missing values in each column
print(scopus_api_df.isnull().sum())

doi                       0
title                     0
subtypeDescription        0
creator                  54
affilname               110
affiliation_city        110
affiliation_country     110
author_count             54
author_names             54
coverDate                 0
coverDisplayDate          0
publicationName           0
issn                   4690
description             303
authkeywords           1299
dtype: int64


In [71]:
# Export to JSON
scopus_api_df.to_json('scopus_api_top10.json', orient='records', lines=True)

#### Read in json file

In [75]:
# Read Scopus .json file
scopus_api_df = pd.read_json('scopus_api_top10.json', orient='records', lines=True)

# Display the first 5 rows of the DataFrame
scopus_api_df.head(5)

Unnamed: 0,doi,title,subtypeDescription,creator,affilname,affiliation_city,affiliation_country,author_count,author_names,coverDate,coverDisplayDate,publicationName,issn,description,authkeywords
0,10.1016/j.apgeog.2023.103145,Trapped in dilemma: Inverted N-shaped EKC evid...,Article,Huang J.,Renmin University of China;The Hong Kong Polyt...,Beijing;Hong Kong,China;Hong Kong,6.0,"Huang, Jing;Zhang, Dong;Zhang, Zhengfeng;Kong,...",2023-12-01,December 2023,Applied Geography,1436228.0,The environmental Kuznets relationship between...,Ecological land | Economic growth | Inverted N...
1,10.1016/j.apgeog.2023.103136,Finding home: Participatory geospatial mapping...,Article,Al-haddad R.E.,Independent Researcher;DevRA International,Ukhiya;Tucson,Bangladesh;United States,2.0,"Al-haddad, Robin Elizabeth;Rakshit, Pradipto V...",2023-12-01,December 2023,Applied Geography,1436228.0,Despite decades of using community-based parti...,Geo-positioning | Geographic oppression | Part...
2,10.1016/j.apgeog.2023.103122,Diffusion of the Internet-of-Things (IoT): A f...,Article,Jamme H.T.,Arizona State University,Tempe,United States,2.0,"Jamme, Huê Tâm;Connor, Dylan S.",2023-12-01,December 2023,Applied Geography,1436228.0,Internet-of-Things (IoT) innovations are recon...,Community change | France | Internet of Things...
3,10.1016/j.apgeog.2023.103142,A multi-level framework for assessing the spat...,Article,Dong J.,Capital Normal University,Beijing,China,5.0,"Dong, Junwu;Wang, Yanhui;Yang, Yang;Luo, Xiaoy...",2023-12-01,December 2023,Applied Geography,1436228.0,Rapid urbanization is reducing the limited urb...,Accessibility | Public spaces | SDG 11.7.1 | S...
4,10.1016/j.apgeog.2023.103140,Urbanization shifts freshwater service flows i...,Article,Shen W.,Sun Yat-Sen University,Guangzhou,China,2.0,"Shen, Wenting;Liu, Zhenhuan",2023-12-01,December 2023,Applied Geography,1436228.0,Freshwater ecosystem services supply and deman...,Freshwater ecosystem service flow | Social-eco...
