In [1]:
import pandas as pd
import requests
import json
import re
from urllib.parse import quote

In [2]:
# Endpoint that every search request in this notebook is sent to.
url = 'https://api.elsevier.com/content/search/scopus'

# Raw boolean keyword expression; a later cell wraps it in a field code before use.
query = '( "Machine health"  OR  "Anomaly detection"  OR  "Deterioration" )  AND  ( "Machine learning"  OR  "Clustering" )  AND  ( "Real time"  OR  "real-time"  OR  "Real-Time" )'

# NOTE: despite its name, API_KEY holds the whole decoded config.json object;
# the key itself is read from its 'api-key' entry further down.
with open('config.json', 'r') as config_file:
    API_KEY = json.load(config_file)

In [3]:
class scopus_df:
    """Accumulate Scopus search-result entries as rows of a pandas DataFrame.

    Attributes are plain instance fields:
      * ``columns`` - ordered list of the column labels used for every row.
      * ``csv``     - the DataFrame holding one row per accepted publication.
    """

    def __init__(self):
        """Start with an empty frame that already carries the output columns."""
        self.columns = ['Authors', 'Title', 'Year', 'Cited By', 'Affiliations', 'Author Keywords', 'Source title']
        self.csv = pd.DataFrame(columns=self.columns)

    def get_authors(self, publication: dict) -> list:
        """Return the ``'authname'`` of every item in ``publication['author']``.

        Raises:
            KeyError: if ``'author'`` (or an item's ``'authname'``) is missing.
        """
        return [author['authname'] for author in publication['author']]

    def get_affiliations(self, publication: dict) -> list:
        """Return the ``'affilname'`` of every item in ``publication['affiliation']``.

        Commas inside a name are replaced by ``';'`` because ``append`` joins
        the names with commas.

        Raises:
            KeyError: if ``'affiliation'`` (or an item's ``'affilname'``) is missing.
        """
        return [aff['affilname'].replace(',', ';') for aff in publication['affiliation']]

    def append(self, publication: dict) -> None:
        """Add one publication entry as a new row of ``self.csv``.

        Entries without author information are skipped silently. Missing
        affiliations or author keywords, and a cover date containing no
        four-digit year, are stored as empty strings.

        Args:
            publication: one search-result entry; must provide 'dc:title',
                'prism:coverDisplayDate', 'prism:publicationName' and
                'citedby-count'.

        Raises:
            KeyError: if one of the required keys listed above is absent.
        """
        try:
            authors = ','.join(self.get_authors(publication))
        except KeyError:
            # No author list: the entry is not recorded at all.
            return None

        title = publication['dc:title']
        # First run of four digits in the display date; '' when there is none
        # (previously an IndexError).
        year_match = re.search(r'\d{4}', publication['prism:coverDisplayDate'])
        year = year_match.group(0) if year_match else ''
        source_title = publication['prism:publicationName']
        cites = publication['citedby-count']

        try:
            affiliations = ','.join(self.get_affiliations(publication))
        except KeyError:
            affiliations = ''

        try:
            # Keywords arrive as a single '|'-separated string.
            author_kw = ','.join(kw.strip() for kw in publication['authkeywords'].split('|'))
        except KeyError:
            author_kw = ''

        row = pd.DataFrame(
            [[authors, title, year, cites, affiliations, author_kw, source_title]],
            columns=self.columns,
        )
        # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
        self.csv = pd.concat([self.csv, row], ignore_index=True)

def query_to_scopus(url: str, query: str, api: str, start_item: int = 0, timeout: float = 30) -> dict:
    """Send one search request and return the decoded JSON body.

    Args:
        url: endpoint the GET request is sent to.
        query: search string, sent verbatim as the ``query`` parameter.
        api: API key, sent in the ``X-ELS-APIKey`` header.
        start_item: value of the ``start`` parameter (offset of the first
            result to return).
        timeout: seconds to wait for the server before ``requests`` gives up;
            without it a stalled connection would block the notebook forever.

    Returns:
        The response body parsed by ``Response.json()``; callers in this
        notebook index it as a dict (``['search-results']``).
    """
    response = requests.get(
        url,
        headers={'Accept': 'application/json', 'X-ELS-APIKey': api},
        params={'query': query, 'view': 'COMPLETE', 'start': start_item},
        timeout=timeout,
    )
    return response.json()
def create_df_from_scopus(url: str, query: str, api: str, num_items: int) -> pd.DataFrame:
    """Page through the search results and collect them in a DataFrame.

    Requests are issued with increasing ``start`` offsets until ``num_items``
    entries have been seen, a response carries no ``'entry'`` list, or a page
    comes back empty.

    Args:
        url: search endpoint passed on to ``query_to_scopus``.
        query: search string sent verbatim with every request, so it must
            already be in its final form (including any field code).
        api: API key passed on to ``query_to_scopus``.
        num_items: number of result entries to walk through; must be > 0.

    Returns:
        The ``csv`` DataFrame of a ``scopus_df`` filled with every entry that
        ``scopus_df.append`` accepted.
    """
    assert num_items > 0
    start_item = 0
    publications = scopus_df()
    while start_item < num_items:
        # Use the caller-supplied query rather than a notebook-level global.
        response = query_to_scopus(url, query, api, start_item)
        try:
            batch = response['search-results']['entry']
        except KeyError:
            # Response without a result list (e.g. an error payload): stop paging.
            break
        if not batch:
            # An empty page would never advance the offset; stop instead of
            # looping forever.
            break
        for item in batch:
            publications.append(item)
        # The offset counts every entry seen, including ones append() skipped.
        start_item += len(batch)
    return publications.csv
    

In [4]:
# Key used to authenticate every request, taken from the loaded config.
api = API_KEY['api-key']

# Wrap the raw expression in TITLE-ABS-KEY(...) to form the final search string.
# TODO: Save a history of all the queries with the number of results
query_parsed = f'TITLE-ABS-KEY({query})'

# One request is enough to read the total number of matching records.
num_items = int(
    query_to_scopus(url, query_parsed, api)['search-results']['opensearch:totalResults']
)
num_items

491

In [5]:
# Pass the fully-formed search string (the same one used to count the results),
# not the raw keyword expression.
csv = create_df_from_scopus(url, query_parsed, api, num_items)

In [6]:
# Display the DataFrame returned by create_df_from_scopus.
csv

Unnamed: 0,Authors,Title,Year,Cited By,Affiliations,Author Keywords,Source title
0,"Bezerra C.,Costa B.,Guedes L.,Angelov P.",An evolving approach to data streams clusterin...,2020,0,School of Computing and Communications; Lancas...,"Anomaly detection,Data stream,Eccentricity,Onl...",Information Sciences
1,"Chellammal P.,Sheba Kezia Malarchelvi P.",Real-time anomaly detection using parallelized...,2020,1,J J College of Engineering and Technology,"concept drift,data imbalance,ensemble modeling...",Concurrency Computation
2,"Hoque X.,Sharma S.",Ensembled Deep Learning Approach for Maritime ...,2020,0,Institute of Information Technology and Manage...,"AIS,Deep learning,LSTM,Maritime anomaly detection",Lecture Notes in Electrical Engineering
3,"Uddin V.,Rizvi S.,Hashmani M.,Jameel S.,Ansari T.",A study of deterioration in classification mod...,2020,0,"Hamdard University,Universiti Teknologi Petronas","Big Data,Online Classification,Real-time machi...",Advances in Intelligent Systems and Computing
4,"Gurina E.,Klyuchnikov N.,Zaytsev A.,Romanenkov...",Application of machine learning to accidents d...,2020,1,"Gazprom Neft PJSC,Skolkovo Institute of Scienc...","Anomaly detection,Classification,Directional d...",Journal of Petroleum Science and Engineering
...,...,...,...,...,...,...,...
455,Adair K.,Extracting knowledge from temporal clusters fo...,1999,1,Los Alamos National Laboratory,,Proceedings of the International Joint Confere...
456,Waite N.,A real-time system-adapted anomaly detector,1999,2,,"Adaptive,Anomaly detection,Artificial intellig...",Information Sciences
457,"McGregor R.,Nguyen M.,Robinson I.,Weisberg A.,...",Target detection using HSI systems: An update ...,1998,1,Northrop Grumman corporation,"Anomaly detection,Clustering,Hyperspectral ima...",Proceedings of SPIE - The International Societ...
458,Baker J.,Image accuracy and representational enhancemen...,1993,2,Oak Ridge National Laboratory,,Proceedings of SPIE - The International Societ...
