In [1]:
from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
from urllib.parse import urlencode, quote_plus

In [2]:
## Read API key
con_file = open("elsevier_key.txt")
api_key = con_file.read()
con_file.close()

In [3]:

st = {'':'"star wars"'}
urlencode(st, quote_via=quote_plus)

'=%22star+wars%22'

In [4]:

class Elsevier_Searcher():
    __base_url = u'https://api.elsevier.com/content/search/'
    __api_key = u'e7882eed968063f188d6c53a919528a7'
    
    def __init__(self, index: str, apikey: str = __api_key): 
        self.articles_found = []
        self.apikey = apikey
        self.els_client = ElsClient(self.apikey)
        self._uri = self.__base_url + index + '?'
            
    def search(self, queryterms: list=None,
                    start_year: int=None, end_year: int=None,
                    content_type: str='journals', start_record: int=None, 
                    sort_field: str=None, sort_order: int=None, 
                    max_records: int=None, article_title: str = None, author: list = None):
        """
        @param queryterms: list of lists. Terms within the same list are
            separated by an OR. Lists are separated by an AND
        @param search_type: meta_data or querytext. 
            meta_data: This field enables a free-text search of all 
                configured metadata fields and the abstract. Accepts 
                complex queries involving field names and  boolean 
                operators.
            querytext: This field enables a free-text search of all 
                configured metadata fields, abstract and document text. 
                Accepts complex queries involving field names and boolean 
                operators. 
        @param start_year: Start value of Publication Year to restrict results by.
        @param end_year: End value of Publication Year to restrict results by.
        @param content_type: Note: these are case sensitive and must be spelled as 
            presented here to get a result: Journals, Conference, Early Access,
            Standards, Books, Courses
        @param start_record: Sequence number of first record to fetch. Default: 1
        @param sort_field: Field name on which to sort. Choose from: article_number
            article_title, author, publication_title, publication_year
        @param sort_order: asc (for ascending sort) or desc (for descending sort)
        @param max_records: The number of records to fetch. Maximum: 200
        
        @return  the data fields returned by the search are described by the 
            following link https://developer.ieee.org/docs/read/Metadata_API_responses
        """
        
        if not queryterms: 
            queryterms = self.queryterms
        
        formated_query = "("
        for index_group, group in enumerate(queryterms):
            if index_group > 0: 
                formated_query += ' AND ('
    #         else: 
    # #             formated_query += '('
            for index_term, term in enumerate(group): 
                if index_term > 0: 
                    formated_query += ' OR '
                if ' ' in term: 
                    formated_query += '('
                formated_query += f'"{term}"'
                if ' ' in term: 
                    formated_query += ')'

                if (index_term + 1) == len(group):
                    formated_query += ')'
        if author:
            str_author = ' AND '.join([f'"{x}"' for x in author])
            formated_query += f' AND aut({str_author})'
        if article_title:
            formated_query += f' AND ttl({article_title})'
        
        query_params = dict()
        
        query_params['query'] = formated_query
#         query_params['view'] = 'STANDARD'
            
        if start_year: 
            date = start_year
            if end_year: 
                date += f'-{end_year}'
            query_params['date'] = date
        if content_type: 
            query_params['content'] = content_type
        if start_record: 
            query_params['start'] = start_record
        if sort_field: 
            url += '&sort_field=' + sort_field 
        if sort_order: 
            url += '&sort_order=' + sort_order
        if max_records: 
            query_params['count'] = max_records
        
        get_all = True
        
        url = self._uri + urlencode(query_params, quote_via=quote_plus)
        print(url)
        
        self._api_response = self.els_client.exec_request(url)
        self._tot_num_res = int(self._api_response['search-results']['opensearch:totalResults'])
        self.results = self._api_response['search-results']['entry']
        print("a request completed...")
        if get_all is True:
            while (len(self.results) < self._tot_num_res) and (len(self.results) < 5000):
                for e in self._api_response['search-results']['link']:
                    if e['@ref'] == 'next':
                        next_url = e['@href']
                self._api_response = self.els_client.exec_request(next_url)
                self.results += self._api_response['search-results']['entry'] 
                print("a request completed...")

        return self.results

In [50]:
# ScienceDirect
#searcher = Elsevier_Searcher(apikey=api_key, index='scidir')

# Scopus
searcher = Elsevier_Searcher(index='scopus')

In [51]:
technology_queryterms = [
    'machine learning', 'deep learning', 'artificial intelligence',
    'neural network'
]

health_queryterms = [
    'coronary artery disease', 'chest pain', 'heart disease', 'MACE',
    'Acute Cardiac Complications'
]


searcher.search([technology_queryterms, health_queryterms], start_year=2010)

https://api.elsevier.com/content/search/scopus?query=%28%28%22machine+learning%22%29+OR+%28%22deep+learning%22%29+OR+%28%22artificial+intelligence%22%29+OR+%28%22neural+network%22%29%29+AND+%28%28%22coronary+artery+disease%22%29+OR+%28%22chest+pain%22%29+OR+%28%22heart+disease%22%29+OR+%22MACE%22+OR+%28%22Acute+Cardiac+Complications%22%29%29&date=2010&content=journals
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...
a request completed...


KeyboardInterrupt: 

In [63]:
searcher.results[10]

{'@_fa': 'true',
 'affiliation': [{'@_fa': 'true',
   'affiliation-city': 'Kuching',
   'affiliation-country': 'Malaysia',
   'affilname': 'Swinburne University of Technology Sarawak Campus'}],
 'article-number': '5636274',
 'citedby-count': '4',
 'dc:creator': 'Loh B.',
 'dc:identifier': 'SCOPUS_ID:78650323248',
 'dc:title': 'Ontology-enhanced interactive anonymization in domain-driven data mining outsourcing',
 'eid': '2-s2.0-78650323248',
 'link': [{'@_fa': 'true',
   '@href': 'https://api.elsevier.com/content/abstract/scopus_id/78650323248',
   '@ref': 'self'},
  {'@_fa': 'true',
   '@href': 'https://api.elsevier.com/content/abstract/scopus_id/78650323248?field=author,affiliation',
   '@ref': 'author-affiliation'},
  {'@_fa': 'true',
   '@href': 'https://www.scopus.com/inward/record.uri?partnerID=HzOxMe3b&scp=78650323248&origin=inward',
   '@ref': 'scopus'},
  {'@_fa': 'true',
   '@href': 'https://www.scopus.com/inward/citedby.uri?partnerID=HzOxMe3b&scp=78650323248&origin=inward',


In [54]:
from functools import reduce
from datetime import date

In [55]:
for item in searcher.results:
    
    try:
        authors = item.get('authors').get('author')

        authors = reduce((lambda x, y: f"{x}; {y['given-name']} {y['surname']}"), authors[1:], f"{authors[0]['given-name']} {authors[0]['surname']}")
    except:
        authors = None
    print(authors)
    print(datetime.strptime(item['prism:coverDate'][0]['$'], '%Y-%m-%d'))

None


TypeError: string indices must be integers

In [16]:
from abc import ABC, abstractmethod


class AbstractSerializer(ABC):
    @abstractmethod
    def parse(self, data):
        pass


class ElsevierSerializer(AbstractSerializer):
    def parse(self, data):
        total_list = list()
        if type(data)==list:
            for item in data:
                total_list.append(self.parse_item(item))
        else:
            total_list.append(self.parse_item(item))
        return total_list
    
    def parse_item(self, item):
        documento = dict()
        documento['titulo'] = item['dc:title']
        try:
            documento['autores'] = item['dc:creator']
        except:
            print(item)
        documento['doi'] = item['prism:doi']
        documento['revista'] = item['prism:publicationName']
        documento['html_url'] = item['prism:url']
#         documento['citado_papers'] = item['citedby-count']
#         documento['data'] = item['prism:coverDate']
        return documento

In [18]:
serializer = ElsevierSerializer()
filtered = serializer.parse(searcher.results)

{'@_fa': 'true', 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/article/pii/S0165011410003325'}, {'@_fa': 'true', '@ref': 'scidir', '@href': 'https://www.sciencedirect.com/science/article/pii/S0165011410003325'}], 'dc:identifier': 'DOI:10.1016/j.fss.2010.08.009', 'eid': '1-s2.0-S0165011410003325', 'prism:url': 'https://api.elsevier.com/content/article/pii/S0165011410003325', 'dc:title': 'Recent Literature', 'prism:publicationName': 'Fuzzy Sets and Systems', 'prism:issueName': 'Theme: Algebraic Aspects of Fuzzy Sets', 'prism:issn': '01650114', 'prism:volume': '161', 'prism:issueIdentifier': '24', 'prism:coverDate': [{'@_fa': 'true', '$': '2010-12-16'}], 'prism:coverDisplayDate': '16 December 2010', 'prism:startingPage': '3174', 'prism:endingPage': '3181', 'prism:doi': '10.1016/j.fss.2010.08.009', 'openaccess': '0', 'openaccessArticle': False, 'openArchiveArticle': False, 'openaccessUserLicense': None, 'pii': 'S0165-0114(10)00332-5', 'prism:teaser': 

In [21]:
[x for x in filtered if x['titulo']=='Award pages']

[{'doi': '10.1016/S0302-2838(10)00975-9',
  'html_url': 'https://api.elsevier.com/content/article/pii/S0302283810009759',
  'revista': 'European Urology',
  'titulo': 'Award pages'},
 {'doi': '10.1016/S0302-2838(10)00807-9',
  'html_url': 'https://api.elsevier.com/content/article/pii/S0302283810008079',
  'revista': 'European Urology',
  'titulo': 'Award pages'},
 {'doi': '10.1016/S0302-2838(10)00625-1',
  'html_url': 'https://api.elsevier.com/content/article/pii/S0302283810006251',
  'revista': 'European Urology',
  'titulo': 'Award pages'},
 {'doi': '10.1016/S0302-2838(10)00498-7',
  'html_url': 'https://api.elsevier.com/content/article/pii/S0302283810004987',
  'revista': 'European Urology',
  'titulo': 'Award pages'}]