In [None]:
from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
from elsapy.utils import recast_df
import pandas as pd
import json

## Load configuration
# The context manager guarantees the file handle is released even when the
# JSON payload is malformed and json.load raises.
with open("config.json") as con_file:
    config = json.load(con_file)

## Initialize client
client = ElsClient(config['apikey'])

# Boolean search expression; it is wrapped in TITLE-ABS-KEY(...) when the
# query is built and is also reused as the name of the exported workbook.
srch_line = 'fatigue AND life AND polymer AND composite AND water OR immerse OR wet'

# When True, the abstract and author keywords of every hit are retrieved
# through an extra AbsDoc request per item.
abs_keywords = False

count_per_page = 200 # limitation: <=200

In [None]:
# See https://github.com/ElsevierDev/elsapy/blob/master/elsapy/elssearch.py for the original execute function

from urllib.parse import quote_plus as url_encode

class myElsSearch(ElsSearch):
    """ElsSearch variant whose request URI carries an explicit page size.

    The constructor rebuilds ``_uri`` so that it contains a ``count``
    parameter, and ``execute`` follows the ``next`` links of the response
    to collect the remaining pages.
    """

    def __init__(self, query, index, count = 25):
        """Create the search.

        Args:
            query: Search expression; it is URL-encoded into the URI.
            index: Name of the index to search (appended to the base URL).
            count: Number of results requested per page.
        """
        super().__init__(query, index)
        # Override the URI built by the parent so it includes the page size.
        self._uri = (self._base_url + self.index + '?count=' + str(count)
                     + '&query=' + url_encode(self.query))

    def execute(self, els_client=None, get_all=False):
        """Run the search and store the hits in ``_results`` / ``results_df``.

        Args:
            els_client: Client whose ``exec_request`` performs the HTTP calls.
                Despite the ``None`` default it must be supplied.
            get_all: When exactly ``True``, keep following the ``next`` link
                until all results are loaded, the inherited upper-limit check
                trips, or the API stops advertising a next page.
        """
        api_response = els_client.exec_request(self._uri)
        self._tot_num_res = int(api_response['search-results']['opensearch:totalResults'])
        self._results = api_response['search-results']['entry']
        if get_all is True:
            while (self.num_res < self.tot_num_res) and not self._upper_limit_reached():
                # Reset on every page: otherwise a response without a 'next'
                # link would either raise NameError (first page) or silently
                # re-request the previous page and append duplicate entries.
                next_url = None
                for e in api_response['search-results']['link']:
                    if e['@ref'] == 'next':
                        next_url = e['@href']
                if next_url is None:
                    break
                api_response = els_client.exec_request(next_url)
                self._results += api_response['search-results']['entry']
                print(f'{self.num_res}/{self.tot_num_res}, {next_url}')
        self.results_df = recast_df(pd.DataFrame(self._results))

# Run the Scopus search over title, abstract and keywords, fetching all pages.
srch = myElsSearch('TITLE-ABS-KEY('+srch_line+')','scopus',count=count_per_page)
srch.execute(client, get_all=True)

print('Find', srch.num_res, 'results.')

# Column order of the exported sheet; the two extra columns only exist when
# abstracts and keywords are requested.
columns = ['doi', 'title', 'publication_name', 'url']
if abs_keywords:
    columns += ['abstract', 'keywords']

# Rows are gathered in a plain list and turned into a DataFrame once, which
# avoids re-copying the whole frame with pd.concat on every iteration.
rows = []
skipped = 0

for idx, item in enumerate(srch.results):
    try:
        # Read the DOI first: entries without one are skipped before the
        # (comparatively expensive) abstract request is issued.
        doi = item['prism:doi']
        row = {
            'doi': doi,
            'title': item['dc:title'],
            'publication_name': item['prism:publicationName'],
            # Excel formula so the cell renders as a clickable link.
            'url': '=HYPERLINK("https://doi.org/' + doi + '","DOI Link")',
        }
        if abs_keywords:
            # 'dc:identifier' is split on ':' and the second part is used as
            # the Scopus id for the abstract lookup.
            doc = AbsDoc(scp_id=item['dc:identifier'].split(':')[1])
            doc.read(client)
            row['abstract'] = doc.data['item']['bibrecord']['head']['abstracts']
            row['keywords'] = ', '.join(
                x['$'] for x in doc.data['authkeywords']['author-keyword'])
    except (KeyError, IndexError, TypeError):
        # Best effort: an entry with a missing or empty field is left out of
        # the sheet, but it is counted instead of vanishing silently.
        skipped += 1
    else:
        rows.append(row)

    if (idx+1) % count_per_page==0 and abs_keywords:
        print(f'Items finished: {idx+1}/{len(srch.results)}')

if skipped:
    print(f'Skipped {skipped} of {len(srch.results)} items with missing fields.')

df = pd.DataFrame(rows, columns=columns)
df.to_excel('../data/'+srch_line+'.xlsx')