In [6]:
import requests
import time
import os
import json
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# The Scopus key is read from the environment so it never appears in the notebook.
API_KEY = os.getenv('SCOPUS_API_KEY')
if not API_KEY:
    # Fail here, next to the cause, instead of on the first HTTP request.
    raise RuntimeError(
        "SCOPUS_API_KEY is not set; add it to the environment or to a .env file."
    )

# Headers shared by every request: authentication plus a JSON response format.
header = {
    'X-ELS-APIKey': API_KEY,
    'Accept': 'application/json'
}

In [7]:
def api_search(year = 2000, offset = 0, count = 200, timeout = 30):
    """Run one page of a Scopus search for Chulalongkorn-affiliated papers.

    Args:
        year: Publication year inserted into the ``PUBYEAR`` clause of the query.
        offset: Value sent as the ``start`` request parameter.
        count: Value sent as the ``count`` request parameter.
        timeout: Seconds to wait for the server before abandoning the request.

    Returns:
        The ``['search-results']['entry']`` value of the JSON response, or
        ``None`` when the request fails, the response status is not OK, or the
        body does not have the expected shape. A diagnostic is printed in each
        failure case.
    """
    URI = 'https://api.elsevier.com/content/search/scopus'

    AFF_NAME = 'Chulalongkorn'

    try:
        res = requests.get(
            url=URI,
            headers=header,
            # The API key travels only in the X-ELS-APIKey header; keeping it
            # out of the query string keeps it out of URLs that end up in
            # logs and exception messages.
            params={
                'start': offset,
                'count': count,
                'query': f'AFFIL({AFF_NAME}) AND PUBYEAR = {year}',
            },
            # Without a timeout a stalled connection blocks the crawl forever.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        time.sleep(3)
        print(f'Request failed: {exc}')
        return None

    # Fixed pause after every request to pace calls to the API.
    time.sleep(3)
    if not res.ok:
        print(res.content)
        return None

    try:
        return res.json()['search-results']['entry']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        # Narrow on purpose so KeyboardInterrupt is never swallowed.
        print(res.content)
        return None

In [11]:
def api_abstracts_retrieve(eid, timeout = 30):
    """Fetch the abstract record for one document from the Abstract Retrieval API.

    Args:
        eid: Document identifier appended to the ``/content/abstract/eid/`` URL.
        timeout: Seconds to wait for the server before abandoning the request.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails,
        the response status is not OK, or the body is not valid JSON. A
        diagnostic is printed in each failure case.
    """
    URI = f'https://api.elsevier.com/content/abstract/eid/{eid}'

    try:
        res = requests.get(
            url=URI,
            # The API key travels only in the X-ELS-APIKey header; keeping it
            # out of the query string keeps it out of URLs that end up in
            # logs and exception messages.
            headers=header,
            # Without a timeout a stalled connection blocks the crawl forever.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        time.sleep(3)
        print(f'Request failed: {exc}')
        return None

    # Fixed pause after every request to pace calls to the API.
    time.sleep(3)

    if not res.ok:
        print(res.content)
        return None
    try:
        return json.loads(res.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError; narrow on purpose so that
        # KeyboardInterrupt is never swallowed.
        print(res.content)
        return None
    
def write_json(path, data):
    """Write ``data`` to ``path`` as JSON indented by four spaces.

    Missing parent directories are created first. An existing file at
    ``path`` is overwritten.

    Args:
        path: Destination file path; may be a bare filename.
        data: Any object ``json.dump`` can serialize.
    """
    directory = os.path.dirname(path)
    # dirname() is '' for a bare filename and makedirs('') raises, so only
    # create directories when there is one to create. exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

In [12]:
# Crawl configuration.
first_year = 2000
last_year = 2007   # inclusive
each_year = 3000   # exclusive upper bound on the search offsets tried per year
each_chunk = 10    # page size requested from the search endpoint

start_time = time.time()
total = 0          # number of abstracts actually written to disk


def _log(message):
    """Print ``message`` prefixed with the seconds elapsed since ``start_time``."""
    print(f"[{time.time() - start_time:.2f}] {message}")


years = list(range(first_year, last_year + 1))
for year in years:

    # One output directory per publication year.
    root_path = f'../../Raw_Extra/{year}'
    for offset in range(0, each_year, each_chunk):

        _log(f"Searching with year:{year}, offset:{offset}, count:{each_chunk}")
        search_result = api_search(year=year, offset=offset, count=each_chunk)
        if search_result is None:
            # A failed search ends this year; move on to the next one.
            _log(f"Search with year:{year}, offset:{offset}, count:{each_chunk} not found")
            break

        # Keep only entries that carry an 'eid'. Presumably the API can return
        # placeholder entries without one (e.g. for an empty result set) --
        # TODO confirm. Indexing such an entry directly would raise KeyError
        # and abort the whole crawl.
        eids = [
            paper['eid']
            for paper in search_result
            if isinstance(paper, dict) and 'eid' in paper
        ]
        if not eids:
            _log(f"Search with year:{year}, offset:{offset}, count:{each_chunk} returned no papers")
            break

        for eid in eids:
            abs_response = api_abstracts_retrieve(eid)
            if abs_response is None:
                _log(f"Paper with eid:{eid} not found")
                continue

            path = root_path + f'/{eid}.json'
            write_json(path=path, data=abs_response)
            # Count only papers that were really saved, so the progress line
            # does not overstate what is on disk.
            total += 1

        _log(f"loaded {total} papers")

[0.00] Searching with year:2000, offset:0, count:10
2-s2.0-0034561311
2-s2.0-0033637489
2-s2.0-0033637210


KeyboardInterrupt: 