In [1]:

import os
import requests
import json
import time
import urllib.parse

In [2]:
# --- Configuration -----------------------------------------------------------
# The Elsevier API key is read from the environment so the credential never
# lives in the notebook (or in version control). Set SCOPUS_API_KEY before
# starting the kernel. A key that was previously committed in plain text should
# be treated as compromised and revoked.
API_KEY = os.environ.get("SCOPUS_API_KEY", "")
if not API_KEY:
    print("⚠️ SCOPUS_API_KEY is not set; Scopus requests will be rejected until it is.")

YEARS = range(2021, 2023)  # publication years to fetch; the upper bound is exclusive
COUNT = 25                 # value sent as the `count` (page size) request parameter
BASE_SAVE_DIR = "scopus_cursor_by_year"  # one sub-directory per year is created below this

# Human-readable Scopus search expression. Edit this string, not the encoded one.
QUERY_TEXT = (
    'TITLE-ABS-KEY('
    '("smart city" OR "smart cities" OR "sustainable city" OR "sustainable cities"'
    ' OR "digital city" OR "digital cities" OR urban* OR infrastructure* OR technology*)'
    ' AND (health* OR "well-being" OR safety OR security*)'
    ')'
    ' AND SRCTYPE(j) AND LANGUAGE(english) AND DOCTYPE(ar)'
)

# Percent-encoded form that is interpolated directly into the request URL.
# safe="" forces every reserved character (parentheses, quotes, spaces, '*')
# to be escaped, so the result is safe to embed as a query-string value.
BASE_QUERY = urllib.parse.quote(QUERY_TEXT, safe="")

# Headers sent with every Scopus request: authentication plus JSON responses.
headers = {
    "X-ELS-APIKey": API_KEY,
    "Accept": "application/json"
}

# Download every result page for each year in YEARS using the Scopus Search
# API's cursor-based pagination, writing each raw JSON page to
# <BASE_SAVE_DIR>/<year>/scopus_cursor_<year>_page_<n>.json.
#
# A failure (network error, non-200 status, unparsable body) stops the current
# year only; the loop then moves on to the next year and reports that the
# year may be incomplete.

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the cell indefinitely.
REQUEST_TIMEOUT = 60

for YEAR in YEARS:
    print(f"\n📅 Starting fetch for year {YEAR}")
    SAVE_DIR = os.path.join(BASE_SAVE_DIR, f"{YEAR}")
    os.makedirs(SAVE_DIR, exist_ok=True)

    cursor_count = 0          # number of pages already completed for this year
    current_cursor = "*"      # "*" asks the API for the first page
    year_completed = False    # flipped to True only when pagination ends normally
    url = f"https://api.elsevier.com/content/search/scopus?query={BASE_QUERY}&view=COMPLETE&date={YEAR}&cursor=*&count={COUNT}"

    while url:
        print(f"  ⏳ Fetching page {cursor_count + 1} for {YEAR}...")
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Connection errors and timeouts end this year but not the whole run.
            print(f"❌ Request failed: {exc}")
            break

        if response.status_code != 200:
            print(f"❌ Error: {response.status_code} - {response.text}")
            break

        try:
            data = response.json()
        except ValueError:
            # A 200 response with a non-JSON body (e.g. a proxy error page).
            print("❌ Error: response body is not valid JSON")
            break

        # Check for empty entries
        entries = data.get("search-results", {}).get("entry", [])
        # Defensive: the API may signal an empty result set with a single
        # placeholder entry carrying an "error" key instead of an empty list.
        only_error_placeholder = (
            len(entries) == 1
            and isinstance(entries[0], dict)
            and "error" in entries[0]
        )
        if not entries or only_error_placeholder:
            print("✅ No more results (empty entry list). Stopping.")
            year_completed = True
            break

        # Save the page
        output_file = os.path.join(SAVE_DIR, f"scopus_cursor_{YEAR}_page_{cursor_count + 1}.json")
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"  ✅ Saved page {cursor_count + 1} to {output_file}")

        # Get next cursor
        next_cursor = data.get("search-results", {}).get("cursor", {}).get("@next")
        # A missing cursor, or one that did not advance, means there is nothing
        # further to fetch; the second check also prevents an endless loop.
        if not next_cursor or next_cursor == current_cursor:
            print("✅ No more pages (no next cursor).")
            year_completed = True
            break

        current_cursor = next_cursor
        encoded_cursor = urllib.parse.quote(next_cursor)
        url = f"https://api.elsevier.com/content/search/scopus?query={BASE_QUERY}&view=COMPLETE&date={YEAR}&cursor={encoded_cursor}&count={COUNT}"
        cursor_count += 1
        time.sleep(1)  # Rate limiting

    if year_completed:
        print(f"🎉 Finished fetching results for year {YEAR}")
    else:
        # Do not claim success when the loop was aborted by an error.
        print(f"⚠️ Fetch for year {YEAR} stopped early; saved results may be incomplete")

print("\n🚀 Done fetching all years!")



📅 Starting fetch for year 2021
  ⏳ Fetching page 1 for 2021...
  ✅ Saved page 1 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_1.json
  ⏳ Fetching page 2 for 2021...
  ✅ Saved page 2 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_2.json
  ⏳ Fetching page 3 for 2021...
  ✅ Saved page 3 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_3.json
  ⏳ Fetching page 4 for 2021...
  ✅ Saved page 4 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_4.json
  ⏳ Fetching page 5 for 2021...
  ✅ Saved page 5 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_5.json
  ⏳ Fetching page 6 for 2021...
  ✅ Saved page 6 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_6.json
  ⏳ Fetching page 7 for 2021...
  ✅ Saved page 7 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_7.json
  ⏳ Fetching page 8 for 2021...
  ✅ Saved page 8 to scopus_cursor_by_year\2021\scopus_cursor_2021_page_8.json
  ⏳ Fetching page 9 for 2021...
  ✅ Saved page 9 to scopus_cursor_by_year\2021\scopus_cu