In [1]:
import os
import json
import pandas as pd

In [2]:
# Root folder that is scanned for per-year sub-folders; each year string in
# target_years is joined onto this path to locate that year's JSON files.
base_dir = "scopus_cursor_by_year"
# Years to export. Kept as strings because they are used directly as folder
# names and as the stem of the output CSV file name.
target_years = {"2021", "2022"}

def extract_entry_fields(entry):
    """Flatten one search-result entry into a flat, CSV-ready record.

    Parameters
    ----------
    entry : dict
        A single item taken from the ``"entry"`` list of a search-results
        payload. Any of the keys read here may be absent.

    Returns
    -------
    dict
        A record with the keys ``"Title"``, ``"Abstract"``, ``"Authors"``,
        ``"Scopus ID"``, ``"EID"`` and ``"DOI"``. Absent keys yield ``""``.
        ``"Authors"`` is a ``", "``-separated string of
        ``"<given-name> <surname>"`` values; authors with neither part are
        skipped.
    """
    title = entry.get("dc:title", "")
    abstract = entry.get("dc:description", "")
    scopus_id = entry.get("dc:identifier", "")
    eid = entry.get("eid", "")
    doi = entry.get("prism:doi", "")

    # `or []` also covers an "author" key that is present but null; a plain
    # .get(..., []) would hand back None and the loop below would raise.
    authors = entry.get("author") or []
    author_names = []
    for author in authors:
        # `or ""` guards against explicit nulls: .get(key, "") returns None
        # when the key exists with a null value, and the f-string would then
        # render the literal text "None" into the author name.
        given = author.get("given-name") or ""
        surname = author.get("surname") or ""
        full_name = f"{given} {surname}".strip()
        if full_name:
            author_names.append(full_name)
    authors_str = ", ".join(author_names)

    return {
        "Title": title,
        "Abstract": abstract,
        "Authors": authors_str,
        "Scopus ID": scopus_id,
        "EID": eid,
        "DOI": doi
    }

# Export one CSV per selected year: gather every entry from the year's JSON
# files, flatten each with extract_entry_fields, and write the rows out.
# NOTE(review): this is an absolute, machine-specific location — consider
# moving it next to base_dir as a configurable setting.
output_dir = "D:/GS/final_scoping_review/final_papers/scopus_final/excel_sheets"

# sorted(): target_years is a set, so its iteration order is not guaranteed;
# sorting keeps the processing and log order the same on every run.
for year in sorted(target_years):
    year_path = os.path.join(base_dir, year)
    if not os.path.isdir(year_path):
        print(f"❌ Folder for year {year} does not exist")
        continue

    all_entries = []
    # os.listdir returns names in arbitrary order; sort so that the row order
    # of the resulting CSV is reproducible.
    for file_name in sorted(os.listdir(year_path)):
        if not file_name.endswith(".json"):
            continue
        file_path = os.path.join(year_path, file_name)
        # open() lives inside the try so that an unreadable file is reported
        # and skipped like a malformed one, instead of aborting the whole run.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            entries = data.get("search-results", {}).get("entry", [])
            for entry in entries:
                all_entries.append(extract_entry_fields(entry))
        except Exception as e:
            # Deliberately broad: best-effort export, one bad file must not
            # stop the remaining files from being processed.
            print(f"Error reading {file_path}: {e}")

    # Save to one CSV per year
    if all_entries:
        # to_csv does not create missing parent folders, so ensure it exists.
        os.makedirs(output_dir, exist_ok=True)
        df = pd.DataFrame(all_entries)
        df.to_csv(f"{output_dir}/{year}.csv", index=False)
        print(f"✅ Saved: {year}.csv")
    else:
        print(f"⚠️ No entries found for {year}")


✅ Saved: 2021.csv
✅ Saved: 2022.csv
