## Combining OpenAlex and Crossref to get URL links for all 1,499 papers


In [None]:
import time
from urllib.parse import quote

import pandas as pd
import requests

# === Step 1: Load your list of DOIs ===
# Replace this with your own Excel workbook path and column name.
# Forward slashes are used because they work on Windows as well as POSIX;
# a backslash followed by "Q" inside a normal string literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
input_file = "datasets/Q1_eng_compsci_econ.xlsx"
doi_column = "DOI"  # Adjust if your column is named differently

df = pd.read_excel(input_file)
# Drop missing values and duplicates so each DOI is queried only once.
dois = df[doi_column].dropna().unique().tolist()

# === Step 2: Enrich with OpenAlex and Crossref ===
# For every DOI, query OpenAlex for metadata and an open-access URL; when no
# URL is found, fall back to the "URL" field of the Crossref record.
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() can block forever
REQUEST_DELAY = 1  # seconds to pause between DOIs


def _nested(mapping, *keys):
    """Return mapping[k1][k2]..., or None if any level is missing or not a dict.

    API responses may contain an explicit null for a nested object, in which
    case chaining ``.get(key, {}).get(...)`` raises AttributeError.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


results = []

for doi in dois:
    result = {"DOI": doi}
    # Percent-encode the DOI so characters such as "#", "?", "<" or spaces
    # cannot corrupt the request URL; "/" stays literal as the DOI separator.
    doi_path = quote(str(doi).strip(), safe="/")

    # Try OpenAlex first
    openalex_url = f"https://api.openalex.org/works/https://doi.org/{doi_path}"
    try:
        r = requests.get(openalex_url, timeout=REQUEST_TIMEOUT)
        if r.status_code == 200:
            data = r.json()
            result.update({
                "Title": _nested(data, "title"),
                "Open Access URL": _nested(data, "open_access", "oa_url"),
                "Open Access Status": _nested(data, "open_access", "oa_status"),
                # NOTE(review): OpenAlex documents "primary_location.source"
                # as the replacement for the legacy "host_venue" object;
                # try the current field first and keep the old one as a
                # fallback. Confirm against the current API schema.
                "Journal": (
                    _nested(data, "primary_location", "source", "display_name")
                    or _nested(data, "host_venue", "display_name")
                ),
                "Cited By": _nested(data, "cited_by_count"),
                "Year": _nested(data, "publication_year"),
            })
        else:
            # Record the failure instead of silently dropping it.
            result["OpenAlex Error"] = f"HTTP {r.status_code}"
    except Exception as e:
        # Best effort: one failing DOI must not abort the whole run.
        result["OpenAlex Error"] = str(e)

    # If no OpenAlex URL found, fallback to Crossref
    # NOTE(review): Crossref's "URL" field appears to be the DOI landing
    # link rather than a guaranteed open-access copy; verify before relying
    # on it as an OA link.
    if not result.get("Open Access URL"):
        crossref_url = f"https://api.crossref.org/works/{doi_path}"
        try:
            res = requests.get(crossref_url, timeout=REQUEST_TIMEOUT)
            if res.status_code == 200:
                result["Open Access URL"] = _nested(res.json(), "message", "URL")
            else:
                result["Crossref Error"] = f"HTTP {res.status_code}"
        except Exception as e:
            result["Crossref Error"] = str(e)

    results.append(result)
    time.sleep(REQUEST_DELAY)  # Be kind to the API
    if len(results) % 10 == 0:
        print(f"Processed {len(results)} DOIs...")

# === Step 3: Save to CSV ===
# One row per DOI; columns are the union of the keys collected above.
# Forward slash avoids the invalid escape sequence formed by a backslash
# followed by "o" in a normal string literal, and works on every platform.
output_file = "datasets/open_access_enriched.csv"
pd.DataFrame(results).to_csv(output_file, index=False)
print(f"✅ Done! Output saved to {output_file}")


  input_file = "datasets\Q1_eng_compsci_econ.xlsx"


✅ Done! Output saved to open_access_enriched.csv
