In [56]:
import requests, csv
import pandas as pd
from datetime import date

In [None]:
from find_qid import find_qid_by_orcid
from find_qid import _api_get

In [None]:
def fetch_orcid_sections(orcid_id: str, timeout: float = 30.0, max_works: int = 5) -> dict:
    """Fetch education, work and peer-review summaries for one ORCID iD.

    Three endpoints under ``https://pub.orcid.org/v3.0/<orcid_id>`` are queried
    (``/educations``, ``/works`` and ``/peer-reviews``) with a JSON ``Accept``
    header. A response whose ``ok`` flag is false contributes an empty list
    instead of raising. Employment data is currently not fetched.

    Args:
        orcid_id: ORCID identifier, inserted verbatim into the request URL.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the notebook indefinitely.
        max_works: Maximum number of work summaries to return; ``None``
            returns all of them.

    Returns:
        A dict with the keys ``"Education and qualification"``, ``"Work"`` and
        ``"Peer Reviews"``, each mapped to a list of summary dicts taken from
        the API response.

    Raises:
        Whatever ``requests.get`` or ``Response.json`` raise (connection
        errors, timeouts, invalid JSON); these are not caught here.
    """
    headers = {"Accept": "application/json"}
    base_url = f"https://pub.orcid.org/v3.0/{orcid_id}"

    def fetch_section(endpoint: str) -> dict:
        """GET ``<base_url>/<endpoint>``; return its JSON object or ``{}``."""
        resp = requests.get(f"{base_url}/{endpoint}", headers=headers, timeout=timeout)
        if not resp.ok:
            return {}
        payload = resp.json()
        return payload if isinstance(payload, dict) else {}

    def fetch_education() -> list:
        out = []
        # `or []` also covers keys that are present but null in the JSON.
        for group in fetch_section("educations").get("affiliation-group") or []:
            for summary in group.get("summaries") or []:
                edu = summary.get("education-summary")
                if edu:
                    out.append(edu)
        return out

    def fetch_works() -> list:
        out = []
        for group in fetch_section("works").get("group") or []:
            work_summary = group.get("work-summary") or []
            if work_summary:
                out.append(work_summary[0])  # only the first (representative) version
        return out[:max_works]

    def fetch_peer_reviews() -> list:
        out = []
        for group in fetch_section("peer-reviews").get("group") or []:
            for subgroup in group.get("peer-review-group") or []:
                out.extend(subgroup.get("peer-review-summary") or [])
        return out

    return {
        "Education and qualification": fetch_education(),
        "Work": fetch_works(),
        "Peer Reviews": fetch_peer_reviews(),
    }


In [102]:
def sort_by_completion_year(entries, reverse=True):
    """Return a new list of ``entries`` ordered by completion year.

    The year is read from ``entry["completion-date"]["year"]["value"]``.
    Entries where any level of that path is missing, null, not a dict, or
    where the value is empty or not convertible to ``int`` are given the
    sentinel year 9999 instead of raising.

    NOTE(review): because of the 9999 sentinel, undated entries come *first*
    with the default ``reverse=True`` and last with ``reverse=False`` —
    confirm this is the ordering callers want.

    Args:
        entries: Iterable of peer-review summaries (``None`` is treated as
            empty).
        reverse: ``True`` (default) sorts from highest to lowest year.

    Returns:
        A new sorted list; the input is not modified. Ties keep their
        original relative order.
    """
    missing_year = 9999

    def extract_year(entry):
        node = entry
        # Walk the nested path defensively: the JSON may carry explicit nulls.
        for key in ("completion-date", "year", "value"):
            if not isinstance(node, dict):
                return missing_year
            node = node.get(key)
        try:
            return int(node) if node else missing_year
        except (TypeError, ValueError):
            return missing_year

    return sorted(entries or [], key=extract_year, reverse=reverse)

In [None]:
# Test call
# peer_list = sort_by_completion_year(data.get("Peer Reviews", []))
# review = peer_list[0] if peer_list else None

# print(review)

In [None]:
csv_input_path = "../outputs/orcid_only.csv"
# Rows with a missing value in any column are dropped, so every remaining
# "orcid" cell is non-null.
df = pd.read_csv(csv_input_path).dropna()
# df = df.head(15)  # uncomment to limit the run while debugging

# Keep every lookup result so it remains available to later cells.
qid_by_orcid = {}
for raw_orcid in df["orcid"]:
    orcid = str(raw_orcid).strip()
    if not orcid:
        # A whitespace-only cell is not an identifier; skip the lookup.
        continue
    print(f"Processing {orcid}")
    qid = find_qid_by_orcid(orcid)
    qid_by_orcid[orcid] = qid


Processing 0000-0001-9124-5203
Processing 0009-0009-8131-7627
Processing 0000-0003-0161-0559
Processing 0000-0003-1655-0931
Processing 0000-0002-3757-0037
Processing 0000-0001-8999-4395
Processing 0009-0007-0313-2593
Processing 0000-0001-6513-5350
Processing 0009-0005-5325-8309
Processing 0009-0000-1799-5268
Processing 0009-0005-9811-5862
Processing 0000-0003-0616-5191
Processing 0000-0001-5130-546X
Processing 0009-0004-4382-4760
Processing 0009-0008-0887-0015
Processing 0009-0007-3729-1533
Processing 0000-0003-4328-9193
Processing 0009-0006-8111-1723
Processing 0009-0008-2472-5061
Processing 0009-0004-2517-0791
Processing 0000-0002-9316-8982
Processing 0009-0001-7836-1274


In [None]:
def export_orcid_qs(data_dict: dict, output_path: str, limits: dict):
    """Write ORCID-derived statements to a pipe-delimited file.

    For every ORCID iD in ``data_dict`` one row is written per exported
    education (P69, optional P580 start-year qualifier), work (P800) and peer
    review (P4032, optional P236 qualifier taken from an ``issn:``-prefixed
    ``review-group-id``). Each row ends with the source URL
    ``https://orcid.org/<orcid_id>`` (S854) and today's date (S813).
    Records that are not dicts, or that lack the organisation name / title,
    are skipped. Nested fields that are missing or null are treated as absent.

    NOTE(review): the header names 7 columns while every data row carries 9
    fields (the 'S854' / 'S813' labels are written as values as well) —
    confirm which layout the consumer of this file expects.

    Args:
        data_dict: Mapping of ORCID iD to a sections dict with the keys
            "Education and qualification", "Work" and "Peer Reviews".
        output_path: Destination file; it is overwritten.
        limits: Maximum number of records per section, looked up under the
            keys "Education", "Work" and "Peer". A missing key means 0, i.e.
            the section is not exported.
    """
    today_wd = f'+{date.today().isoformat()}T00:00:00Z/11'

    def nested(record, *keys):
        """Follow ``keys`` through nested dicts; None if any level is absent."""
        node = record
        for key in keys:
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node

    with open(output_path, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['ID', 'P', 'Value', 'Qualifier_P', 'Qualifier_V', 'S854', 'S813'])

        def emit(prop, value, source_url, qual_p='', qual_v=''):
            """Write one statement row with its reference columns."""
            writer.writerow(['CREATE', prop, value, qual_p, qual_v,
                             'S854', source_url, 'S813', today_wd])

        for orcid_id, sections in data_dict.items():
            source_url = f"https://orcid.org/{orcid_id}"

            # EDUCATION → P69
            educations = sections.get("Education and qualification") or []
            for edu in educations[:limits.get("Education", 0)]:
                if not isinstance(edu, dict):
                    continue
                inst = nested(edu, 'organization', 'name')
                if not inst:
                    continue
                start = nested(edu, 'start-date', 'year', 'value')
                if start:
                    emit('P69', inst, source_url, 'P580', f'+{start}-00-00T00:00:00Z/9')
                else:
                    emit('P69', inst, source_url)

            # WORK → P800
            works = sections.get("Work") or []
            for work in works[:limits.get("Work", 0)]:
                if not isinstance(work, dict):
                    continue
                title = nested(work, "title", "title", "value")
                if title:
                    emit('P800', title, source_url)

            # PEER REVIEW → P4032
            # Drop non-dict entries before sorting so the sorter only sees records.
            reviews = [r for r in (sections.get("Peer Reviews") or []) if isinstance(r, dict)]
            for review in sort_by_completion_year(reviews)[:limits.get("Peer", 0)]:
                org = nested(review, "convening-organization", "name")
                if not org:
                    continue
                group_id = review.get("review-group-id")
                prefix = "issn:"
                if isinstance(group_id, str) and group_id.startswith(prefix):
                    # Strip only the leading prefix, not later occurrences.
                    issn = group_id[len(prefix):]
                else:
                    issn = ""
                if issn:
                    emit('P4032', org, source_url, 'P236', issn)
                else:
                    emit('P4032', org, source_url)


In [None]:
# Test call
# orcid_id = "0000-0002-1481-2996"
# data = fetch_orcid_sections(orcid_id)

# print(data)

In [110]:
# Strip whitespace before the identifiers are put into request URLs, and drop
# blanks and duplicates (order preserved) so each record is fetched once.
# Example values: ["0000-0002-1481-2996", "0000-0002-9421-8582"]
cleaned_ids = (str(raw_id).strip() for raw_id in df["orcid"] if pd.notna(raw_id))
orcid_ids = list(dict.fromkeys(oid for oid in cleaned_ids if oid))
orcid_data = {oid: fetch_orcid_sections(oid) for oid in orcid_ids}

# Maximum number of records exported per section.
limits = {
    #"Employment": 1,
    "Education": 1,
    "Work": 1,
    "Peer": 1
}
# print(orcid_data)
export_orcid_qs(orcid_data, "../outputs/qs_further_items_output.csv", limits)