In [15]:
import re

def extract_cited(text):
    """Extract a citation count from free text such as ``"3 cited"``.

    The search is case-insensitive and allows optional whitespace between
    the number and the word "cited". Only the first occurrence is used.

    Args:
        text: Text to search. May be ``None`` or empty.

    Returns:
        The integer that directly precedes "cited", or ``None`` when the
        text is missing/empty or contains no such occurrence.
    """
    if not text:
        # Tolerate missing text (as extract_doi does) instead of raising
        # AttributeError on None.lower().
        return None
    match = re.search(r"(\d+)\s*cited", text.lower())
    return int(match.group(1)) if match else None


In [16]:
def extract_doi(text, link=None):
    """Extract a DOI from free text, falling back to a doi.org link.

    Args:
        text: Text that may contain a DOI (e.g. ``"DOI: 10.1234/abc"``).
            May be ``None`` or empty.
        link: Optional URL. Only consulted when no DOI is found in ``text``
            and the URL contains a ``doi.org/`` segment.

    Returns:
        The DOI string, or ``None`` when neither source yields one.
    """
    doi_pattern = r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)"
    if text:
        match = re.search(doi_pattern, text, re.I)
        if match:
            # The character class also accepts sentence punctuation, so a DOI
            # at the end of a sentence or inside parentheses would otherwise
            # keep a trailing ".", ",", ";", ":" or an unbalanced ")".
            doi = match.group(1).rstrip(".,;:")
            while doi.endswith(")") and doi.count(")") > doi.count("("):
                doi = doi[:-1].rstrip(".,;:")
            return doi
    # Require the trailing slash: a bare "https://doi.org" has nothing after
    # the host, and splitting it would return the whole URL as the "DOI".
    if link and "doi.org/" in link:
        tail = link.split("doi.org/")[-1]
        # A literal "?" or "#" in a URL starts the query/fragment, which is
        # not part of the DOI.
        tail = re.split(r"[?#]", tail, maxsplit=1)[0]
        return tail or None
    return None


In [17]:
def _select_text(node, selector):
    """Return the stripped text of the first match of `selector` under `node`, or None."""
    found = node.select_one(selector)
    return found.get_text(strip=True) if found else None


def scrape_author(author_id, timeout=30):
    """Scrape every publication list of one author into a list of records.

    For each ``view -> pub_type`` pair in ``VIEW_TYPE_MAP`` the page
    ``{BASE_URL}/{author_id}/?view={view}`` is fetched and every
    ``div.ar-list-item`` element is converted into one dict.

    Args:
        author_id: Author identifier interpolated into the URL.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``author_id``, ``type``, ``title``,
        ``detail``, ``doi``, ``year``, ``quartile``, ``publisher``,
        ``cited``, ``author_order``, ``creator`` and ``source_link``.
        Fields that cannot be found on the page are ``None``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    all_data = []

    for view, pub_type in VIEW_TYPE_MAP.items():
        url = f"{BASE_URL}/{author_id}/?view={view}"
        # Without a timeout a stalled server blocks the notebook forever;
        # without a status check an error page silently yields zero records.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for item in soup.select("div.ar-list-item"):
            title_el = item.select_one("div.ar-title a")
            title = title_el.get_text(strip=True) if title_el else None
            # .get() so an anchor without an href yields None, not KeyError.
            link = title_el.get("href") if title_el else None

            full_text = item.get_text(" ", strip=True)
            doi = extract_doi(full_text, link)
            cited = extract_cited(full_text)

            quartile = _select_text(item, "a.ar-quartile")
            detail = None

            # ===== TYPE-SPECIFIC LOGIC =====
            # For research and community-service entries the text scraped
            # from the quartile element is moved into `detail`, leaving
            # `quartile` empty. Other types (including article_garuda) keep
            # `cited` and `doi` exactly as extracted above.
            if pub_type in ("research", "community_service"):
                detail, quartile = quartile, None

            meta_links = [a.get_text(strip=True) for a in item.select("div.ar-meta a")]

            all_data.append({
                "author_id": author_id,
                "type": pub_type,
                "title": title,
                "detail": detail,
                "doi": doi,
                "year": _select_text(item, "a.ar-year"),
                "quartile": quartile,
                "publisher": _select_text(item, "a.ar-pub"),
                "cited": cited,
                # First meta link whose text mentions the label, if any.
                "author_order": next((t for t in meta_links if "Author Order" in t), None),
                "creator": next((t for t in meta_links if "Creator" in t), None),
                "source_link": link,
            })

    return all_data


In [None]:
import pandas as pd

# Author whose publication lists are scraped; change this to target another author.
AUTHOR_ID = 258671

data = scrape_author(AUTHOR_ID)
df = pd.DataFrame(data)
# End the cell with the expression (not print) so the notebook renders the
# preview as a rich table.
df.head()


In [19]:
# Display the publications DataFrame `df` as this cell's output.
df

Unnamed: 0,author_id,type,title,detail,doi,year,quartile,publisher,cited,author_order,creator,source_link
0,258671,article_scopus,Virtual reality photography tour 360 degree pa...,,,2025,Q4 as Conference Proceedin,Aip Conference Proceedings,0.0,Author Order : 1 of 2,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
1,258671,article_scopus,Utilization of Context Aware System: Enhancing...,,,2024,no-Q as Conference Proceedin,Incitest 2024 Proceedings of the 7th Internati...,0.0,Author Order : 4 of 4,Creator : Maulaa H.,https://www.scopus.com/record/display.uri?eid=...
2,258671,article_scopus,Augmented Reality Based Indoor Navigation Syst...,,,2024,no-Q as Conference Proceedin,Incitest 2024 Proceedings of the 7th Internati...,0.0,Author Order : 1 of 3,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
3,258671,article_scopus,An Augmented Reality-Based Application for Foo...,,,2023,no-Q as Conference Proceedin,Incitest 2023 Proceedings of the 2023 Internat...,1.0,Author Order : 1 of 2,Creator : Agustia R.,https://www.scopus.com/record/display.uri?eid=...
4,258671,article_scopus,An augmented reality-based lipstick color reco...,,,2023,Q4 as Conference Proceedin,Aip Conference Proceedings,1.0,Author Order : 1 of 2,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
5,258671,article_scopus,Relay Streaming System Model to Social Media P...,,,2020,Q4 as Conference Proceedin,Iop Conference Series Materials Science and En...,0.0,Author Order : 1 of 2,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
6,258671,article_scopus,Implementation of Web Assembly Technology as V...,,,2019,Q3 as Conference Proceedin,Iop Conference Series Materials Science and En...,1.0,Author Order : 1 of 2,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
7,258671,article_scopus,"Implementation of Visual, Auditory, Kineshthet...",,,2018,Q3 as Conference Proceedin,Iop Conference Series Materials Science and En...,3.0,Author Order : 1 of 2,Creator : Agustia R.D.,https://www.scopus.com/record/display.uri?eid=...
8,258671,article_garuda,Pengelompokan Mahasiswa Berdasarkan Capaian Pe...,,10.34010/komputa.v14i2.17932,2025,Accred : Unknown,Komputa : Jurnal Ilmiah Komputer dan Informati...,,Author Order : 3 of 4,,https://garuda.kemdiktisaintek.go.id/documents...
9,258671,article_garuda,Pemberdayaan Masyarakat Desa Rancabango melalu...,,10.34010/petik.v5i2.18254,2025,Accred : Unknown,Jurnal Pengabdian Teknik dan Ilmu Komputer (Pe...,,Author Order : 1 of 5,,https://garuda.kemdiktisaintek.go.id/documents...
