In [1]:
import pybliometrics.scopus as scopus
import numpy as np
import pandas as pd
import random
import time

In [2]:
# --- Scopus search ---------------------------------------------------------
# Search expression handed to the Scopus search in a later cell.
query = 'title-abs-key("engineering" AND "research" AND "innovation") AND PUBYEAR = 2021'

# --- Topic model -----------------------------------------------------------
num_topics = 12
seed = 7  # random seed for the NMF topic model

# --- Visualization ---------------------------------------------------------
vis_seed = 6  # random seed for the t-SNE visualization
vis_angle = 135  # rotation angle for the visualization
# NOTE(review): presumably one letter label per topic in the plot - confirm
# against the cell that consumes `labels`.
labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [3]:
# Initialize pybliometrics before issuing any request.
# NOTE(review): presumably this loads (or interactively creates) the local
# configuration holding the API key - confirm for the installed version.
scopus.init()
# Run the Scopus search for `query` and keep the search object in `papers`.
# NOTE(review): subscriber=True presumably requires subscriber-level API
# access, and verbose=True presumably reports download progress - confirm
# both against the pybliometrics documentation.
papers = scopus.ScopusSearch(query, subscriber=True, verbose=True)
# Dump the raw search results to the cell output.
print(papers.results)



In [18]:
# Build a DataFrame directly from the raw search results.
# NOTE(review): this cell is numbered In [18] yet sits before In [4], so it
# was executed out of order; `df` is assigned again further down in this
# file when the CSV table is built - confirm which one later cells rely on.
df = pd.DataFrame(papers.results)

In [4]:
# Build one flat row per Scopus search result, collect the rows in a
# DataFrame, and save the table as a CSV file.

# Column order of the exported table; also used as the header row when the
# search returned nothing.
_CSV_COLUMNS = (
    "Title",
    "Abstract",
    "Author",
    "Aggregation_Type",
    "Publisher",
    "Publication_Date",
    "Institutions",
    "Keywords",
)


def _split_clean(raw, sep):
    """Split *raw* on *sep*, strip every piece, and drop blank pieces.

    Returns an empty list when *raw* is None or an empty string.
    """
    if not raw:
        return []
    return [piece.strip() for piece in raw.split(sep) if piece.strip()]


def _format_author(name):
    """Format one author entry as "Lastname F.".

    An entry of the form "Lastname, Firstname" becomes "Lastname F.".
    Any other shape (no comma, empty first name, or more than one comma)
    becomes the first comma-separated part followed by a period.
    """
    name_parts = name.split(',')
    last_name = name_parts[0].strip()
    if len(name_parts) == 2:
        first_name = name_parts[1].strip()
        if first_name:
            return f"{last_name} {first_name[0]}."
    return f"{last_name}."


def _paper_to_row(paper):
    """Convert one search result into a dict keyed by ``_CSV_COLUMNS``.

    Attributes that do not exist on *paper* yield "". Scalar attributes that
    exist are passed through unchanged (including None).
    """
    # Author names and affiliation names are split on ';' (pybliometrics
    # documents these fields as joined on ';'); keywords are split on '|'.
    # Pieces are stripped so the "; "-joined output has uniform spacing and
    # blank entries do not produce stray separators or a lone ".".
    authors = _split_clean(getattr(paper, 'author_names', None), ';')
    institutions = _split_clean(getattr(paper, 'affilname', None), ';')
    keywords = _split_clean(getattr(paper, 'authkeywords', None), '|')

    return {
        "Title": paper.title,
        "Abstract": getattr(paper, 'description', ""),
        "Author": "; ".join(_format_author(author) for author in authors),
        "Aggregation_Type": getattr(paper, 'aggregationType', ""),
        # NOTE(review): this column is filled from `publicationName`, not
        # from a publisher field - confirm the header is intended.
        "Publisher": getattr(paper, 'publicationName', ""),
        "Publication_Date": getattr(paper, 'coverDate', ""),
        "Institutions": "; ".join(institutions),
        "Keywords": "; ".join(keywords),
    }


# `papers.results` can be None instead of an empty list when the search
# matched nothing (see the pybliometrics ScopusSearch documentation), so
# fall back to an empty list rather than failing on iteration.
data = [_paper_to_row(paper) for paper in (papers.results or [])]

# Passing the column list keeps the header row even when `data` is empty.
df = pd.DataFrame(data, columns=list(_CSV_COLUMNS))

output_file = "scopus_papers_2021_full_details.csv"
df.to_csv(output_file, index=False, encoding="utf-8")