In [51]:
import pybliometrics.scopus as scopus
import numpy as np
import pandas as pd
import random
import time

In [52]:
year='2024'
query = f'title-abs-key("engineering" AND "research" AND "innovation") AND PUBYEAR = {year}'
labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [53]:
scopus.init()
papers = scopus.ScopusSearch(query, subscriber=True, verbose=True, view='COMPLETE')
print(papers.results[3])

Document(eid='2-s2.0-85204212250', doi='10.1016/j.indcrop.2024.119620', pii='S0926669024015978', pubmed_id=None, title='Bibliometric insights into palm oil mill effluent treatment by coagulation-flocculation: Research trends and future directions', subtype='re', subtypeDescription='Review', creator='Mohamed Noor M.H.', afid='60021005', affilname='Universiti Teknologi Malaysia', affiliation_city='Johor Bahru', affiliation_country='Malaysia', author_count='3', author_names='Mohamed Noor, Mohamed Hizam;Ngadi, Norzita;Ab Hamid, Nur Hafizah', author_ids='54991641100;36608934600;57204834044', author_afids='60021005;60021005;60021005', coverDate='2024-12-15', coverDisplayDate='15 December 2024', publicationName='Industrial Crops and Products', issn='09266690', source_id='32791', eIssn=None, aggregationType='Journal', volume='222', issueIdentifier=None, article_number='119620', pageRange=None, description="This analysis explores the research landscape on palm oil mill effluent (POME) treatment

In [54]:
df = pd.DataFrame(papers.results)

In [55]:
# สร้าง DataFrame เพื่อบันทึกเป็น CSV
data = []

for paper in papers.results:
    # Format the author names
    if hasattr(paper, 'author_names') and paper.author_names:
        # Split the author names by ';' and remove leading/trailing spaces
        authors = paper.author_names.split(';')
        authors = [author.strip() for author in authors]  # Strip extra spaces

        formatted_authors = []
        for author in authors:
            name_parts = author.split(',')
            if len(name_parts) == 2:  # If the name has both Lastname and Firstname
                last_name = name_parts[0].strip()
                first_name = name_parts[1].strip()
                # Only append the initial if there is a first name
                if first_name:
                    formatted_authors.append(f"{last_name} {first_name[0]}.")
                else:
                    formatted_authors.append(f"{last_name}.")  # In case of missing first name
            else:  # If the name is a single part (like just Lastname)
                formatted_authors.append(f"{name_parts[0].strip()}.")

        # Join formatted authors with "; "
        formatted_authors = "; ".join(formatted_authors)
    else:
        formatted_authors = ""

    paper_dict = {
        "Title": paper.title,
        "Abstract": paper.description if hasattr(paper, 'description') else "",
        "Author": formatted_authors,
        "Aggregation_Type": paper.aggregationType if hasattr(paper, 'aggregationType') else "",
        "Publisher": paper.publicationName if hasattr(paper, 'publicationName') else "",
        "Publication_Date": paper.coverDate if hasattr(paper, 'coverDate') else "",
        "Institutions": "; ".join(paper.affilname.split('; ')) if hasattr(paper, 'affilname') and paper.affilname else "",
        "Keywords": "; ".join(paper.authkeywords.split('|')) if hasattr(paper, 'authkeywords') and paper.authkeywords else "",
    }
    data.append(paper_dict)



df = pd.DataFrame(data)

output_file = f"../../ExtractedData/Scopus/scopus_papers_{year}.csv"
df.to_csv(output_file, index=False, encoding="utf-8")

In [56]:
df = pd.read_csv(output_file)

In [57]:
df.head(10)

Unnamed: 0,Title,Abstract,Author,Aggregation_Type,Publisher,Publication_Date,Institutions,Keywords
0,Embracing innovation and collaboration: A mess...,,Li M.,Journal,Cancer Letters,2024-12-28,University of Oklahoma College of Medicine,
1,Bio-inspiration unveiled: Dissecting nature's ...,Investigating nature's ingenious designs and s...,Ayali A.; Sonnenreich S.; El Pinchasik B.,Journal,iScience,2024-12-20,Tel Aviv University,Biological sciences ; Engineering ; Physics
2,Improving adsorption and purification performa...,Enhancing adsorption ability of photocatalyst ...,Yu X.; Liu M.; Xu H.; Xu J.; Yi J.,Journal,Environmental Pollution,2024-12-15,Wuhan Textile University,Adsorption ; Bi WO /BiOCl heterojunction 2 6 ...
3,Bibliometric insights into palm oil mill efflu...,This analysis explores the research landscape ...,Mohamed Noor M.; Ngadi N.; Ab Hamid N.,Journal,Industrial Crops and Products,2024-12-15,Universiti Teknologi Malaysia,Bibliometric ; Coagulation ; Flocculation ; ...
4,Graphene-encapsulated nanocomposites: Synthesi...,The discovery of graphene and its remarkable p...,Fu H.; Gray K.,Journal,Science of the Total Environment,2024-12-10,Robert R. McCormick School of Engineering and ...,3D graphene material ; Graphene-based nanocom...
5,Correction to: Comparative time-series analyse...,Correction to: Scientific Reportshttps://doi.o...,Chatthanathon P.; Leelahavanichkul A.; Cheibch...,Journal,Scientific Reports,2024-12-01,Chulalongkorn University;Thailand National Cen...,
6,Bio-inspired designs: leveraging biological br...,Nature’s evolutionary mastery has perfected de...,Fattepur G.; Patil A.; Kumar P.; Kumar A.; Heg...,Journal,3 Biotech,2024-12-01,Arka Jain University;Rayat Bahra University;Vi...,Aquatic adaptations ; Avian-inspired design ;...
7,Designing CITOBOT: A portable device for cervi...,Cervical cancer remains a leading cause of mor...,Arrivillaga M.; Bermúdez P.; García-Cifuentes ...,Journal,Computational and Structural Biotechnology Jou...,2024-12-01,Universidad Icesi;Pontificia Universidad Javer...,Artificial intelligence ; Cervical cancer scr...
8,On hydromagnetic two-phase gas-liquid flow in ...,"The study of two-phase flows, encompassing gas...",Fatima G.; Khan A.; Ellahi R.; Sait S.,Journal,Chinese Journal of Physics,2024-12-01,Marlan and Rosemary Bourns College of Engineer...,Air-Water ; Ciliary Channel ; MHD ; Perturb...
9,Cantilever configurations in vibration-based p...,Vibration-based energy harvesting technology i...,Sadaf A.; Ahmed R.; Ahmed H.,Journal,Smart Materials and Structures,2024-12-01,Georgia Southern University;University of Wisc...,cantilever beams ; energy harvesting ; piezo...
