# Pubmed

In [30]:
#!pip install pymed

In [31]:
from pymed import PubMed

In [32]:
pubmed = PubMed(tool="MyTool", email="my@email.address")

In [33]:
q_pubmed = '( "graph" OR "community detection " OR "group" OR "cluster") AND ( "relevance" OR "redundancy" ) AND ( "feature selection" OR "dimensionality reduction" ) AND ( "biological" OR "Gene expression" OR "omics" )'

q_pubmed

'( "graph" OR "community detection " OR "group" OR "cluster") AND ( "relevance" OR "redundancy" ) AND ( "feature selection" OR "dimensionality reduction" ) AND ( "biological" OR "Gene expression" OR "omics" )'

In [34]:
results = list(pubmed.query(q_pubmed, max_results=500))
len(results)

116

In [35]:
pubmed_results = []

for article in results:

    # Extract and format information from the article
    article_id = article.pubmed_id
    title = article.title
    if article.keywords:
        if None in article.keywords:
            article.keywords.remove(None)
        keywords = '", "'.join(article.keywords)
    publication_date = article.publication_date
    abstract = article.abstract

    pubmed_results.append(
        {
            "ID":article_id, 
            "Title":title, 
            "Abstract":abstract, 
            "Keywords":keywords, 
            "Publication date":publication_date
        }
    )


    # Show information about the article
    #print(
    #    f'{publication_date} - {title}\nKeywords: "{keywords}"\n'
    #)

print(len(pubmed_results))

116


In [36]:
import pandas as pd

pubmed_results = pd.DataFrame(pubmed_results)
pubmed_results.head()

Unnamed: 0,ID,Title,Abstract,Keywords,Publication date
0,41568381\n21965334\n25114853\n30591591\n399792...,Integrative multi-omics stratification and tra...,Immunosuppressive breast cancer subtypes drive...,"breast cancer"", ""immunosuppressive tumormicroe...",2026-01-22
1,41518988,Knowledge graph integration of clustered medic...,This study builds on the premise that phytoche...,"AI-based therapeutic mapping"", ""Chemical space...",2026-01-11
2,41387731\n36137835\n36416120\n38216661\n373753...,Feature learning augmented with sampling and h...,"Big biological datasets, such as gene expressi...","AI-based therapeutic mapping"", ""Chemical space...",2025-12-13
3,41331264\n16963055\n38264914\n32593128\n381857...,Machine learning-based prediction of drug resp...,Myocardial ischemia is a major global contribu...,"Anti-inflammatory therapy"", ""Cardioprotection""...",2025-12-03
4,41301201\n33974890\n30785707\n35779479\n348391...,A Multi-Task Ensemble Strategy for Gene Select...,Gene expression-based tumor classification aim...,"gene expression-based tumor classification"", ""...",2025-11-27


In [37]:
pubmed_results.to_csv("PubmedArticles.csv")

# Scopus

In [38]:
#!pip install pybliometrics

In [39]:
import pybliometrics
from pybliometrics.scopus import ScopusSearch

In [40]:
import os
os.environ['PYBLIOMETRICS_API_KEY'] = 'ab528db237c9103c9e4cee942f602c28' # Substitua 'YOUR_API_KEY' pela sua chave de API Scopus real.
pybliometrics.scopus.init()

In [41]:
q_scopus = 'TITLE-ABS-KEY ( ( "graph" OR "community detection " OR "group" OR "cluster") AND ( "relevance" OR "redundancy" ) AND ( "feature selection" OR "dimensionality reduction" ) AND ( "biological" OR "Gene expression" OR "omics" ) )'

In [42]:
s = ScopusSearch(q_scopus, verbose=True)

print(len(s.results))

303


In [43]:
import pandas as pd

scopus_results = pd.DataFrame(s.results)

scopus_results.head()

Unnamed: 0,eid,doi,pii,pubmed_id,title,subtype,subtypeDescription,creator,afid,affilname,...,pageRange,description,authkeywords,citedby_count,openaccess,freetoread,freetoreadLabel,fund_acr,fund_no,fund_sponsor
0,2-s2.0-105027408346,10.1007/s12559-025-10544-9,,,Leveraging Segmentation and Visibility Graph A...,ar,Article,Mohammady F.,60003666;60033391,University of Mazandaran;Shahrood University o...,...,,Motor Imagery Brain-Computer Interfaces (MI-BC...,Brain computer interface | Electroencephalogra...,0,1,all publisherhybridgold,All Open Access Hybrid Gold,,,
1,2-s2.0-105027299893,10.1007/s10238-025-01866-x,,41348242.0,Integrating multi-omics data to resolve patter...,ar,Article,Xing J.,60069744;60073448,General Hospital of People's Liberation Army;P...,...,,Skin cutaneous melanoma (SKCM) represents a hi...,Ion channel genes | Melanoma | Multi-omics | P...,0,1,all publisherhybridgold repository repositoryam,All Open Access Hybrid Gold Green,,,
2,2-s2.0-105022078059,10.1007/s10238-025-01894-7,,41249578.0,Single-cell transcriptomics identifies SOCS3+ ...,ar,Article,Jiang X.,60025234;133403086,Naval Medical University;Yingtan Municipal Peo...,...,,"Clear cell renal cell carcinoma (ccRCC), a pre...",Biomarker | Exhausted T cell | Renal cell carc...,0,1,all publisherhybridgold repository repositoryam,All Open Access Hybrid Gold Green,,,
3,2-s2.0-105027426803,10.1016/j.gep.2026.119406,S1567133X26000013,41520694.0,Exploring NPM1P51 as a key biomarker for glauc...,ar,Article,Vaishnavi,60076774;133643050,Amity University;BioinfoCore Solutions (OPC) P...,...,,Glaucoma is a group of optic neuropathic condi...,Artificial neural network (ANN) and biomarkers...,0,0,,,,,
4,2-s2.0-105027398275,10.1016/j.compbiolchem.2026.108895,S1476927126000204,41518988.0,Knowledge graph integration of clustered medic...,ar,Article,Shajil U.K.,60013170;129854699;133643214,National Institute of Technology Calicut;UL Re...,...,,This study builds on the premise that phytoche...,AI-based therapeutic mapping | Chemical space ...,0,0,,,NITC,,National Institute of Technology Calicut


In [44]:
scopus_results.to_csv("ScopusArticles.csv")

# Semantic Scholar

In [45]:
# First, import the client from semanticscholar module
from semanticscholar import SemanticScholar

# You'll need an instance of the client to request data from the API
sch = SemanticScholar()


In [53]:
q_semanticScholar = '"( "graph" OR "community detection" OR "group" OR "cluster") AND ( "relevance" OR "redundancy" ) AND ( "feature selection" OR "dimensionality reduction" ) AND ( "biological" OR "Gene expression" OR "omics" )'

response = sch.search_paper(query=q_semanticScholar, bulk=True, sort='publicationDate:desc')

len(response)

0

In [52]:
all_results = []
i = 1
for item in response:
    all_results.append(item)
#    print(i)
    i += 1

len(all_results)

0

# IEEE Xplore


In [48]:
from xploreapi.xploreapi import XPLORE


In [49]:
query = XPLORE('juvdzkf86jj5k6qf5hx8k5mp')
query.searchField('boolean_text', '"feature selection" AND "graph"')
query.maximumResults(2000)
query.resultsSorting('publication_year','asc')
data = query.callAPI()

In [50]:
data

'<h1>Developer Inactive</h1>'