In [36]:
from Bio import Entrez
import time
import os
import getpass
import json

In [2]:
Entrez.email =   getpass.getpass("Enter your email: ")

In [3]:
mental_disorders = [ "Anxiety Disorders", "Depressive Disorder", "Bipolar Disorder", "Schizophrenia", 
                    "Post-Traumatic Stress Disorder", "Obsessive-Compulsive Disorder"]

neurotransmitters = ["Dopamine", "Serotonin", "Gamma-Aminobutyric Acid", "Norepinephrine", "Glutamic Acid", "Acetylcholine",
                     "Endorphins"]

### Scraping documents

In [35]:
def fetch_pubmed_json(query, max_results=10):
    # Search for articles
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    record = Entrez.read(handle)
    ids = record["IdList"]

    if not ids:
        print(f"No articles found for: {query}")
        return []

    # Fetch details in XML format
    fetched_articles = Entrez.efetch(
        db="pubmed", id=ids, rettype="xml", retmode="text"
    )
    articles = Entrez.read(fetched_articles)
    fetched_articles.close()

    results = []

    for article in articles["PubmedArticle"]:
        try:
            title = article["MedlineCitation"]["Article"]["ArticleTitle"]
            abstract_parts = article["MedlineCitation"]["Article"].get("Abstract", {}).get("AbstractText", [])
            abstract = " ".join(abstract_parts)
            pub_date = article["MedlineCitation"]["Article"]["Journal"]["JournalIssue"]["PubDate"]
            year = pub_date.get("Year", "Unknown")
            results.append({
                "title": title,
                "abstract": abstract,
                "year": year
            })
        except Exception as e:
            print(f"Skipping an article due to missing fields: {e}")

    return results

In [37]:
os.makedirs("pubmed_outputs", exist_ok=True)

In [38]:
for disorder in mental_disorders:
    for neurotransmitter in neurotransmitters:
        mesh_query = f'"{disorder}"[MeSH] AND "{neurotransmitter}"[MeSH]'
        print(f"Fetching articles for: {mesh_query}")
        articles = fetch_pubmed_json(mesh_query, max_results=25)
        
        if articles:
            file_name = f"{disorder.replace(' ', '_')}_{neurotransmitter.replace(' ', '_')}.json"
            file_path = os.path.join("pubmed_outputs", file_name)
            with open(file_path, 'w') as f:
                json.dump(articles, f, indent=4)
            print(f"Saved {len(articles)} articles to {file_name}")
        else:
            print(f"No articles found for: {mesh_query}")

        time.sleep(1) 

Fetching articles for: "Anxiety Disorders"[MeSH] AND "Dopamine"[MeSH]
Saved 25 articles to Anxiety_Disorders_Dopamine.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Serotonin"[MeSH]
Saved 25 articles to Anxiety_Disorders_Serotonin.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Gamma-Aminobutyric Acid"[MeSH]
Saved 25 articles to Anxiety_Disorders_Gamma-Aminobutyric_Acid.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Norepinephrine"[MeSH]
Saved 25 articles to Anxiety_Disorders_Norepinephrine.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Glutamic Acid"[MeSH]
Saved 25 articles to Anxiety_Disorders_Glutamic_Acid.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Acetylcholine"[MeSH]
Saved 25 articles to Anxiety_Disorders_Acetylcholine.json
Fetching articles for: "Anxiety Disorders"[MeSH] AND "Endorphins"[MeSH]
Saved 25 articles to Anxiety_Disorders_Endorphins.json
Fetching articles for: "Depressive Disorder"[MeSH] AND "Dopamine"[MeSH]

### Experimental section to understand scraping

In [None]:
def search_pubmed(query, max_results=10):
    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
    record = Entrez.read(handle)
    ids = record["IdList"]

    if not ids:
        print(f"No articles found for: {query}")
        return []

    return (record, ids)

In [11]:
def fetch_articles(ids):
    articles = Entrez.efetch(db="pubmed", id=ids, rettype="xml", retmode="text")
    records = Entrez.read(articles)
    articles.close()
    return records    

In [6]:
query = '"Dopamine"[MeSH] AND "Anxiety Disorders"[MeSH]'

In [7]:
record, ids = search_pubmed(query,15)

In [12]:
fetched_articles = fetch_articles(ids)

In [None]:
for article in fetched_articles['PubmedArticle']:
    title = article['MedlineCitation']['Article']['ArticleTitle']
    abstract = article['MedlineCitation']['Article'].get('Abstract', {}).get('AbstractText', ['No abstract available'])[0]
    print(f"Title: {title}")
    print(f"Abstract: {abstract}\n")
    time.sleep(1)  # Respect NCBI's rate limits

Title: Behavior Changes in Quinpirole Obsessive-Compulsive Disorder Rats Treated with 6-Hydroxydopamine and the Corresponding Dopaminergic Compulsive Loop Mechanism.
Abstract: Recent studies suggest that the anterior limb of the internal capsule may be an area of convergence for multiple compulsion loops. In this study, the role of different dopaminergic compulsion loops in the mechanism of obsessive-compulsive disorder (OCD) was investigated by selectively damaging dopaminergic neurons or fibers in the corresponding targets with 6-hydroxydopamine (6-OHDA) and depicting the anatomical map of various compulsion loops located in the anterior limb of the internal capsule.

Title: Unveiling serotonergic dysfunction of obsessive-compulsive disorder on prefrontal network dynamics: a computational perspective.
Abstract: Serotonin (5-HT) regulates working memory within the prefrontal cortex network, which is crucial for understanding obsessive-compulsive disorder. However, the mechanisms how n