<a href="https://colab.research.google.com/github/Sachin63Kumar/AlgoSmart/blob/main/PubMed_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install biopython



In [None]:
import csv
import re
from typing import List, Dict, Optional

import requests
from Bio import Entrez


In [None]:
# Contact email for PubMed API access; it is set on Biopython's Entrez module
# so it applies to every Entrez call made below. Replace with your own address.
Entrez.email = "sachinkumarrathore15@gmail.com"


In [None]:
class PubMedFetcher:
    """Search PubMed and keep papers that have at least one non-academic author.

    An author counts as non-academic when their first listed affiliation
    contains none of ``ACADEMIC_KEYWORDS`` (a simple substring heuristic).
    """

    # Substrings (matched case-insensitively) that mark an affiliation as academic.
    ACADEMIC_KEYWORDS = ("university", "college", "institute", "school", "lab", "research center")

    # Contact emails commonly appear inside the free-text affiliation string,
    # so they are pulled out with a permissive pattern.
    _EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")

    def __init__(self, query: str):
        """Store the PubMed search expression used by ``fetch_papers``."""
        self.query = query

    def fetch_papers(self, max_results: int = 10) -> List[Dict[str, str]]:
        """Search PubMed for ``self.query`` and return the matching papers.

        Args:
            max_results: Maximum number of PubMed IDs requested from the search.

        Returns:
            One dict per paper that has at least one non-academic author;
            papers without such an author are skipped.
        """
        handle = Entrez.esearch(db="pubmed", term=self.query, retmax=max_results)
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even when parsing fails.
            handle.close()

        papers = []
        for pmid in record["IdList"]:
            paper_data = self.fetch_paper_details(pmid)
            if paper_data:
                papers.append(paper_data)
        return papers

    def fetch_paper_details(self, pmid: str) -> Optional[Dict[str, str]]:
        """Fetch one paper by PubMed ID and summarise its non-academic authors.

        Args:
            pmid: PubMed identifier of the paper.

        Returns:
            A dict with the paper's ID, title, publication date, non-academic
            author names, their affiliations and a contact email ("N/A" when
            none is found), or ``None`` when the record contains no article or
            no non-academic author.
        """
        handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml", retmode="text")
        try:
            record = Entrez.read(handle)
        finally:
            handle.close()
        return self._parse_record(pmid, record)

    def _parse_record(self, pmid: str, record) -> Optional[Dict[str, str]]:
        """Build the result dict from an already-parsed efetch record (no I/O)."""
        articles = record.get("PubmedArticle") or []
        if not articles:
            # Nothing under "PubmedArticle" (e.g. the ID resolved to another
            # record type), so there is nothing to report.
            return None

        article = articles[0].get("MedlineCitation", {}).get("Article", {})
        title = str(article.get("ArticleTitle", ""))
        pub_date = article.get("Journal", {}).get("JournalIssue", {}).get("PubDate", {})

        non_academic_authors = []
        company_affiliations = []
        explicit_email = None
        affiliation_email = None

        for author in article.get("AuthorList", []):
            # The key may be present with an empty list, so test the content.
            affiliation_info = author.get("AffiliationInfo") or []

            if affiliation_info:
                affiliation = str(affiliation_info[0].get("Affiliation", ""))
                if affiliation and self.is_non_academic(affiliation):
                    non_academic_authors.append(self._author_name(author))
                    company_affiliations.append(affiliation)

            if "ElectronicAddress" in author:
                # An explicit address wins; the last author carrying one is kept.
                explicit_email = author["ElectronicAddress"]

            if affiliation_email is None:
                # Fallback: first email embedded in any affiliation text.
                for info in affiliation_info:
                    match = self._EMAIL_RE.search(str(info.get("Affiliation", "")))
                    if match:
                        affiliation_email = match.group(0)
                        break

        if not non_academic_authors:
            return None

        return {
            "PubmedID": str(pmid),
            "Title": title,
            "Publication Date": self._format_pub_date(pub_date),
            "Non-academic Author(s)": ", ".join(non_academic_authors),
            "Company Affiliation(s)": ", ".join(company_affiliations),
            "Corresponding Author Email": str(explicit_email or affiliation_email or "N/A"),
        }

    @staticmethod
    def _author_name(author) -> str:
        """Return "LastName ForeName", falling back to initials or a collective name."""
        parts = [author.get("LastName"), author.get("ForeName") or author.get("Initials")]
        name = " ".join(str(part) for part in parts if part)
        return name or str(author.get("CollectiveName") or "Unknown")

    @staticmethod
    def _format_pub_date(pub_date) -> str:
        """Flatten a PubDate mapping (Year/Month/Day or MedlineDate) into text."""
        if isinstance(pub_date, str):
            return pub_date
        parts = [str(pub_date[key]) for key in ("Year", "Month", "Day") if pub_date.get(key)]
        if parts:
            return " ".join(parts)
        return str(pub_date.get("MedlineDate") or "N/A")

    def is_non_academic(self, affiliation: str) -> bool:
        """Return True when the affiliation contains no academic keyword.

        This is a case-insensitive substring heuristic, so "lab" also matches
        words such as "Laboratories".
        """
        lowered = affiliation.lower()
        return not any(keyword in lowered for keyword in self.ACADEMIC_KEYWORDS)


In [None]:
def save_to_csv(papers: List[Dict[str, str]], filename: str):
    """Write the paper dicts to ``filename`` as UTF-8 CSV with a header row.

    Each dict in ``papers`` becomes one row; columns are emitted in the fixed
    order listed below.
    """
    columns = [
        "PubmedID",
        "Title",
        "Publication Date",
        "Non-academic Author(s)",
        "Company Affiliation(s)",
        "Corresponding Author Email",
    ]
    # newline="" leaves line-ending handling to the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for row in papers:
            sheet.writerow(row)


In [None]:
def fetch_and_print_papers(query: str, debug: bool = False, save_to_file: str = None):
    """Run a PubMed search and either print the results or save them as CSV.

    Args:
        query: Search expression handed to ``PubMedFetcher``.
        debug: When true, print how many papers were returned.
        save_to_file: CSV path to write to; when falsy, each paper dict is
            printed instead.
    """
    papers = PubMedFetcher(query).fetch_papers()

    if debug:
        print(f"Debug: Found {len(papers)} papers.")

    # Nothing to print or save.
    if not papers:
        print("No relevant papers found.")
        return

    # No output file requested: dump each paper to stdout.
    if not save_to_file:
        for paper in papers:
            print(paper)
        return

    save_to_csv(papers, save_to_file)
    print(f"Results saved to {save_to_file}")


In [None]:
# Run the search with debug output enabled. Because save_to_file is given,
# matching papers are written to papers.csv instead of being printed.
fetch_and_print_papers(query="pharmaceutical AND drug discovery", debug=True, save_to_file="papers.csv")


Debug: Found 2 papers.
Results saved to papers.csv


In [None]:
# Colab-only step: hand the papers.csv written by the previous cell to
# google.colab's files.download helper.
from google.colab import files
files.download('papers.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>