### NLP Research Internship Assignment: Biomedical Text Analysis
*data_extraction_starter.ipynb*

In [None]:
# Import necessary libraries
from Bio import Entrez
import ssl

# Bypass SSL certificate verification.
# NOTE(review): this rebinds a module-level attribute of `ssl`, so it is not
# scoped to the Entrez calls in this notebook -- presumably every HTTPS
# request made later in the same process is affected as well. It relies on
# private (underscore-prefixed) `ssl` names. Disabling verification removes
# protection against man-in-the-middle attacks; confirm this workaround is
# still needed and prefer fixing the local CA bundle instead.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# Function to fetch abstracts from PubMed using MeSH terms
def fetch_abstracts(term, max_results=1000):
    """
    Fetch abstracts from PubMed based on search terms.

    The function first searches PubMed for matching record IDs and then
    downloads those records as XML, reading the abstract out of each one.
    (Splitting the plain-text rendering on blank lines does not work: every
    record in that rendering contains several blank-line-separated
    paragraphs, so the pieces are citation/title/author fragments rather
    than abstracts.)

    Parameters:
    term (str): Search term or MeSH term for querying PubMed.
    max_results (int): Maximum number of results to fetch.

    Returns:
    list: One string per fetched article that has an abstract, in the order
    the articles appear in the response. The sections of a multi-section
    abstract are joined with a single space. Articles without an abstract
    are skipped. An empty list is returned (after printing
    "No results found.") when the search matches nothing.
    """

    # Provide contact email for Entrez
    Entrez.email = "info@toxgensolutions.eu"

    # Perform the search query; close the handle even if parsing fails
    handle = Entrez.esearch(db="pubmed", term=term, retmax=max_results)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    # Extract PubMed IDs from the search results
    id_list = record["IdList"]

    # Check if search returned results
    if not id_list:
        print("No results found.")
        return []

    # Fetch the full records as XML so each abstract can be located reliably
    handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
    try:
        records = Entrez.read(handle)
    finally:
        handle.close()

    abstracts = []
    # NOTE(review): assumes the parsed response is a mapping with a
    # "PubmedArticle" list, as produced by current Biopython releases --
    # confirm against the installed version.
    for article in records.get("PubmedArticle", []):
        sections = (
            article["MedlineCitation"]["Article"]
            .get("Abstract", {})
            .get("AbstractText", [])
        )
        text = " ".join(str(section).strip() for section in sections).strip()
        if text:
            abstracts.append(text)

    return abstracts

In [None]:
# Query to send to PubMed, e.g., "Cancer Immunotherapy"
search_term = "Cancer Immunotherapy"

# Retrieve the entries matching the query
abstracts = fetch_abstracts(search_term)

# Optional sanity check: print the leading entries, numbered from 1
print("First 5 abstracts:\n")
for number, abstract in enumerate(abstracts[:5], start=1):
    print(f"{number}. {abstract}\n")