In [None]:
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def setup_driver():
    """Build a Chrome WebDriver with a fixed set of command-line flags.

    Returns:
        A ``webdriver.Chrome`` instance created from a ``ChromeOptions``
        object carrying the flags listed below.
    """
    chrome_flags = (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920x1080",
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    return webdriver.Chrome(options=chrome_options)

# Site root against which scraped hrefs are resolved
BASE_URL = 'https://www.news-medical.net/'
PAGE_LOAD_TIMEOUT = 25  # Seconds to wait for page load

# Index letters to process: 'b' through 'z' ('a' is not in the list)
letters = list('bcdefghijklmnopqrstuvwxyz')



# Module-level browser instance; get_links_title() and get_content() read this
# global rather than receiving a driver as an argument.
driver = setup_driver()

def is_404_page(soup):
    """Placeholder for 404-page detection; not implemented yet.

    The ``soup`` argument is currently ignored and the function always
    returns ``None``.
    """
    return None

def get_links_title(url):
    """Load a listing page and collect the title and link of each list item.

    The page is opened in the shared module-level ``driver``. Parsing starts
    once an element with class ``odd-item`` is present, waiting up to 20
    seconds for it.

    Args:
        url: Address of the listing page to load.

    Returns:
        A ``(titles, links)`` tuple of two parallel lists. ``titles`` holds
        the stripped text of each ``<li>``; ``links`` holds the item's first
        ``href`` resolved against ``BASE_URL``. All ``odd-item`` entries come
        first, followed by all ``even-item`` entries.

    Raises:
        selenium.common.exceptions.TimeoutException: If no ``odd-item``
            element appears within the wait.
    """
    driver.get(url)
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'odd-item')))
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    titles = []
    links = []

    # Odd rows are processed before even rows, so the result is grouped by
    # row class rather than following the order of items on the page.
    for css_class in ('odd-item', 'even-item'):
        for item in soup.find_all('li', class_=css_class):
            # Skip list items without a usable link instead of crashing on
            # a missing <a> tag or a missing href attribute.
            anchor = item.find('a', href=True)
            if anchor is None:
                continue

            title = item.text.strip()
            link = urljoin(BASE_URL, anchor['href'])
            titles.append(title)
            links.append(link)
            print(title, link)

    return titles, links


def get_content(url):
    """Load a page and return the text of its drug content container.

    The page is opened in the shared module-level ``driver``. The function
    waits up to 20 seconds for an element carrying the classes ``content``,
    ``drug-page-content`` and ``clearfix``.

    Args:
        url: Address of the page to load.

    Returns:
        The stripped text of the matching ``<div>``; ``"No content found"``
        if the parsed page has no such ``<div>``; or
        ``"Content load timeout"`` if the element did not appear in time.
    """
    driver.get(url)

    # Keep the try body limited to the wait, the only step expected to time out.
    # TimeoutException must be imported from selenium.common.exceptions.
    try:
        # CSS selector is used because By.CLASS_NAME cannot match several classes
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.content.drug-page-content.clearfix')))
    except TimeoutException:
        return "Content load timeout"

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    content_div = soup.find('div', class_='content drug-page-content clearfix')
    if content_div is None:
        return "No content found"
    return content_div.text.strip()

def main():
    """Scrape every letter index page and write the results to a CSV file.

    For each entry in ``letters`` the matching index page is fetched with
    ``get_links_title``; every link it returns is then fetched with
    ``get_content``. The collected rows (``title``, ``link``, ``content``)
    are written to ``news_medical.csv`` and the row count is printed.

    A letter whose index page times out is skipped, so one empty or slow
    letter does not discard the rows already collected.
    """
    data = []
    for letter in letters:
        # Build the index URL from BASE_URL so the site root lives in one place
        index_url = urljoin(BASE_URL, f"drugs-a-z.aspx?l={letter}")

        try:
            titles, links = get_links_title(index_url)
        except TimeoutException:
            # get_links_title waits for list items; none appeared for this letter
            print(f'No drug list loaded for letter {letter!r}, skipping')
            continue

        # Visit each individual drug page found on the index
        for title, link in zip(titles, links):
            data.append({
                'title': title,
                'link': link,
                'content': get_content(link),
            })

    df = pd.DataFrame(data)
    df.to_csv('news_medical.csv', index=False)
    print(f'Saved {len(data)} items')

In [30]:
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always shut the browser down, even when the scrape fails part-way,
        # so no Chrome process is left running.
        driver.quit()

Bactroban Cream https://www.news-medical.net/drugs/Bactroban-Cream.aspx
Bactroban Ointment https://www.news-medical.net/drugs/Bactroban-Ointment.aspx
Beconase Allergy and Hayfever https://www.news-medical.net/drugs/Beconase-Allergy-and-Hayfever.aspx
Benadryl Original Oral Liquid (New Formula) https://www.news-medical.net/drugs/Benadryl-Original-Oral-Liquid-(New-Formula).aspx
BeneFIX https://www.news-medical.net/drugs/BeneFIX.aspx
Beovu https://www.news-medical.net/drugs/Beovu.aspx
Betadine Antiseptic Liquid Spray https://www.news-medical.net/drugs/Betadine-Antiseptic-Liquid-Spray.aspx
Betadine Sore Throat Gargle https://www.news-medical.net/drugs/Betadine-Sore-Throat-Gargle.aspx
Betaloc Injection https://www.news-medical.net/drugs/Betaloc-Injection.aspx
Betmiga https://www.news-medical.net/drugs/Betmiga.aspx
Betoquin https://www.news-medical.net/drugs/Betoquin.aspx
Beyfortus https://www.news-medical.net/drugs/Beyfortus.aspx
Bicor https://www.news-medical.net/drugs/Bicor.aspx
BiResp Spi