In [5]:
import requests
from bs4 import BeautifulSoup
import json

# Function to get all doctor profile URLs from a department page
def get_doctor_profile_urls(department_url):
    response = requests.get(department_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all <a> tags with the doctor profile URLs
    doctor_urls = []
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        if "/dr-" in href or "/prof-" in href:  # Filtering for doctor URLs based on URL pattern
            doctor_urls.append(href)
    
    return doctor_urls

# Function to scrape doctor details from the profile page
def scrape_doctor_data(profile_url):
    response = requests.get(profile_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract doctor name (handle missing elements)
    name_tag = soup.find('h2', class_='elementor-heading-title ')
    name = name_tag.get_text(strip=True) if name_tag else 'N/A'

    # Extract doctor speciality (handle missing elements)
    speciality_heading = soup.find('h2', text='Speciality:')
    speciality = speciality_heading.find_next('div').get_text(strip=True) if speciality_heading else 'N/A'

    # Extract doctor education (handle missing elements)
    education_heading = soup.find('h2', text='Education:')
    education_list = education_heading.find_next('ul') if education_heading else []
    education = [li.get_text(strip=True) for li in education_list.find_all('li')] if education_list else ['N/A']

    # Prepare the doctor data in dictionary format
    doctor_data = {
        'name': name,
        'speciality': speciality,
        'education': education,
    }

    return doctor_data

# Main function to read URLs from a text file, get doctor URLs and scrape data
def scrape_doctors_from_departments(file_with_urls):
    all_doctors = []

    # Read department URLs from the file
    with open(file_with_urls, 'r') as file:
        department_urls = [line.strip() for line in file.readlines()]
    
    # For each department page, get the doctor profile URLs and scrape their data
    for department_url in department_urls:
        print(f"Scraping department page: {department_url}")
        doctor_urls = get_doctor_profile_urls(department_url)
        
        for profile_url in doctor_urls:
            print(f"Scraping doctor profile: {profile_url}")
            try:
                doctor_data = scrape_doctor_data(profile_url)
                all_doctors.append(doctor_data)
            except Exception as e:
                print(f"Error scraping {profile_url}: {e}")

    # Save the scraped doctor data to a JSON file
    with open('../quetta-doctors/aria-hospital.json', 'w') as json_file:
        json.dump(all_doctors, json_file, indent=4)

    print(f"Scraping completed. Data saved to doctors_data.json")

# Run the scraper with a file containing department URLs
scrape_doctors_from_departments('./aria-hospital.txt')


Scraping department page: https://ariahealth.com.pk/pediatric-department/
Scraping doctor profile: https://ariahealth.com.pk/dr-shazia-ali/


  speciality_heading = soup.find('h2', text='Speciality:')
  education_heading = soup.find('h2', text='Education:')


Scraping doctor profile: https://ariahealth.com.pk/dr-shazia-ali/
Scraping doctor profile: https://ariahealth.com.pk/prof-dr-mushtaq-jaffar/
Scraping doctor profile: https://ariahealth.com.pk/prof-dr-mushtaq-jaffar/
Scraping doctor profile: https://ariahealth.com.pk/dr-muhammad-ismail/
Scraping doctor profile: https://ariahealth.com.pk/dr-muhammad-ismail/
Scraping department page: https://ariahealth.com.pk/psychiatry-department/
Scraping doctor profile: https://ariahealth.com.pk/dr-mujeeb-ullah/
Scraping doctor profile: https://ariahealth.com.pk/dr-mujeeb-ullah/
Scraping doctor profile: https://ariahealth.com.pk/dr-muhammad-iiyas/
Scraping doctor profile: https://ariahealth.com.pk/dr-muhammad-iiyas/
Scraping doctor profile: https://ariahealth.com.pk/dr-silsila-sherzad/
Scraping doctor profile: https://ariahealth.com.pk/dr-silsila-sherzad/
Scraping department page: https://ariahealth.com.pk/neurology-department/
Scraping doctor profile: https://ariahealth.com.pk/dr-qahir-achakzai/
Scrap