In [24]:
import requests
from bs4 import BeautifulSoup
import json

def extract_doctor_data(doctor_div):
    # Extract image
    # image = doctor_div.find('img')['src'].strip()

    # Extract name
    name = doctor_div.find('div', class_='section2').find('p').text.strip()

     # Extract position and qualifications
    p_tags = doctor_div.find('div', class_='section2').find_all('p')
    position = [p_tags[1].text.strip()] if len(p_tags) > 1 else []
    qualifications = [p_tags[2].text.strip()] if len(p_tags) > 1 else []

    # Extract timing information
    timings = {}
    timing_section = doctor_div.find('div', id='NormalTiming')
    if timing_section:
        tables = timing_section.find_all('table', class_='section3headtable')
        for table in tables:
            day_name = table.find('b').text.strip().replace('u', '')
            timings[day_name] = ' '.join(td.text.strip() for td in table.find_all('td')[1:])

    # Patient serving remarks and notes can be included as additional fields if needed
    patient_serving_remarks = doctor_div.find('div', style="text-align:left; width:100%; display:inline-block;float:left; color:#000000; font-weight:bold; border-radius: 5px;padding: 2px;margin-bottom:1px;margin-top:3px")
    token_note = doctor_div.find('div', style="text-align:left; width:100%; display:inline-block; float:left; color:#000000; font-weight:bold; border-radius: 5px;padding: 2px;margin-bottom:3px")
    on_leave = doctor_div.find('div', style="text-align:center; width:100%; display:inline-block; color:#fff; font-weight:bold; background: #ed1e24;border-radius: 5px;padding: 2px;")

    # Create a dictionary for the doctor's information
    doctor_data = {
        "name": name,
        # "image": image,
        "position": position,
        "qualifications": qualifications,
        "timings": timings,
        "hospital": "National Medical Care Hospital",
        "patient_serving_remarks": patient_serving_remarks.text.strip() if patient_serving_remarks else "",
        "token_note": token_note.text.strip() if token_note else "",
        "on_leave": on_leave.text.strip() if on_leave else ""
    }
    
    return doctor_data

def extract_data(url):
    # Request the main page
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all the doctor divs
    doctors = soup.find_all('div', class_='doctorbar')  # Adjust this if the class name is different

    # Initialize a list to store all doctor data
    doctors_data = []

    for doctor in doctors:
        # Extract data for each doctor
        doctor_data = extract_doctor_data(doctor)
        doctors_data.append(doctor_data)
    
    return doctors_data

# with open("holy-family-hospital.txt", "r") as fs:
#     urls = fs.readlines()

doctors = []

# for url in urls:
#     doctors.extend(extract_data(url.strip()))

url = "http://portal.nmc.net.pk/labportal/DoctorTiming/Index"
doctors.extend(extract_data(url))

with open("outputs/national-medical-care-hospital.json", 'w') as json_file:
    json.dump(doctors, json_file, indent=4)
    print(doctors)


[{'name': 'ABDUL NAVEED KHAN, DR.', 'position': ['NEURO SURGEON'], 'qualifications': ['MBBS, FCPS'], 'timings': {'Monday': '03:00 PM TO 05:00 PM', 'Tesday': '- TO -', 'Wednesday': '03:00 PM TO 05:00 PM', 'Thrsday': '- TO -', 'Friday': '03:00 PM TO 05:00 PM', 'Satrday': '- TO -'}, 'hospital': 'National Medical Care Hospital', 'patient_serving_remarks': 'Patient Serving Remarks: First Come First Serve', 'token_note': 'Token Note: Number allocation will be start after 12:00 pm on same OPD day with first come first basis.', 'on_leave': ''}, {'name': 'ABDUL RASHEED KHAN, DR.', 'position': ['CARDIOLOGIST'], 'qualifications': ['MBBS, MCPS, DIP CARD, MD (CARDIOLOGY)'], 'timings': {'Monday': '02:30 PM TO 04:30 PM', 'Tesday': '- TO -', 'Wednesday': '02:30 PM TO 04:30 PM', 'Thrsday': '- TO -', 'Friday': '02:30 PM TO 04:30 PM', 'Satrday': '- TO -'}, 'hospital': 'National Medical Care Hospital', 'patient_serving_remarks': 'Patient Serving Remarks: First Come First Serve', 'token_note': 'Token Note:

to get all positions 

In [25]:
import json

# Load the JSON data from the file
with open('outputs/national-medical-care-hospital.json', 'r') as json_file:
    doctors = json.load(json_file)

# Define the relevant positions
relevant_positions = [
    'PSYCHIATRIST', 'NEURO PHYSICIAN', 'NEURO SURGEON', 'NEURO PSYCHIATRIST',
    'NEUROPHYSIOLOGIST', 'PEDIATRIC NEUROLOGIST', 'CLINICAL PSYCHOLOGIST',
    'PSYCHOLOGIST', 'DEVELOPMENTAL PEDIATRICIAN', 'OCCUPATIONAL THERAPIST'
]

# Filter doctors based on the relevant positions
filtered_doctors = [doctor for doctor in doctors if doctor['position'] in relevant_positions]

print(filtered_doctors)
# Write the filtered data back to the JSON file
# with open('outputs/national-medical-care-hospital.json', 'w') as json_file:
#     json.dump(filtered_doctors, json_file, indent=4)

print(f"Filtered data has been saved to 'outputs/national-medical-care-hospital.json'")



[]
Filtered data has been saved to 'outputs/national-medical-care-hospital.json'


In [31]:
import json

# Load the JSON data from the file
with open('outputs/national-medical-care-hospital.json', 'r') as json_file:
    doctors = json.load(json_file)

# Define the relevant positions
relevant_positions = [
    'PSYCHIATRIST', 'NEURO PHYSICIAN', 'NEURO SURGEON', 'NEURO PSYCHIATRIST',
    'NEUROPHYSIOLOGIST', 'PEDIATRIC NEUROLOGIST', 'CLINICAL PSYCHOLOGIST',
    'PSYCHOLOGIST', 'DEVELOPMENTAL PEDIATRICIAN', 'OCCUPATIONAL THERAPIST'
]

# Filter doctors based on the relevant positions
filtered_doctors = [
    doctor for doctor in doctors
    if any(position in relevant_positions for position in doctor.get('position', []))
]

# Print the filtered data
print(filtered_doctors)

# Write the filtered data back to the JSON file
with open('outputs/national-medical-care-hospital.json', 'w') as json_file:
    json.dump(filtered_doctors, json_file, indent=4)

print(f"Filtered data has been saved to 'outputs/national-medical-care-hospital.json'")


[{'name': 'ABDUL NAVEED KHAN, DR.', 'position': ['NEURO SURGEON'], 'qualifications': ['MBBS, FCPS'], 'timings': {'Monday': '03:00 PM TO 05:00 PM', 'Tesday': '- TO -', 'Wednesday': '03:00 PM TO 05:00 PM', 'Thrsday': '- TO -', 'Friday': '03:00 PM TO 05:00 PM', 'Satrday': '- TO -'}, 'hospital': 'National Medical Care Hospital', 'patient_serving_remarks': 'Patient Serving Remarks: First Come First Serve', 'token_note': 'Token Note: Number allocation will be start after 12:00 pm on same OPD day with first come first basis.', 'on_leave': ''}, {'name': 'AHMER ZUBERI, DR.', 'position': ['PSYCHOLOGIST'], 'qualifications': ['MS (PSYCH), PG CERT (CLIN. PSYCH), CBT & EMDR PRACTITIONER, RBT, DDRP'], 'timings': {'Monday': '- TO -', 'Tesday': '- TO -', 'Wednesday': '- TO -', 'Thrsday': '- TO -', 'Friday': '- TO -', 'Satrday': '04:30 PM TO 07:30 PM'}, 'hospital': 'National Medical Care Hospital', 'patient_serving_remarks': 'Patient Serving Remarks: Appointment Only', 'token_note': '', 'on_leave': ''},