In [15]:
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Scrape the conference program calendar page with headless Chrome and dump
# every calendar slot (date, time range, sessions) to ``sessions.json``.


def _text_or_none(tag):
    """Return the whitespace-stripped text of *tag*, or None if it is missing."""
    return tag.get_text(strip=True) if tag is not None else None


def _parse_session(session):
    """Build the title/link/speakers/location record for one itemCalendar section."""
    # Session title and link come from the first anchor in the section.
    anchor = session.find("a")
    link = anchor.get("href") if anchor is not None else None

    # Speakers are the <b> tags inside <span class="topDisplay">.
    top_display = session.find("span", class_="topDisplay")
    speakers = (
        [bold.get_text(strip=True) for bold in top_display.find_all("b")]
        if top_display is not None
        else []
    )

    # Location lives in ul.propertyInfo > li.propertyName.
    prop_info = session.find("ul", class_="propertyInfo")
    prop_name = (
        prop_info.find("li", class_="propertyName")
        if prop_info is not None
        else None
    )

    return {
        "title": _text_or_none(anchor),
        "link": link,
        "speakers": speakers,
        "location": _text_or_none(prop_name),
    }


def _parse_calendar_list(cal_list):
    """Build the date/time/sessions record for one <ul class="CalendarList">."""
    # The date is the span.defaultTZ inside the first <li> of the list.
    first_li = cal_list.find("li")
    date_span = (
        first_li.find("span", class_="defaultTZ")
        if first_li is not None
        else None
    )

    return {
        "date": _text_or_none(date_span),
        # The time range is the text of <time class="first">.
        "time": _text_or_none(cal_list.find("time", class_="first")),
        "sessions": [
            _parse_session(section)
            for section in cal_list.find_all("section", class_="itemCalendar")
        ],
    }


# Headless Chrome setup
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

url = "https://aiche.confex.com/aiche/2023/meetingapp.cgi/Program/3316"

# The browser is only needed until the rendered HTML has been captured.
# try/finally guarantees the Chrome process is shut down even if navigation
# fails or the wait below times out; previously such a failure skipped
# driver.quit() and left the browser running.
try:
    driver.get(url)

    # Wait (up to 15 s) for the calendar content to load: any CalendarList
    # element appearing in the DOM is taken as the signal.
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CLASS_NAME, "CalendarList"))
    )

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    driver.quit()

ul_calendar = soup.find("ul", class_="Calendar")

data = []

if ul_calendar:
    data = [
        _parse_calendar_list(cal_list)
        for cal_list in ul_calendar.find_all("ul", class_="CalendarList")
    ]
else:
    print("❌ <ul class='Calendar'> not found.")

# Save JSON data to file. This runs even when the calendar was not found, in
# which case an empty list is written.
with open("sessions.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

print("✅ Data saved to sessions.json")


✅ Data saved to sessions.json
