In [None]:
# Step 1: Setup in Colab
# Each line below is an IPython shell escape (`!`), so this cell only runs in a
# notebook environment. `> /dev/null` discards stdout only; errors written to
# stderr are still displayed.

# Refresh the apt package index before installing.
!apt-get update > /dev/null
# Install the Chromium driver package non-interactively (-y).
!apt-get install -y chromium-chromedriver > /dev/null
# Copy the driver binary into /usr/bin — presumably so Selenium can find it on
# PATH. NOTE(review): the source path depends on the image's package layout;
# confirm it exists on the current Colab runtime.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
# Python packages used by the scraping cell below.
!pip install selenium beautifulsoup4 > /dev/null


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import json
import time

def get_text(soup, selector: str, *, default: str = "Not found") -> str:
    """Return the text of every element matching a CSS selector.

    Args:
        soup: Parsed document exposing ``select(selector)``; the script
            passes a ``BeautifulSoup`` object.
        selector: CSS selector to look up.
        default: Value returned when nothing matches. Defaults to
            ``"Not found"``, the placeholder this function has always used.

    Returns:
        The text of all matching elements, each obtained with
        ``get_text(strip=True)`` and joined with single spaces, or
        ``default`` when the selector matches nothing.
    """
    elements = soup.select(selector)
    if not elements:
        return default
    return " ".join(el.get_text(strip=True) for el in elements)

# Scrape one AIChE 2023 paper page and print selected fields as JSON.
PAPER_URL = "https://aiche.confex.com/aiche/2023/meetingapp.cgi/Paper/668173"
ABSTRACT_SELECTOR = "section.field_Abstract"
# The page counts as rendered once ANY of these is present. The abstract
# selector used for extraction is included so the wait and the extraction
# cannot silently disagree about which element holds the abstract.
READY_SELECTORS = (
    "section.titleContent",
    "div.field_Abstract",
    ABSTRACT_SELECTOR,
)
PAGE_LOAD_TIMEOUT = 15  # seconds to wait for one of READY_SELECTORS
SETTLE_DELAY = 2  # extra seconds for content that loads after the first match

options = Options()
options.add_argument("--headless")  # run Chrome without a display
# NOTE(review): the next two flags are the usual workaround for running Chrome
# inside a container such as Colab — confirm they are still required.
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)

try:
    driver.get(PAPER_URL)

    # Wait for critical sections
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(
        EC.any_of(
            *(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                for selector in READY_SELECTORS
            )
        )
    )

    # Additional wait for dynamic content
    time.sleep(SETTLE_DELAY)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    presenter = get_text(soup, 'a.presenter')
    affiliation = get_text(soup, 'span.roleAffiliation')
    data = {
        "topic": get_text(soup, "p.favoriteItem"),
        "date_time": get_text(soup, 'span.defaultTZ'),
        "abstract": get_text(soup, ABSTRACT_SELECTOR),
        "presenting_author": f"{presenter} | {affiliation}",
    }

    # ensure_ascii=False keeps non-ASCII characters readable in the printed
    # output instead of \uXXXX escapes.
    print(json.dumps(data, indent=2, ensure_ascii=False))

except Exception as e:
    # Top-level boundary of the notebook cell: report the failure rather than
    # dumping a traceback.
    print(f"❌ Error: {e}")
finally:
    # Always shut the browser down, even when the scrape fails.
    driver.quit()
