In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# URL of Stanford dining menu page
URL = "https://rdeapps.stanford.edu/dininghallmenu/"

# Define the options you want to select
dining_hall_name = "Arrillaga Family Dining Commons"  # Change as needed
day_text = "Tuesday"  # Modify dynamically
meal_text = "Dinner"  # Options: Breakfast, Lunch, or Dinner

# Initialize WebDriver with headless mode (faster & more stable)
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run in headless mode (no UI)
options.add_argument("--disable-gpu")  # Needed for some systems
options.add_argument("--window-size=1920,1080")  # Set window size
driver = webdriver.Chrome(options=options)

try:
    driver.get(URL)
    wait = WebDriverWait(driver, 15)  # Increase wait time

    print("Page source loaded. Checking elements...")

    # Identify the dropdown IDs
    dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
    day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
    meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))

    # Print available options for debugging
    print("Available Days:", [option.text for option in day_dropdown.options])

    # Match the correct day dynamically (in case it's "Tuesday, March 19")
    for option in day_dropdown.options:
        if day_text in option.text:  # Check if "Tuesday" is in "Tuesday, March 19"
            day_dropdown.select_by_visible_text(option.text)
            print(f"Selected day: {option.text}")
            break
    else:
        raise Exception(f"Day '{day_text}' not found in dropdown!")

    time.sleep(2)  # Wait for changes

    # Select the dining hall
    dining_hall_dropdown.select_by_visible_text(dining_hall_name)
    time.sleep(2)  # Wait for changes

    # Select the meal
    meal_dropdown.select_by_visible_text(meal_text)
    time.sleep(3)  # Wait for menu load

    # Get the menu items
    menu_items = driver.find_elements(By.CLASS_NAME, "menu-item")
    print(f"Menu for {dining_hall_name} on {day_text} ({meal_text}):")

    if menu_items:
        for item in menu_items:
            print("-", item.text)
    else:
        print("No menu items found.")

finally:
    driver.quit()

Page source loaded. Checking elements...
Available Days: ['', '3/16/2025 - Sunday', '3/17/2025 - Monday', '3/18/2025 - Tuesday', '3/19/2025 - Wednesday', '3/20/2025 - Thursday', '3/21/2025 - Friday', '3/22/2025 - Saturday']
Selected day: 3/18/2025 - Tuesday
Menu for Arrillaga Family Dining Commons on Tuesday (Dinner):
No menu items found.


In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# URL of Stanford dining menu page
URL = "https://rdeapps.stanford.edu/dininghallmenu/"

# Define selection options
dining_hall_name = "Arrillaga Family Dining Commons"  # Change if needed
day_text = "Tuesday"
meal_text = "Dinner"

# Initialize WebDriver (headless mode for better performance)
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

try:
    driver.get(URL)
    wait = WebDriverWait(driver, 15)

    print("Page source loaded. Checking elements...")

    # Identify the dropdown elements
    dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
    day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
    meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))

    # Match the correct day dynamically
    for option in day_dropdown.options:
        if "3/18/2025" in option.text:  # Match based on format
            day_dropdown.select_by_visible_text(option.text)
            print(f"Selected day: {option.text}")
            break
    time.sleep(2)

    # Select the dining hall
    dining_hall_dropdown.select_by_visible_text(dining_hall_name)
    time.sleep(2)

    # Select the meal
    meal_dropdown.select_by_visible_text(meal_text)
    time.sleep(3)

    # Debug: Print page source to see if menu is loaded
    print("Fetching menu items...")
    time.sleep(2)

    # Try fetching menu by checking multiple elements
    menu_container = driver.find_elements(By.CLASS_NAME, "menu-container")  # Check main container
    if not menu_container:
        print("No menu container found. Checking full page source...")
        # Debug: Print full page source to see if menu is loaded
        with open("full_page_source.html", "w") as file:
            file.write(driver.page_source)

    menu_items = driver.find_elements(By.XPATH, "//div[contains(@class, 'menu-item') or contains(@class, 'food-item')]")  # Find items
    print(f"Menu for {dining_hall_name} on Tuesday ({meal_text}):")

    if menu_items:
        for item in menu_items:
            print("-", item.text)
    else:
        print("No menu items found. The structure might be different.")

finally:
    driver.quit()

Page source loaded. Checking elements...
Selected day: 3/18/2025 - Tuesday
Fetching menu items...
No menu container found. Checking full page source...
Menu for Arrillaga Family Dining Commons on Tuesday (Dinner):
No menu items found. The structure might be different.


In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# URL of Stanford dining menu page
URL = "https://rdeapps.stanford.edu/dininghallmenu/"

# Define selection options
dining_hall_name = "Arrillaga Family Dining Commons"  # Modify as needed
day_text = "3/18/2025"  # The correct date format
meal_text = "Dinner"

# Initialize WebDriver (headless mode for better performance)
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

try:
    driver.get(URL)
    wait = WebDriverWait(driver, 15)

    print("Page loaded. Selecting options...")

    # Select Dining Hall
    dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
    dining_hall_dropdown.select_by_visible_text(dining_hall_name)
    time.sleep(2)

    # Select Day
    day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
    day_dropdown.select_by_value(day_text)  # Use the correct value format "3/18/2025"
    time.sleep(2)

    # Select Meal
    meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))
    meal_dropdown.select_by_visible_text(meal_text)
    time.sleep(3)
    
    print("Fetching menu items...")

    # Extract menu items
    menu_items = driver.find_elements(By.CLASS_NAME, "clsMenuItem")
    
    print(f"Menu for {dining_hall_name} on {day_text} ({meal_text}):")
    if menu_items:
        for item in menu_items:
            dish_name = item.find_element(By.CLASS_NAME, "clsLabel_Name").text.strip()
            print("-", dish_name)
    else:
        print("No menu items found. Please check the page structure.")

finally:
    driver.quit()

Page loaded. Selecting options...
Fetching menu items...
Menu for Arrillaga Family Dining Commons on 3/18/2025 (Dinner):
- Creamy Butter Chicken
- Chana Masala
- Curried Vegetables
- Seasonal Steamed Vegetables
- Biriyani
- Tandoori Naan
- Panini Station
- Performance Bar
- Pasta Marinara
- Pasta Pesto
- Performance Bar
- Penne Pasta (Gluten-Free)


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# URL of Stanford dining menu page
URL = "https://rdeapps.stanford.edu/dininghallmenu/"

# Define selection options
dining_hall_name = "Arrillaga Family Dining Commons"  # Modify as needed
day_text = "3/18/2025"  # The correct date format
meal_text = "Dinner"

# Initialize WebDriver (headless mode for better performance)
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

try:
    driver.get(URL)
    wait = WebDriverWait(driver, 15)

    print("Page loaded. Selecting options...")

    # Select Dining Hall
    dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
    dining_hall_dropdown.select_by_visible_text(dining_hall_name)
    time.sleep(2)

    # Select Day
    day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
    day_dropdown.select_by_value(day_text)  # Use the correct value format "3/18/2025"
    time.sleep(2)

    # Select Meal
    meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))
    meal_dropdown.select_by_visible_text(meal_text)
    time.sleep(3)
    
    print("Fetching menu items...")

    # Extract menu items
    menu_items = driver.find_elements(By.CLASS_NAME, "clsMenuItem")

    menu_data = []
    
    if menu_items:
        for item in menu_items:
            # Dish name
            dish_name = item.find_element(By.CLASS_NAME, "clsLabel_Name").text.strip()
            
            # Ingredients
            try:
                ingredients = item.find_element(By.CLASS_NAME, "clsLabel_Ingredients").text.replace("Ingredients:", "").strip()
            except:
                ingredients = "N/A"

            # Allergens
            try:
                allergens = item.find_element(By.CLASS_NAME, "clsLabel_Allergens").text.replace("Allergens:", "").strip()
            except:
                allergens = "N/A"

            # Dietary Icons (e.g., Vegan, Gluten-Free, Halal)
            dietary_icons = []
            icon_elements = item.find_elements(By.CLASS_NAME, "clsLabel_IconImage")
            for icon in icon_elements:
                dietary_icons.append(icon.get_attribute("alt"))  # Extracts "Gluten Free", "Vegan", etc.

            # Store structured data
            menu_data.append({
                "dish_name": dish_name,
                "ingredients": ingredients,
                "allergens": allergens,
                "dietary_icons": dietary_icons
            })
    
    else:
        print("No menu items found. Please check the page structure.")

    # Print structured output
    print(f"\nStructured Menu Data for {dining_hall_name} on {day_text} ({meal_text}):")
    for dish in menu_data:
        print(f"- {dish['dish_name']}")
        print(f"  Ingredients: {dish['ingredients']}")
        print(f"  Allergens: {dish['allergens']}")
        print(f"  Dietary Icons: {', '.join(dish['dietary_icons']) if dish['dietary_icons'] else 'None'}\n")

finally:
    driver.quit()

# Now, `menu_data` contains all structured information as a list of dictionaries

Page loaded. Selecting options...
Fetching menu items...

Structured Menu Data for Arrillaga Family Dining Commons on 3/18/2025 (Dinner):
- Creamy Butter Chicken
  Ingredients: chicken, unsalted butter, curry sauce (onions, ginger, garlic, tomato puree, tomato sauce, tomato paste, ketchup, turmeric, garam masala, coriander seeds, curry powder, ground cumin, cumin seeds, chili powder, salt), cream, coriander seeds, cinnamon, cardamom seeds, cloves, nutmeg, bay leaf, pepper
  Allergens: MILK
  Dietary Icons: Gluten Free, Halal

- Chana Masala
  Ingredients: chickpeas, tomatoes, onions, garlic, ginger, cumin, chana masala (coriander, dry mango, salt, red chili, pomegranate seeds, kachri, cumin, black pepper, black salt, cinnamon, dry ginger, fenugreek leaves, mint leaves, clove, nutmeg, big cardamom, caraway, mace), canola/olive oil blend, bay leaf, curry sauce (onions, cumin seeds, garam masala, coriander seed, curry powder, chili powder, garlic, ginger, ketchup, tomato sauce, tomato pas

In [11]:
import json
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# URL of Stanford dining menu page
URL = "https://rdeapps.stanford.edu/dininghallmenu/"

# Initialize WebDriver (headless mode for better performance)
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

try:
    driver.get(URL)
    wait = WebDriverWait(driver, 15)
    print("Page loaded. Fetching dropdown options...")

    # Extract all dining halls
    dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
    dining_halls = [option.text for option in dining_hall_dropdown.options if option.text.strip()]
    
    # Extract all available dates (within 7 days)
    day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
    dates = {option.text: option.get_attribute("value") for option in day_dropdown.options if option.get_attribute("value").strip()}

    # Extract all meal types
    meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))
    meals = [option.text for option in meal_dropdown.options if option.text.strip()]
    
    print(f"Dining Halls: {dining_halls}")
    print(f"Dates: {list(dates.keys())}")
    print(f"Meal Types: {meals}")

    # Directory to store data
    output_dir = "stanford_dining_menus"
    os.makedirs(output_dir, exist_ok=True)

    # Iterate through all dates and meal types
    for date_label, date_value in dates.items():
        for meal in meals:
            filename = f"{date_value.replace('/', '-')}-{meal}.json"
            filepath = os.path.join(output_dir, filename)

            # Dictionary to store all dining hall menus for this date-meal combination
            day_meal_data = {}

            for dining_hall in dining_halls:
                print(f"Scraping {dining_hall} on {date_label} ({meal})...")
                
                # Select the dining hall
                dining_hall_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstLocations"))))
                dining_hall_dropdown.select_by_visible_text(dining_hall)
                time.sleep(0.1)

                # Select the date
                day_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstDay"))))
                day_dropdown.select_by_value(date_value)
                time.sleep(0.1)

                # Select the meal
                meal_dropdown = Select(wait.until(EC.presence_of_element_located((By.ID, "MainContent_lstMealType"))))
                meal_dropdown.select_by_visible_text(meal)
                time.sleep(1)

                # Extract menu items
                menu_items = driver.find_elements(By.CLASS_NAME, "clsMenuItem")
                menu_data = []

                if menu_items:
                    for item in menu_items:
                        # Dish name
                        dish_name = item.find_element(By.CLASS_NAME, "clsLabel_Name").text.strip()
                        
                        # Ingredients
                        try:
                            ingredients = item.find_element(By.CLASS_NAME, "clsLabel_Ingredients").text.replace("Ingredients:", "").strip()
                        except:
                            ingredients = "N/A"

                        # Allergens
                        try:
                            allergens = item.find_element(By.CLASS_NAME, "clsLabel_Allergens").text.replace("Allergens:", "").strip()
                        except:
                            allergens = "N/A"

                        # Dietary Icons (e.g., Vegan, Gluten-Free, Halal)
                        dietary_icons = []
                        icon_elements = item.find_elements(By.CLASS_NAME, "clsLabel_IconImage")
                        for icon in icon_elements:
                            dietary_icons.append(icon.get_attribute("alt"))  # Extracts "Gluten Free", "Vegan", etc.

                        # Store structured data
                        menu_data.append({
                            "dish_name": dish_name,
                            "ingredients": ingredients,
                            "allergens": allergens,
                            "dietary_icons": dietary_icons
                        })
                
                # Store data in dictionary
                day_meal_data[dining_hall] = menu_data

            # Save structured data as JSON
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(day_meal_data, f, indent=4, ensure_ascii=False)
            print(f"Saved data to {filepath}")

finally:
    driver.quit()

print("Scraping completed successfully!")

Page loaded. Fetching dropdown options...
Dining Halls: ['Arrillaga Family Dining Commons', 'Branner Dining', 'EVGR Dining', 'Florence Moore Dining', 'Gerhard Casper Dining', 'Lakeside Dining', 'Ricker Dining', 'Stern Dining', 'Wilbur Dining']
Dates: ['3/16/2025 - Sunday', '3/17/2025 - Monday', '3/18/2025 - Tuesday', '3/19/2025 - Wednesday', '3/20/2025 - Thursday', '3/21/2025 - Friday', '3/22/2025 - Saturday']
Meal Types: ['Breakfast', 'Lunch', 'Dinner', 'Brunch']
Scraping Arrillaga Family Dining Commons on 3/16/2025 - Sunday (Breakfast)...
Scraping Branner Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping EVGR Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Florence Moore Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Gerhard Casper Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Lakeside Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Ricker Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Stern Dining on 3/16/2025 - Sunday (Breakfast)...
Scraping Wilbur D