In [2]:
import bs4 as bs
import requests
import re
import json

In [3]:
def getRestSiteMap():
    menus = {}
    url = "https://my.uhds.oregonstate.edu/api/dining/weeklymenu/1"
    response = requests.get(url)
    soup = bs.BeautifulSoup(response.text, 'html.parser')
    locations = soup.find('select', id='locations')
    options = locations.find_all('option')
    for option in options:
        location = option.text.strip()
        location = location.replace('é', 'e')
        value = option['value']
        menus[location] = value

    # rename the key to a different name
    menus['The MainSqueeze'] = menus.pop('The Main Squeeze')
    menus["Bing's at Weatherford"] = menus.pop("Bing's Cafe")
    return menus
rest_site_map = getRestSiteMap()
display(rest_site_map)

{"Ava's Cafe": '7',
 'Bay Leaf': '14',
 'Bites': '22',
 "Calabaloo's": '16',
 'Cascadia Cafe': '3',
 'Cascadia Deli': '4',
 'Cascadia Market': '5',
 'Clubhouse Deli': '9',
 'Coffee Corral': '8',
 "Cooper's Creek": '10',
 'Dixon Cafe': '2',
 'e.Cafe': '6',
 'East Side Eats': '17',
 'EBGBs': '11',
 'Five Four One': '18',
 'Global Fare': '27',
 'Grill': '30',
 'Java II': '32',
 'JavaStop': '23',
 'La Calle': '19',
 'Nori': '28',
 'North Porch Cafe': '24',
 'Off The Quad': '25',
 'Raintree Coffee Co.': '20',
 'Ring of Fire': '12',
 'Serrano Grill': '13',
 'Southside Station Deli': '26',
 'Southside Station Pizzeria': '29',
 'The Dam': '31',
 "Trader Bing's Cafe": '1',
 'West Side Grill': '15',
 'The MainSqueeze': '21',
 "Bing's at Weatherford": '33'}

In [35]:
def getRestAddress(url):
    response = requests.get(url)
    soup = bs.BeautifulSoup(response.text, 'html.parser')
    # find p tag with text location
    location = soup.find('p', string=re.compile('Location')).find_next_sibling('p').text.strip().replace('\n', ', ').replace('\t', '')
    return location

display(getRestAddress("https://uhds.oregonstate.edu/restaurants/bings-cafe"))

'Weatherford Hall, 300 SW 26th St, Corvallis OR 97331'

In [36]:
def getRestName(url):
    source = requests.get(url)
    soup = bs.BeautifulSoup(source.content, 'html.parser')
    restBlock = soup.find('div', class_='field field-name-field-large-column field-type-text-long field-label-hidden')
    restBlock = restBlock.find('div', class_='field-item even')
    # find the first child of restBlock and convert it to a string and remove weird characters
    restName = restBlock.contents[0].text.strip()
    restName = restName.replace('é', 'e')
    if restName == '':
        restName = restBlock.contents[2].text.strip()
    if '\xa0' in restName:
        restName = restName.replace('\xa0', ' ')
    if restName == 'The MainSqueeze':
        restName = 'The Main Squeeze'
    if restName == "Bing's at Weatherford":
        restName = "Bing's Cafe"
    return restName

In [37]:
def getMenus(restName, date):
    url = "https://my.uhds.oregonstate.edu/api/dining/weeklymenu/" + rest_site_map[restName]

    source = requests.get(url)
    soup = bs.BeautifulSoup(source.content, 'html.parser')
    weekly = soup.find_all('div', class_='section')

    menus = []
    for today in weekly:
        if date in today.h6.text:
            menu = {}
            menu['title'] = today.h6.text
            menu['items'] = []
            for p in today.find_all('p'):
                menu['items'].append(p.text)
            menus.append(menu)

    return menus

In [38]:
url = 'https://my.uhds.oregonstate.edu/api/drupal/hours'
response = requests.get(url)
html_content = response.text

soup = bs.BeautifulSoup(html_content, 'html.parser')

buildings = soup.find_all('h1')
restGroups = soup.find('div', class_='pure-g').find_all('div', class_='pure-g')

restaurants = []

for building, restGroup in zip(buildings, restGroups):
    for rest in restGroup.find_all('div', class_='concept_block'):
        building_name = building.text
        url = rest.find('a', class_='concept').get('href')
        
        address = ""
        menus = []

        times_div = rest.find_all('div', class_='time')
        times = []
        for time in times_div:
            times.append(time.text.strip())

        name = rest.find('a', class_='concept').text.strip()
        if '*' not in name:
            name = getRestName(url)
        else:
            restaurants.append({'name': name, 'times': times, 'building': building_name, 'address': address, 'menus': menus})
            continue

        menus = getMenus(name, 'November 07')
        address = getRestAddress(url)
        
        restaurants.append({'name': name, 'times': times, 'building': building_name, 'address': address, 'menus': menus})

display(restaurants)

[{'name': "Trader Bing's Cafe",
  'times': ['7:30 AM - 6:30 PM'],
  'building': 'Austin Hall',
  'address': 'Inside Austin Hall, 2751 SW Jefferson Way, Corvallis, OR 97331',
  'menus': [{'title': 'BREAKFAST - November 07th',
    'items': ['Entrepreneur Breakfast Sandwich',
     'Promotion Breakfast Sandwich',
     'The Plan Breakfast Sandwich',
     'TBC Strategic Spin Wrap',
     'TBC System Execution']},
   {'title': 'Salads - November 07th',
    'items': ['Chairman Chicken Caesar Salad',
     'TBC Side Caesar Salad',
     "Teri's COO Salad",
     'TBC Side Salad']},
   {'title': 'Options to Add - November 07th',
    'items': ['Balsamic Vinaigrette Dressing ',
     'Red Onion ',
     'Tomato ',
     'Honey Mustard Dressing ',
     'Grey Poupon Dijon Mustard',
     'Mayonnaise',
     'Fresh Mozzarella ',
     'Avocado ',
     'Roman Caesar Dressing ',
     'Cream Cheese ',
     'Shaved Parmesan',
     'Chipotle Mayonnaise ',
     'Roasted Red Pepper ',
     'Bacon ',
     'Sliced  Whi

In [11]:
with open('restaurants.json', 'w') as f:
    json.dump(restaurants, f, indent=4)

In [46]:
from datetime import datetime

class DiningScraper:
    def __init__(self, date=None):
        if date is None:
            date = datetime.now().strftime('%B %d')
        self.date = date
        self.rest_site_map = self.get_rest_site_map()
        self.restaurants = self.scrape_restaurants()
    
    def get_rest_site_map(self):
        menus = {}
        url = "https://my.uhds.oregonstate.edu/api/dining/weeklymenu/1"
        response = requests.get(url)
        soup = bs.BeautifulSoup(response.text, 'html.parser')
        locations = soup.find('select', id='locations')
        options = locations.find_all('option')
        for option in options:
            location = option.text.strip()
            location = location.replace('é', 'e')
            value = option['value']
            menus[location] = value
        return menus
    
    def get_rest_address(self, url):
        response = requests.get(url)
        soup = bs.BeautifulSoup(response.text, 'html.parser')
        location = soup.find('p', string=re.compile('Location')).find_next_sibling('p').text.strip().replace('\n', ', ').replace('\t', '')
        return location
    
    def get_rest_name(self, url):
        source = requests.get(url)
        soup = bs.BeautifulSoup(source.content, 'html.parser')
        restBlock = soup.find('div', class_='field field-name-field-large-column field-type-text-long field-label-hidden')
        restBlock = restBlock.find('div', class_='field-item even')
        restName = restBlock.contents[0].text.strip()
        restName = restName.replace('é', 'e')
        if restName == '':
            restName = restBlock.contents[2].text.strip()
        if '\xa0' in restName:
            restName = restName.replace('\xa0', ' ')
        if restName == 'The MainSqueeze':
            restName = 'The Main Squeeze'
        if restName == "Bing's at Weatherford":
            restName = "Bing's Cafe"
        return restName
    
    def get_menus(self, restName):
        url = "https://my.uhds.oregonstate.edu/api/dining/weeklymenu/" + self.rest_site_map[restName]
        source = requests.get(url)
        soup = bs.BeautifulSoup(source.content, 'html.parser')
        weekly = soup.find_all('div', class_='section')
        menus = []
        for today in weekly:
            if self.date in today.h6.text:
                menu = {}
                menu['title'] = today.h6.text
                menu['items'] = []
                for p in today.find_all('p'):
                    menu['items'].append(p.text)
                menus.append(menu)
        return menus
    
    def scrape_restaurants(self):
        url = 'https://my.uhds.oregonstate.edu/api/drupal/hours'
        response = requests.get(url)
        html_content = response.text
        soup = bs.BeautifulSoup(html_content, 'html.parser')
        buildings = soup.find_all('h1')
        restGroups = soup.find('div', class_='pure-g').find_all('div', class_='pure-g')
        restaurants = []
        for building, restGroup in zip(buildings, restGroups):
            for rest in restGroup.find_all('div', class_='concept_block'):
                building_name = building.text
                url = rest.find('a', class_='concept').get('href')
                address = ""
                menus = []
                times_div = rest.find_all('div', class_='time')
                times = []
                for time in times_div:
                    times.append(time.text.strip())
                name = rest.find('a', class_='concept').text.strip()
                if '*' not in name:
                    name = self.get_rest_name(url)
                else:
                    restaurants.append({
                        'name': name,
                        'times': times,
                        'building': building_name,
                        'address': address,
                        'menus': menus
                    })
                    continue
                menus = self.get_menus(name)
                address = self.get_rest_address(url)
                restaurants.append({
                    'name': name,
                    'times': times,
                    'building': building_name,
                    'address': address,
                    'menus': menus
                })
        return restaurants
    
    scraper = DiningScraper()
    display(scraper.restaurants)

[{'name': "Trader Bing's Cafe",
  'times': ['7:30 AM - 6:30 PM'],
  'building': 'Austin Hall',
  'address': 'Inside Austin Hall, 2751 SW Jefferson Way, Corvallis, OR 97331',
  'menus': [{'title': 'BREAKFAST - November 07th',
    'items': ['Entrepreneur Breakfast Sandwich',
     'Promotion Breakfast Sandwich',
     'The Plan Breakfast Sandwich',
     'TBC Strategic Spin Wrap',
     'TBC System Execution']},
   {'title': 'Salads - November 07th',
    'items': ['Chairman Chicken Caesar Salad',
     'TBC Side Caesar Salad',
     "Teri's COO Salad",
     'TBC Side Salad']},
   {'title': 'Options to Add - November 07th',
    'items': ['Balsamic Vinaigrette Dressing ',
     'Red Onion ',
     'Tomato ',
     'Honey Mustard Dressing ',
     'Grey Poupon Dijon Mustard',
     'Mayonnaise',
     'Fresh Mozzarella ',
     'Avocado ',
     'Roman Caesar Dressing ',
     'Cream Cheese ',
     'Shaved Parmesan',
     'Chipotle Mayonnaise ',
     'Roasted Red Pepper ',
     'Bacon ',
     'Sliced  Whi