In [1]:
import os
import typing
from pathlib import Path

import requests
from bs4 import BeautifulSoup

In [7]:
from enum import Enum
class Constants(Enum):
    """Fixed settings used by the scraper; read each one through ``.value``."""

    # Landing page of the syllabus registry course.
    CMU_SYLLABUS_REGISTRY_URL = 'https://canvas.cmu.edu/courses/sis_course_id:syllabus-registry'
    # Base URL that course hrefs are appended to.
    CMU_CANVAS_URL = 'https://canvas.cmu.edu'
    # Cookie dict passed to requests for pages fetched with a session.
    # The session token is a credential and must never live in the notebook:
    # export CANVAS_SESSION before starting the kernel. When the variable is
    # unset the cookie value is an empty string, so the notebook still loads.
    # SECURITY: a real session value was previously committed on this line;
    # treat it as leaked and invalidate that session.
    COOKIE = {
        'canvas_session': os.environ.get('CANVAS_SESSION', ''),
    }
    # aria-label values of the category blocks looked up on a department page.
    COURSE_CATEGORIES = [
        'Available Syllabi',
        'Unavailable Syllabi',
        'Individualized Experiences',
    ]

In [4]:
# Seconds to wait on the server when the caller does not pass ``timeout``.
_DEFAULT_TIMEOUT_SECONDS = 30


def get_and_unwrap(*args, **kwargs) -> BeautifulSoup:
    """Fetch a page with ``requests.get`` and parse its body as HTML.

    Args:
        *args, **kwargs: forwarded unchanged to ``requests.get``. If the
            caller does not supply ``timeout``, a default of
            ``_DEFAULT_TIMEOUT_SECONDS`` is used so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from ``response.text`` with the
        ``'html.parser'`` backend.

    Raises:
        AssertionError: if the final response status is not 200. Raised
            explicitly (not via ``assert``) so the check is not stripped
            under ``python -O`` and the message names the URL and status.
        requests.RequestException: propagated from ``requests.get``
            (including timeouts).
    """
    kwargs.setdefault('timeout', _DEFAULT_TIMEOUT_SECONDS)
    response = requests.get(*args, **kwargs)
    if response.status_code != 200:
        raise AssertionError(
            f'GET {response.url} returned HTTP {response.status_code}, expected 200'
        )
    return BeautifulSoup(response.text, 'html.parser')
def select_unique(html, *args, **kwargs):
    """Run ``html.select`` and return its one and only match.

    Extra positional and keyword arguments are passed straight through to
    ``html.select``. An ``AssertionError`` with the message
    'Selection is not unique!' is raised when the selection does not contain
    exactly one element (zero matches fail too).
    """
    matches = html.select(*args, **kwargs)
    assert len(matches) == 1, 'Selection is not unique!'
    (only_match,) = matches
    return only_match

In [142]:
# Fetch the registry landing page (no session cookie is sent for this request).
soup = get_and_unwrap(Constants.CMU_SYLLABUS_REGISTRY_URL.value)

# Select the semester of interest by its exact aria-label.
semester = soup.find(attrs={"aria-label": "Fall 2024 (F24)"})
assert semester is not None, 'No element labelled "Fall 2024 (F24)" on the registry page'

# Print the label of every labelled <div> on the page. The loop variable is
# deliberately not named `semester`: reusing that name overwrote the lookup
# above, so `departments` was taken from whichever div happened to be last.
semesters = soup.select('div[aria-label]')
for labelled_div in semesters:
    print(labelled_div.get('aria-label'))

# Item-name spans nested inside the selected semester's module list.
departments = semester.select('div.content > ul.context_module_items > li[id^="context_module_item_"] > div.ig-row > div.ig-info > div.module-item-title > span.item_name')

In [40]:
class Course:
    """One course entry taken from a category list on a department page.

    Attributes:
        html: the element this course was built from.
        a: the single ``a.ig-title`` link found inside ``html``.
        cat: name of the category the course was listed under.
        result: parsed course page once ``get()`` has run; ``None`` before.
    """

    def __init__(self, html, cat: str = 'Available Syllabi'):
        """Wrap ``html``, which must contain exactly one ``a.ig-title`` link.

        Raises:
            AssertionError: from ``select_unique`` if the link is missing or
                appears more than once.
        """
        self.html = html
        self.a = select_unique(self.html, 'a.ig-title')
        self.cat = cat
        # Created here so that reading `result` before `get()` yields None
        # instead of raising AttributeError.
        self.result = None

    @property
    def name(self):
        """The link's ``title`` attribute."""
        return self.a.get('title')

    @property
    def href(self):
        """Canvas base URL with the link's ``href`` appended.

        NOTE(review): plain concatenation, so this presumably expects the
        href to be a site-relative path starting with '/' -- confirm.
        """
        return Constants.CMU_CANVAS_URL.value + self.a.get('href')

    def get(self):
        """Fetch ``self.href`` with the session cookie and store the parsed page.

        The page is stored on ``self.result``; nothing is returned.
        """
        self.result = get_and_unwrap(self.href, cookies=Constants.COOKIE.value)
    
class Department:
    """One department entry taken from a semester's module list.

    Attributes:
        html: the element this department was built from.
        a: the single ``a.external_url_link`` link found inside ``html``.
        courses: dict mapping each category name to a list of ``Course``
            objects; empty until ``get()`` has run.
    """

    def __init__(self, html):
        """Wrap ``html``, which must contain exactly one ``a.external_url_link``.

        Raises:
            AssertionError: from ``select_unique`` if the link is missing or
                appears more than once.
        """
        self.html = html
        self.a = select_unique(self.html, 'a.external_url_link')
        # Created here so that reading `courses` before `get()` does not
        # raise AttributeError.
        self.courses = {}

    @property
    def name(self):
        """The link's ``title`` attribute."""
        return self.a.get('title')

    @property
    def href(self):
        """The link's ``href`` attribute, used as-is when fetching the page."""
        return self.a.get('href')

    @staticmethod
    def get_category(html, cat: str):
        """Return the item-name elements listed under category ``cat``.

        Args:
            html: parsed department page.
            cat: exact ``aria-label`` of the category ``<div>``.

        Raises:
            AssertionError: from ``select_unique`` when the page does not
                contain exactly one ``<div>`` with that label.
        """
        category = select_unique(html, f'div[aria-label="{cat}"]')
        courses = category.select('div.content > ul.context_module_items > li[id^="context_module_item_"] > div.ig-row > div.ig-info > div.module-item-title > span.item_name')
        return courses

    def get(self):
        """Fetch the department page and populate ``self.courses``.

        Every name in ``Constants.COURSE_CATEGORIES`` becomes a key, so a
        page lacking any one of those categories makes the call fail.
        Nothing is returned.
        """
        html = get_and_unwrap(self.href, cookies=Constants.COOKIE.value)
        self.courses = {
            cat: [Course(c, cat) for c in self.get_category(html, cat)]
            for cat in Constants.COURSE_CATEGORIES.value
        }

class Semester:
    """A semester block from the registry page and the departments inside it."""

    # CSS path from the semester element down to each item-name span.
    _ITEM_SELECTOR = 'div.content > ul.context_module_items > li[id^="context_module_item_"] > div.ig-row > div.ig-info > div.module-item-title > span.item_name'

    def __init__(self, html):
        """Keep ``html`` and wrap every matched item in a ``Department``."""
        self.html = html
        wrapped = []
        for node in html.select(self._ITEM_SELECTOR):
            wrapped.append(Department(node))
        self.departments = wrapped

    @property
    def name(self):
        """The ``aria-label`` attribute of the semester element."""
        label = self.html.get('aria-label')
        return label

class SyllabusRegistry:
    """The syllabus registry landing page and the semesters listed on it.

    Attributes:
        html: parsed registry page.
        semesters: one ``Semester`` per ``<div>`` whose ``aria-label``
            starts with "Fall", "Spring" or "Summer".
    """

    def __init__(self):
        """Download the registry page (no cookies sent) and collect semesters."""
        html = get_and_unwrap(Constants.CMU_SYLLABUS_REGISTRY_URL.value)
        self.html = html
        # The prefix was previously misspelled "String", so Spring semesters
        # were never matched.
        semesters = html.select('div[aria-label^="Fall"], div[aria-label^="Spring"], div[aria-label^="Summer"]')
        self.semesters = [Semester(s) for s in semesters]

In [29]:
# Build the registry: the constructor downloads the registry page and wraps
# each matched semester <div> in a Semester (available as sr.semesters).
sr = SyllabusRegistry()

In [30]:
# Work with the first entry of the registry's semester list.
s = sr.semesters[0]

In [41]:
# Populate the first department's course lists, then take the URL of the
# fourth course (index 3) listed under 'Available Syllabi'.
first_department = s.departments[0]
first_department.get()
href = first_department.courses['Available Syllabi'][3].href

In [42]:
# Download that course's page using the session cookie. The parsed page is
# kept on the Course object as `.result`; get() itself returns nothing.
s.departments[0].courses['Available Syllabi'][3].get()

In [44]:
# Display the course URL: the Canvas base URL with the link's href appended.
s.departments[0].courses['Available Syllabi'][3].href

'https://canvas.cmu.edu/courses/40932/modules/items/5803464'

In [47]:
# Inspect the fetched course page: start at its first <script> tag and step
# two siblings forward. In the captured output this lands on the <script>
# that assigns the `INST` settings object.
# NOTE(review): the first sibling hop presumably skips a whitespace text node -- confirm.
s.departments[0].courses['Available Syllabi'][3].result.script.next_sibling.next_sibling

<script>
    INST = {"environment":"production","allowMediaComments":true,"kalturaSettings":{"domain":"nv.instructuremedia.com","resource_domain":"nv.instructuremedia.com","rtmp_domain":"iad.rtmp.instructuremedia.com","partner_id":"9","subpartner_id":"0","player_ui_conf":"0","kcw_ui_conf":"0","upload_ui_conf":"0","max_file_size_bytes":534773760,"do_analytics":false,"hide_rte_button":false,"js_uploader":true},"logPageViews":true,"editorButtons":[{"name":"Box","id":1551,"favorite":false,"url":"https://www.edu-apps.org/box/","icon_url":"https://www.edu-apps.org/assets/lti_box_engine/icon.png","canvas_icon_class":null,"width":430,"height":200,"use_tray":false,"on_by_default":false,"description":"\u003cp\u003eEmbed files from Box\u003c/p\u003e\n"},{"name":"Commons Favorites","id":2647,"favorite":false,"url":"https://lor.instructure.com/api/lti/favorite-resources","icon_url":"https://lor.instructure.com/img/icon_commons.png","canvas_icon_class":null,"width":800,"height":400,"use_tray":true,"

In [48]:
import json