In [1]:
import json
import requests
from bs4 import BeautifulSoup


def get_subject_urls(base_url):
    """Return absolute URLs for every subject page linked from the index.

    Downloads ``base_url``, parses it with BeautifulSoup and collects the
    ``href`` of each anchor matched by the ``ul.nav.levelone li a`` selector.

    Args:
        base_url: URL of the course-descriptions index page.

    Returns:
        A list of absolute subject-page URLs, in document order.

    Raises:
        requests.HTTPError: If the index page answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    from urllib.parse import urljoin

    # requests waits indefinitely by default; bound the wait so a stalled
    # server cannot hang the whole scrape.
    response = requests.get(base_url, timeout=30)
    # Fail loudly instead of parsing an error page and returning no links.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # `[href]` skips anchors that carry no link target (would be a KeyError).
    subject_links = soup.select('ul.nav.levelone li a[href]')
    # Resolve each href against the page it came from, so root-relative,
    # page-relative and already-absolute links all produce valid URLs.
    return [urljoin(response.url, link['href']) for link in subject_links]


def _clean_text(tag):
    """Return the tag's text with whitespace runs collapsed, or "N/A" if None."""
    if tag is None:
        return "N/A"
    # get_text(strip=True) would join the text of nested tags with no
    # separator, gluing words together. Taking the raw text and
    # re-joining on whitespace keeps the spacing present in the markup.
    return " ".join(tag.get_text().split())


def scrape_courses(subject_url):
    """Scrape every course listed on one subject page.

    Args:
        subject_url: Absolute URL of a subject's course-description page.

    Returns:
        A list with one dict per ``.courseblock`` element, with keys:
        ``'title'`` (text of ``.courseblocktitle strong``, or ``"N/A"``),
        ``'description'`` (text of ``p.cb_desc``, or ``"N/A"``) and
        ``'extras'`` (list of texts of each ``p.courseblockextra``).

    Raises:
        requests.HTTPError: If the page answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # Bound the wait: without a timeout a single stalled request blocks forever.
    response = requests.get(subject_url, timeout=30)
    # An error page contains no course blocks; raise rather than silently
    # reporting zero courses for this subject.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    return [
        {
            'title': _clean_text(
                course_block.select_one('.courseblocktitle strong')
            ),
            'description': _clean_text(course_block.select_one('p.cb_desc')),
            'extras': [
                _clean_text(extra)
                for extra in course_block.select('p.courseblockextra')
            ],
        }
        for course_block in soup.select('.courseblock')
    ]


# Index page that links to every subject's course-description page.
base_url = 'https://catalog.northeastern.edu/course-descriptions/'
subject_urls = get_subject_urls(base_url)

# Visit each subject page in turn and pool its courses into one flat list.
all_courses = []
for url in subject_urls:
    all_courses += scrape_courses(url)

# Save to JSON file: pretty-printed, UTF-8, with non-ASCII characters
# written as-is instead of being escaped.
with open("neu_courses.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(all_courses, indent=4, ensure_ascii=False))

print("Saved", len(all_courses), "courses to neu_courses.json")

Saved 7907 courses to neu_courses.json
