In [1]:
import json
# JSON endpoint for the 2023-2024 academic catalog.
# NOTE(review): no cell visible here references this URL; the scrape reads a local
# "courses.json" instead, presumably a saved copy of this resource (TODO confirm).
all_course_list_url = "https://stevens.smartcatalogiq.com/Institutions/Stevens-Institution-of-Technology/json/2023-2024/Academic-Catalog.json"


In [2]:
import time
import re
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Run Chrome with the --headless flag so scraping does not need a visible browser window.
options = ChromeOptions()
options.add_argument('--headless')
# One browser session, reused for every page fetched through run_window.
# NOTE(review): no cell visible here calls driver.quit(); close the session once scraping is done.
driver = webdriver.Chrome(options=options)
# URLs for which run_window could not find the page's 'main' element.
issues = []
def _text_or_default(parent, by, value, default=""):
    """Return the text of the first element under ``parent`` matching the locator, or ``default`` if the lookup fails."""
    try:
        return parent.find_element(by, value).text
    except Exception:
        # Best-effort scrape: one missing field must not abort the whole page.
        return default


def run_window(driver, url, page_load_wait=20):
    """Load a course detail page and scrape its fields.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used to fetch the page.
    url : str
        Address of the course detail page.
    page_load_wait : int or float, optional
        Seconds to pause after ``driver.get`` before reading the DOM
        (default 20, the pause this notebook has always used).

    Returns
    -------
    dict or None
        ``None`` when the page has no element with id ``main``; in that case
        ``url`` is appended to the module-level ``issues`` list. Otherwise a
        dict with:

        * ``course_title``       - text of the first ``h1`` (``""`` if absent)
        * ``course_description`` - text of the ``desc`` element (``""`` if absent)
        * ``course_credits``     - text of the ``credits`` element (``""`` if absent)
        * ``course_prereqs``     - list of course codes such as ``"CS 115"`` found in
          the ``sc_prereqs`` element; always a list, empty when there is none
        * ``course_offered_in``  - list of term names found in the first ``p`` element
    """
    driver.get(url)
    # Fixed pause before touching the DOM; presumably the page fills in its
    # content after the initial load (TODO confirm; an explicit wait may be enough).
    time.sleep(page_load_wait)

    try:
        main = driver.find_element(By.ID, 'main')
    except Exception:
        # Without the main container there is nothing to scrape; record the URL for review.
        issues.append(url)
        return None

    course_title = _text_or_default(main, By.TAG_NAME, 'h1')
    course_description = _text_or_default(main, By.CLASS_NAME, 'desc')
    course_credits = _text_or_default(main, By.CLASS_NAME, 'credits')

    # Prerequisites are reduced to bare course codes (2-4 capitals, whitespace, 3 digits).
    # An absent prerequisites element gives an empty list, so the field has one type.
    prereq_text = _text_or_default(main, By.CLASS_NAME, 'sc_prereqs')
    course_prereqs = re.findall(r"[A-Z]{2,4}\s\d{3}", prereq_text)

    # The offering terms are looked for in the text of the first <p> under the main container.
    course_offering = _text_or_default(main, By.TAG_NAME, 'p')
    known_terms = ("Fall", "Spring", "Summer Session 1", "Summer Session 2")
    offers = [term for term in known_terms if term in course_offering]

    return {
        'course_title': course_title,
        'course_description': course_description,
        'course_credits': course_credits,
        'course_prereqs': course_prereqs,
        'course_offered_in': offers
    }

In [3]:
# Load the catalog tree from the local JSON file; the context manager
# closes the file handle as soon as parsing finishes.
with open("courses.json", "r") as catalog_file:
  data_ = json.load(catalog_file)

In [4]:

import os  # not needed by this cell any more; kept so any other cell relying on `os` still works

base_url = "https://stevens.smartcatalogiq.com/en"
# Position of the course listing inside data_['Children'].
# NOTE(review): hard-coded for this catalog export; confirm it if courses.json is refreshed.
courses_list_idx = 24

# Top-level nodes of the course listing (printed below as departments); each holds
# level nodes (e.g. "500"), whose children are the individual courses.
all_courses = data_['Children'][courses_list_idx]['Children']

# Mode "w" truncates any earlier export, so no separate delete step is needed.
with open("course_data_extracted.json", "w") as file:
  file.write("[")
  wrote_record = False
  try:
    for dept_idx, dept_node in enumerate(all_courses):
      print(f"{dept_idx}. You're looking at department: {dept_node['Name']}")

      for level_idx, level_node in enumerate(dept_node['Children']):
        print(f"{level_idx}. You're looking at course difficulty: {level_node['Name']}")

        for course in level_node['Children']:
          course_detail_url = f"{base_url}{course['Path']}"

          print(f"Fetching data for {course['Name']}")
          data = run_window(driver, course_detail_url)
          if data is None:
            # run_window has already recorded the URL in `issues`.
            continue

          data.update({
            'course_code': course['Name'],
            'course_level': level_node['Name'],
            'course_rating': None,
            'course_professors': [],
            'course_syllabus': "",
            'currently_enrolled': [],
            'previous_enrolled': [],
            'stevens_course_link': course_detail_url,
          })

          # The separator goes before every record except the first, so the array
          # never ends in a trailing comma (which json.load rejects).
          if wrote_record:
            file.write(",")
          file.write(json.dumps(data))
          wrote_record = True
  finally:
    # Always close the array so an interrupted scrape still leaves valid JSON.
    file.write("]")

0. You're looking at department: AAI - Applied Artificial Intelligence
0. You're looking at course difficulty: 500
Fetching data for AAI 551
1. You're looking at course difficulty: 600
Fetching data for AAI 627
Fetching data for AAI 628
Fetching data for AAI 646
Fetching data for AAI 672
Fetching data for AAI 695
2. You're looking at course difficulty: 700
Fetching data for AAI 708
3. You're looking at course difficulty: 800
Fetching data for AAI 800
4. You're looking at course difficulty: 900
Fetching data for AAI 900
1. You're looking at department: ACC - Accounting
0. You're looking at course difficulty: 200
Fetching data for ACC 200
Fetching data for ACC 215
1. You're looking at course difficulty: 300
Fetching data for ACC 311
Fetching data for ACC 312
Fetching data for ACC 351
Fetching data for ACC 352
2. You're looking at course difficulty: 400
Fetching data for ACC 421
Fetching data for ACC 431
3. You're looking at course difficulty: 500
Fetching data for ACC 510
Fetching data f

In [6]:
# URLs that run_window skipped because the page had no element with id 'main'.
print(issues)

['https://stevens.smartcatalogiq.com/en/2023-2024/Academic-Catalog/Courses/AAI-Applied-Artificial-Intelligence/700/AAI-708']


In [15]:
import json
# Read the scraped records back; the context manager closes the file handle.
with open("course_data_extracted.json", "r") as extracted_file:
  courses = json.load(extracted_file)
# Show the first record as a quick sanity check.
courses[0]

{'course_title': 'AAI 551 Engineering Programming: Python',
 'course_description': 'This course presents tool, techniques, algorithms, and programming techniques using the Python programming language for data intensive applications and decision making. The course formally introduces techniques to: (i) gather,(ii) store, and (iii) process large volumes of data to make informed decisions. Such techniques find applicability in many engineering application areas, including communications systems, embedded systems, smart grids, robotics, Internet, and enterprise networks, or any network where information flows and alters decision making.',
 'course_credits': '3',
 'course_prereqs': [],
 'course_offered_in': [],
 'course_code': 'AAI 551',
 'course_level': '500',
 'course_rating': None,
 'course_professors': [],
 'course_syllabus': '',
 'currently_enrolled': [],
 'previous_enrolled': [],
 'stevens_course_link': 'https://stevens.smartcatalogiq.com/en/2023-2024/Academic-Catalog/Courses/AAI-Appl

In [10]:
# Build one record per course-code prefix (the part of 'course_code' before the
# first space), keeping the first department name seen for each prefix.
unique_mapper = {}
unique_courses = []
for entry in courses:
  prefix = entry['course_code'].split(' ')[0]
  if prefix in unique_mapper:
    continue
  # Third-from-last URL segment, e.g. "AAI-Applied-Artificial-Intelligence":
  # drop the leading code and join the remaining words with spaces.
  dept_slug = entry['stevens_course_link'].split('/')[-3]
  dept_name = " ".join(dept_slug.split('-')[1:])
  unique_courses.append({'course_code': prefix, 'department': dept_name})
  unique_mapper[prefix] = dept_name

In [12]:
# Display the de-duplicated {course_code prefix, department name} records.
unique_courses

[{'course_code': 'AAI', 'department': 'Applied Artificial Intelligence'},
 {'course_code': 'ACC', 'department': 'Accounting'},
 {'course_code': 'BIA', 'department': 'Business Intelligence and Analytics'},
 {'course_code': 'BIO', 'department': 'Biology'},
 {'course_code': 'BIOE', 'department': 'Bioengineering'},
 {'course_code': 'BME', 'department': 'Biomedical Engineering'},
 {'course_code': 'BT', 'department': 'Business and Technology'},
 {'course_code': 'CAL', 'department': 'College of Arts Letters'},
 {'course_code': 'CE', 'department': 'Civil Engineering'},
 {'course_code': 'CH', 'department': 'Chemistry'},
 {'course_code': 'CHE', 'department': 'Chemical Engineering'},
 {'course_code': 'CLK', 'department': 'Clark Scholars'},
 {'course_code': 'CM', 'department': 'Construction Management'},
 {'course_code': 'COMM', 'department': 'Professional Communications'},
 {'course_code': 'CPE', 'department': 'Computer Engineering'},
 {'course_code': 'CS', 'department': 'Computer Science'},
 {'c

In [9]:
# Number of distinct course-code prefixes collected in unique_mapper.
len(unique_mapper)

70

In [21]:
from uuid import uuid4

# Tag every course record with a random UUID string and export the result.
# NOTE: the dicts in `courses` are updated in place, so re-running this cell
# replaces every '_id' with a fresh value.
new_courses = []

for c in courses:
  c['_id'] = str(uuid4())
  new_courses.append(c)

# The context manager flushes and closes the export file deterministically.
with open("new_course_data_extracted_1.json", "w") as out_file:
  json.dump(new_courses, out_file)

False