In [1]:
import pandas as pd

aoc_courses_df = pd.read_csv("aoc_courses.csv")
aoc_courses = aoc_courses_df['course_code'].tolist()
aoc_courses[:5]

['apbiv-200', 'apbiv-342', 'apbiv-401', 'apbiv-423', 'apscv-366']

In [2]:
import requests

base_url = "https://vancouver.calendar.ubc.ca/course-descriptions/courses/"
example_course = aoc_courses[10]

example_url = base_url+example_course
example_response = requests.get(example_url)
example_response.status_code

200

In [3]:
from bs4 import BeautifulSoup

example_content = example_response.content
example_soup = BeautifulSoup(example_content, 'html.parser')
example_course_content = example_soup.find(class_='node node--type-course node--promoted node--view-mode-full')
print(example_course_content)

<article class="node node--type-course node--promoted node--view-mode-full">
<div class="flow-root text-formatted node__content relative">
<h3 class="text-lg">BEST_V 202 (3)  <strong>Alternative Energy Systems</strong></h3>
<p class="mt-0">Introduction to safe, clean, and sustainable supplies of energy. Economic, social, environmental, and policy issues raised by current systems of energy use and production are examined. [3-0-0] <em></em> </p>
</div>
</article>


In [4]:
import re

test = {}
test['course_code'] = re.findall(r'\b[A-Z]{3,}_V \d{3}\b', example_course_content.find('h3').text)[0]
test['course_title'] = example_course_content.find('strong').text
test['course_desc'] = example_course_content.find(class_='mt-0').text.strip()
print(test)

{'course_code': 'BEST_V 202', 'course_title': 'Alternative Energy Systems', 'course_desc': 'Introduction to safe, clean, and sustainable supplies of energy. Economic, social, environmental, and policy issues raised by current systems of energy use and production are examined. [3-0-0]'}


In [6]:
import time

aoc_courses_list = []

for course in aoc_courses:
    try:
        url = base_url + course
        response = requests.get(url)
        content = response.content

        soup = BeautifulSoup(content, 'html.parser')
        course_content = soup.find(class_='node node--type-course node--promoted node--view-mode-full')

        # if not course_content:
        #     continue

        code_match = re.findall(r'\b[A-Z]{3,}_V \d{3}\b', course_content.find('h3').text)
        if code_match:
            course_data = {
                'course_code': code_match[0],
                'course_title': course_content.find('strong').text,
                'course_desc': course_content.find(class_='mt-0').text.strip()
            }
            aoc_courses_list.append(course_data)
            
    except Exception as e:
        print(f"Skipping course {course} due to error: {e}")
        continue

    time.sleep(0.5)

Skipping course lfsv-101 due to error: 'NoneType' object has no attribute 'find'
Skipping course mathv-200 due to error: 'NoneType' object has no attribute 'find'
Skipping course mathv-215 due to error: 'NoneType' object has no attribute 'find'
Skipping course mathv-221 due to error: 'NoneType' object has no attribute 'find'
Skipping course mathv-255 due to error: 'NoneType' object has no attribute 'find'
Skipping course mechv-411 due to error: 'NoneType' object has no attribute 'find'
Skipping course micbv-211 due to error: 'NoneType' object has no attribute 'find'
Skipping course micbv-301 due to error: 'NoneType' object has no attribute 'find'
Skipping course poliv-375a due to error: 'NoneType' object has no attribute 'find'


In [8]:
print(len(aoc_courses_list))
print(len(aoc_courses))

130
139


In [9]:
aoc_courses_list[:3]

[{'course_code': 'APBI_V 200',
  'course_title': 'Introduction to Soil Science',
  'course_desc': 'Physical, chemical and biological properties of soils; soil formation, classification, use and conservation. There are no prerequisites for this course, but background in Biology 12, Chemistry 12, and Physics 12 (or first-year university-level) is strongly advised. [3-2]'},
 {'course_code': 'APBI_V 342',
  'course_title': 'Soil Biology',
  'course_desc': 'The diversity of soil organisms (bacteria, protozoa, fungi, animals, plants) in natural and managed ecosystems; roles in primary production, nutrient cycling, decomposition and reclamation; interactions between soil organisms; responses to environmental change. [2-3-0] Prerequisite: [BIOL121] Equivalency: FRST310'},
 {'course_code': 'APBI_V 401',
  'course_title': 'Soil Processes',
  'course_desc': 'Understanding environmental factors influencing soil-forming processes and their ecological implications. Credit will be granted for only on

In [11]:
import json

with open("aoc_courses.json", "w") as outfile:
    json.dump(aoc_courses_list, outfile, indent=4)