In [1]:
import re
import time
from urllib.request import urlopen as ureq

import pandas as pd
from bs4 import BeautifulSoup as soup

In [2]:
# Address of the UBC Okanagan calendar page that is downloaded and whose
# tables are scanned for course codes.
url = "http://www.calendar.ubc.ca/okanagan/index.cfm?tree=18,317,989,1382"

# 1. Collect course codes to be used to search for course names and descriptions

In [3]:
# Download the page at `url` and parse it with the built-in HTML parser.
# The `with` block closes the HTTP response even if `read()` raises, so the
# connection is never leaked.
with ureq(url) as uClient:
    page_html = uClient.read()
page_soup = soup(page_html, "html.parser")

In [4]:
# Gather every <table> element on the parsed page (`find_all` is the current
# name of the deprecated `findAll` alias); the bare expression displays how
# many tables were found.
tables = page_soup.find_all("table")
len(tables)

2

In [5]:
# Flatten the <td> cells of all tables into one list, preserving document
# order within each table; the bare expression displays the cell count.
containers = [cell for table in tables for cell in table.find_all("td")]
len(containers)

61

In [6]:
# Keep the stripped text of every table cell that begins with a course code:
# four capital letters, a space, then three digits.  Each cell's text is
# stripped once and then tested against the pattern.
course_codes = list(
    filter(
        re.compile("[A-Z]{4} [0-9]{3}").match,
        (cell.text.strip() for cell in containers),
    )
)
course_codes

['ENGR 303',
 'ENGR 305',
 'ENGR 310',
 'ENGR 315',
 'ENGR 320',
 'ENGR 375',
 'ENGR 376',
 'ENGR 377',
 'ENGR 380',
 'ENGR 381',
 'ENGR 385',
 'ENGR 387',
 'ENGR 413',
 'ENGR 476',
 'ENGR 499']

In [7]:
# Display how many course codes were collected.
len(course_codes)

15

# 2. Use the course codes to find and scrape corresponding course names and descriptions

In [9]:
# Download the ENGR course listing page and parse it with the built-in HTML
# parser.  The `with` block closes the HTTP response even if `read()` raises,
# so the connection is never leaked.
with ureq("http://www.calendar.ubc.ca/okanagan/courses.cfm?go=code&institution=2&code=ENGR") as uClient:
    page_html = uClient.read()
page_soup = soup(page_html, "html.parser")

In [10]:
# Look up each collected course code in the parsed course listing and record
# the course name and description found right after it.
#
# `course_names` and `course_descs` stay index-aligned with `course_codes`:
# a code that cannot be located gets a `None` placeholder in both lists, so
# all three lists always have the same length and can be combined row by row.
course_names = []
course_descs = []
counter = 0  # number of courses successfully scraped

for course_code in course_codes:

    # Find the text node that starts with this course code.  The code is
    # escaped so it is matched literally instead of as a regex fragment.
    course_title_container = page_soup.find(string=re.compile("^" + re.escape(course_code)))

    # The element following the title text supplies the course name, and the
    # element after that one supplies the description.  Each lookup is done
    # once and guarded, because `find_next()` returns None at end of document.
    name_tag = course_title_container.find_next() if course_title_container is not None else None
    desc_tag = name_tag.find_next() if name_tag is not None else None

    if desc_tag is None:
        # Keep the lists aligned with `course_codes` even when nothing is found.
        course_names.append(None)
        course_descs.append(None)
        print("Cannot find ", course_code)
        continue

    course_names.append(name_tag.text)
    course_descs.append(desc_tag.text.strip())

    counter += 1
    print("scraped ", course_code)

print(counter)

scraped  ENGR 303
scraped  ENGR 305
scraped  ENGR 310
scraped  ENGR 315
scraped  ENGR 320
scraped  ENGR 375
scraped  ENGR 376
scraped  ENGR 377
scraped  ENGR 380
scraped  ENGR 381
scraped  ENGR 385
scraped  ENGR 387
scraped  ENGR 413
scraped  ENGR 476
scraped  ENGR 499
15


# 3. Inspect collected data and write to CSV

In [11]:
course_names

['Engineering Project Management',
 'Engineering Economic Analysis',
 'Fluid Mechanics II',
 'Systems and Control',
 'Electromechanical Devices',
 'Energy System Design',
 'Materials Science II',
 'Manufacturing Processes I',
 'Design of Machine Elements',
 'Kinematics and Dynamics of Machinery',
 'Heat Transfer Applications',
 'Vibration of Mechanical Systems',
 'Law and Ethics for Engineers',
 'Mechanics of Materials II',
 'Engineering Capstone Design Project']

In [12]:
course_descs

['Project management including initiating, planning, executing, controlling, and closing engineering projects. Managing the scope, costs, schedule, risks, and human resources in engineering projects. External party engagement, including Indigenous communities. [3-0-0] Prerequisite: All of APSC 169, APSC 201.',
 'Cost concepts, accounting, time value of money; depreciation and taxes; public sector projects; economic evaluation techniques; handling uncertainty; sustainability in economic evaluation; societal context; infrastructure management needs; project impacts, mitigating risk. Case studies. [3-0-0] Prerequisite: Second-year standing in the B.A.Sc. program.',
 'Differential conservation, equations and solutions, boundary layers, compressible flows, and introduction to turbomachinery. [3-2*-1] Prerequisite: APSC 253.',
 'Dynamic systems, linear systems, control concepts, block diagrams, transient response, root locus, frequency response, Bode and Nyquist plots, and controller design.

In [13]:
# Combine the scraped lists into one table with a row per course; the three
# lists are paired up by position.  The bare `df` expression renders the
# table as the cell output.
df = pd.DataFrame({
    "Course Number": course_codes,
    "Course Name": course_names,
    "Course Description": course_descs,
})

df

Unnamed: 0,Course Number,Course Name,Course Description
0,ENGR 303,Engineering Project Management,"Project management including initiating, plann..."
1,ENGR 305,Engineering Economic Analysis,"Cost concepts, accounting, time value of money..."
2,ENGR 310,Fluid Mechanics II,"Differential conservation, equations and solut..."
3,ENGR 315,Systems and Control,"Dynamic systems, linear systems, control conce..."
4,ENGR 320,Electromechanical Devices,"DC and AC magnetic circuits, transformers, DC ..."
5,ENGR 375,Energy System Design,Primary energy sources and carriers. Energy co...
6,ENGR 376,Materials Science II,"Review comprehensive study of phase diagrams, ..."
7,ENGR 377,Manufacturing Processes I,"Metrology, metal forming processes, plastic de..."
8,ENGR 380,Design of Machine Elements,Product design methodology; static and fatigue...
9,ENGR 381,Kinematics and Dynamics of Machinery,"The design, analysis, and synthesis of mechani..."


In [14]:
# Save the course table as a CSV file in the working directory, leaving out
# the DataFrame's row index.
df.to_csv(
    'UBC_Okanagan_MechEng_Core_(Years3-4)_Courses.csv',
    index=False,
)