In [1]:
import os
import time

from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [2]:
# Catalog page for the Mechanical Engineering B.Sc., anchored at the
# "#degreerequirementstext" section. Adjacent string literals are joined
# by Python into one URL.
url = (
    "https://catalog.umanitoba.ca/undergraduate-studies/engineering/"
    "mechanical-engineering/mechanical-engineering-bsc/index.html"
    "#degreerequirementstext"
)

In [3]:
# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to run the notebook on another machine; the path the notebook was
# originally written with is kept as the fallback so existing runs still work.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH", "C:\\Users\\jerry\\Downloads\\chromedriver"
)

# Browser flags: skip certificate errors, use an incognito profile, and run
# without opening a visible window.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--incognito')
chrome_options.add_argument('--headless')

# NOTE(review): passing the driver path as the first positional argument is the
# Selenium 3 calling convention; newer Selenium releases expect a Service
# object instead -- confirm the installed version before upgrading.
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=chrome_options)

In [4]:
# Load the catalog page in the browser so its rendered HTML becomes available
# through driver.page_source.
driver.get(url)

# 1. Collect course link texts for driver to click on

In [5]:
# Parse the HTML as currently rendered by the browser, using the lxml parser.
rendered_html = driver.page_source
page_soup = soup(rendered_html, 'lxml')

In [6]:
# Narrow the search one step at a time: degree-requirements container ->
# its course-list table -> the table body -> every anchor inside it.
requirements_div = page_soup.find("div", id="degreerequirementstextcontainer")
course_table = requirements_div.find("table", class_="sc_courselist")
containers = course_table.find("tbody").find_all("a")
len(containers)

28

In [7]:
# Collect the visible label of every course anchor, replacing non-breaking
# spaces ("\xa0") with ordinary spaces.
link_texts = []
for anchor in containers:
    link_texts.append(anchor.text.replace("\xa0", " "))
link_texts

['ENG 3000',
 'ENG 3020',
 'ECE 3010',
 'ENG 2030',
 'ENG 2040',
 'MATH 2130',
 'MATH 2132',
 'MATH 3132',
 'MECH 2112',
 'MECH 2150',
 'MECH 2202',
 'MECH 2222',
 'MECH 2262',
 'MECH 2272',
 'MECH 3170',
 'MECH 3420',
 'MECH 3430',
 'MECH 3460',
 'MECH 3482',
 'MECH 3492',
 'MECH 3502',
 'MECH 3542',
 'MECH 3652',
 'MECH 3982',
 'MECH 3992',
 'MECH 4860',
 'PHYS 1070',
 'STAT 2220']

# 2. Automation script to scrape all courses

In [10]:
# Fixed pauses (in seconds) that give the catalog page time to react.
PAGE_LOAD_PAUSE = 5      # after (re)loading the catalog page
PRE_CLICK_PAUSE = 2      # between locating a course link and clicking it
BUBBLE_OPEN_PAUSE = 3    # after the click, before reading the pop-up
BUBBLE_CLOSE_PAUSE = 2   # after sending ESC, before moving to the next link

# Parallel result lists: index i of each list describes the same course.
# They are re-created here so re-running the cell never duplicates rows.
course_names = []
course_codes = []
course_descs = []

driver.get(url)
time.sleep(PAGE_LOAD_PAUSE)

for link_text in link_texts:

    # find_element(By.LINK_TEXT, ...) works on both Selenium 3 and 4; the
    # find_element_by_* shortcuts were removed in Selenium 4.3.
    link = driver.find_element(By.LINK_TEXT, link_text)
    time.sleep(PRE_CLICK_PAUSE)
    link.click()
    time.sleep(BUBBLE_OPEN_PAUSE)

    # Re-parse the page after the click so the pop-up markup is included.
    page_soup = soup(driver.page_source, 'lxml')
    bubble = page_soup.find("div", {"class": "lfjsbubble"})
    course_block = bubble.find("div", {"class": "courseblock"}) if bubble is not None else None
    header = course_block.find("div", {"class": "cols noindent"}) if course_block is not None else None
    description = course_block.find("p") if course_block is not None else None
    header_spans = header.find_all("span") if header is not None else []

    # Fail with a message that names the course instead of an AttributeError
    # on None when the pop-up is missing or laid out differently.
    if description is None or len(header_spans) < 2:
        raise RuntimeError(
            "Course pop-up for {!r} did not contain the expected elements".format(link_text)
        )

    # Extract everything first, then append, so the three lists can never
    # end up with different lengths.
    code = header_spans[0].text.strip()
    name = header_spans[1].text.strip()
    desc = description.text.strip().replace("\xa0", " ")

    course_codes.append(code)
    course_names.append(name)
    course_descs.append(desc)

    print("Scraped ", course_codes[-1])

    # ESC dismisses the pop-up before the next link is clicked.
    link.send_keys(Keys.ESCAPE)
    time.sleep(BUBBLE_CLOSE_PAUSE)


# One entry is appended per scraped course, so the list length is the count.
counter = len(course_codes)
print("Successfully scraped {} courses".format(counter))

Scraped  ENG 3000
Scraped  ENG 3020
Scraped  ECE 3010
Scraped  ENG 2030
Scraped  ENG 2040
Scraped  MATH 2130
Scraped  MATH 2132
Scraped  MATH 3132
Scraped  MECH 2112
Scraped  MECH 2150
Scraped  MECH 2202
Scraped  MECH 2222
Scraped  MECH 2262
Scraped  MECH 2272
Scraped  MECH 3170
Scraped  MECH 3420
Scraped  MECH 3430
Scraped  MECH 3460
Scraped  MECH 3482
Scraped  MECH 3492
Scraped  MECH 3502
Scraped  MECH 3542
Scraped  MECH 3652
Scraped  MECH 3982
Scraped  MECH 3992
Scraped  MECH 4860
Scraped  PHYS 1070
Scraped  STAT 2220
Successfully scraped 28 courses


# 3. Inspect and write to CSV

In [11]:
# Display the scraped course names for a visual check.
course_names

['Engineering Economics',
 'Technology, Society and the Future',
 'Elements of Electric Machines and Digital Systems',
 'Engineering Communication: Strategies for the Profession',
 'Engineering Communication: Strategies, Practice and Design',
 'Engineering Mathematical Analysis 1',
 'Engineering Mathematical Analysis 2',
 'Engineering Mathematical Analysis 3',
 'Fundamentals of Mechanical and Computer Aided Design',
 'Mechanical Engineering Modelling and Numerical Methods',
 'Thermodynamics',
 'Mechanics of Materials',
 'Fundamentals of Fluid Mechanics',
 'Engineering Materials 1',
 'Project Management',
 'Vibrations and Acoustics',
 'Measurements and Control',
 'Heat Transfer',
 'Kinematics and Dynamics',
 'Fluid Mechanics and Applications',
 'Stress Analysis and Design',
 'Engineering Materials 2',
 'Machine Design',
 'Mechanical Laboratories in Solid Mechanics',
 'Mechanical Laboratories in Thermofluids',
 'Engineering Design',
 'Physics 2: Waves and Modern Physics',
 'Contemporary 

In [12]:
# Display the scraped course codes for a visual check.
course_codes

['ENG 3000',
 'ENG 3020',
 'ECE 3010',
 'ENG 2030',
 'ENG 2040',
 'MATH 2130',
 'MATH 2132',
 'MATH 3132',
 'MECH 2112',
 'MECH 2150',
 'MECH 2202',
 'MECH 2222',
 'MECH 2262',
 'MECH 2272',
 'MECH 3170',
 'MECH 3420',
 'MECH 3430',
 'MECH 3460',
 'MECH 3482',
 'MECH 3492',
 'MECH 3502',
 'MECH 3542',
 'MECH 3652',
 'MECH 3982',
 'MECH 3992',
 'MECH 4860',
 'PHYS 1070',
 'STAT 2220']

In [13]:
# Display the scraped course descriptions for a visual check.
course_descs

['This course offers an introduction to the economic aspects of the engineering discipline. It covers applied economic concepts such as: time value of money, taxation in cash flows, breakeven points, inflation of goods, cost/benefit ratios, income and depreciation, and general microeconomic concepts. The focus includes analysis techniques such as: cash flow analysis, cost-based analysis, rate of return analysis, sensitivity analysis, replacement analysis, and risk mitigation. Concepts are introduced in the context of sustainability and project management fundamentals in a professional practice setting. May not be held with CIVL 4050.',
 'Impact of technology and technological change on society-past, present, future; specific technologies, e.g. construction. machine power, computers, communications, medical, military: the process of technological change; invisible effects of technology; technology and resource use; sustainable development, limits to growth and the role of technology. Ma

In [14]:
import pandas as pd

# Column label -> scraped values; the dict order is the column order of the
# resulting table.
course_columns = {
    "Course Number": course_codes,
    "Course Name": course_names,
    "Course Description": course_descs,
}
df = pd.DataFrame(course_columns)

df

Unnamed: 0,Course Number,Course Name,Course Description
0,ENG 3000,Engineering Economics,This course offers an introduction to the econ...
1,ENG 3020,"Technology, Society and the Future",Impact of technology and technological change ...
2,ECE 3010,Elements of Electric Machines and Digital Systems,(Lab required) Introduction to elementary conc...
3,ENG 2030,Engineering Communication: Strategies for the ...,Students work in a team-based environment to p...
4,ENG 2040,"Engineering Communication: Strategies, Practic...",This team-based course focuses on a rhetorical...
5,MATH 2130,Engineering Mathematical Analysis 1,(Lab required) Multivariable differential and ...
6,MATH 2132,Engineering Mathematical Analysis 2,"(Lab required) Infinite series, Taylor and Mac..."
7,MATH 3132,Engineering Mathematical Analysis 3,(Lab required) Vector integral calculus; serie...
8,MECH 2112,Fundamentals of Mechanical and Computer Aided ...,(Lab required) Provide instruction on the appl...
9,MECH 2150,Mechanical Engineering Modelling and Numerical...,(Lab required) A case-study-based introduction...


In [15]:
# Write the table to a CSV file in the working directory; index = False keeps
# the DataFrame's row index out of the file.
df.to_csv('UManitoba_MechEng_Core_Courses.csv', index = False)

In [17]:
# Shut down the browser session created for this notebook.
driver.quit()