In [1]:
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq

In [2]:
# Page to scrape: McGill's 2020-2021 programme listing for the
# B.Eng. Honours Mechanical Engineering degree.
url = "https://www.mcgill.ca/study/2020-2021/faculties/engineering/undergraduate/programs/bachelor-engineering-beng-honours-mechanical-engineering"

In [3]:
# Download the page. The `with` block guarantees the HTTP response is
# closed even if read() raises part-way through the transfer.
with uReq(url) as uClient:
    page_html = uClient.read()

# Parse the downloaded bytes with Python's built-in HTML parser.
page_soup = soup(page_html, "html.parser")

# 1. Obtain course titles

In [4]:
# Collect every <ul> element that carries the "program-set" class.
# find_all is the current spelling of the legacy findAll alias.
course_blocks = page_soup.find_all("ul", class_="program-set")
len(course_blocks)

8

In [5]:
# Keep every program-set block except the last two.
# NOTE(review): the hard-coded -2 presumably drops non-engineering lists at
# the end of the page — confirm against the live page layout.
course_blocks_eng = course_blocks[:-2]
len(course_blocks_eng)

6

In [6]:
# Flatten the retained blocks into a single list of <li class="program-course">
# elements, preserving page order. find_all replaces the legacy findAll alias.
containers = [
    course_item
    for course_block in course_blocks_eng
    for course_item in course_block.find_all("li", class_="program-course")
]
len(containers)

72

In [7]:
import re
# Take the link text of each course entry and keep only what precedes the
# first " (N credit)" / " (N credits)" marker. The pattern is a raw string so
# that "\(" and "\)" reach the regex engine instead of being treated as
# (invalid) Python string escapes.
course_titles = [
    re.split(r" \([0-9]+ credit[s]*\)", container.a.text.strip())[0]
    for container in containers
]
len(course_titles)

72

In [8]:
# Display the extracted titles for a visual sanity check.
course_titles

['CHEM 110 General Chemistry 1',
 'CHEM 120 General Chemistry 2',
 'MATH 133 Linear Algebra and Geometry',
 'MATH 140 Calculus 1',
 'MATH 141 Calculus 2',
 'PHYS 131 Mechanics and Waves',
 'PHYS 142 Electromagnetism and Optics',
 'CCOM 206 Communication in Engineering',
 'CIVE 207 Solid Mechanics',
 'COMP 208 Computer Programming for Physical Sciences and\r Engineering\r',
 'FACC 100 Introduction to the Engineering Profession',
 'FACC 250 Responsibilities of the Professional Engineer',
 'FACC 300 Engineering Economy',
 'FACC 400 Engineering Professional Practice',
 'MATH 262 Intermediate Calculus',
 'MATH 263 Ordinary Differential Equations for Engineers',
 'MATH 264 Advanced Calculus for Engineers',
 'MATH 271 Linear Algebra and Partial Differential Equations',
 'MECH 201 Introduction to Mechanical Engineering',
 'MECH 210 Mechanics 1',
 'MECH 220 Mechanics 2',
 'MECH 240 Thermodynamics 1',
 'MECH 262 Statistics and Measurement Laboratory',
 'MECH 290 Design Graphics for Mechanical En

# 2. Obtain course codes and names from course titles

In [9]:
# The course code is the first two whitespace-separated tokens of the title
# (e.g. "MECH 403D1"), re-joined with a single space.
course_codes = [" ".join(words[:2]) for words in map(str.split, course_titles)]
course_codes

['CHEM 110',
 'CHEM 120',
 'MATH 133',
 'MATH 140',
 'MATH 141',
 'PHYS 131',
 'PHYS 142',
 'CCOM 206',
 'CIVE 207',
 'COMP 208',
 'FACC 100',
 'FACC 250',
 'FACC 300',
 'FACC 400',
 'MATH 262',
 'MATH 263',
 'MATH 264',
 'MATH 271',
 'MECH 201',
 'MECH 210',
 'MECH 220',
 'MECH 240',
 'MECH 262',
 'MECH 290',
 'MECH 292',
 'MECH 309',
 'MECH 321',
 'MECH 331',
 'MECH 341',
 'MECH 346',
 'MECH 360',
 'MECH 362',
 'MECH 383',
 'MECH 403D1',
 'MECH 403D2',
 'MECH 404',
 'MECH 419',
 'MECH 430',
 'MECH 494',
 'MATH 323',
 'MATH 326',
 'MATH 327',
 'MATH 381',
 'MATH 407',
 'MATH 417',
 'MATH 478',
 'MECH 513',
 'MECH 546',
 'MECH 562',
 'MECH 577',
 'MECH 578',
 'MECH 579',
 'CHEE 563',
 'MECH 497',
 'MECH 498',
 'MECH 499',
 'MECH 513',
 'MECH 529',
 'MECH 530',
 'MECH 532',
 'MECH 535',
 'MECH 536',
 'MECH 541',
 'MECH 543',
 'MECH 544',
 'MECH 553',
 'MECH 557',
 'MECH 559',
 'MECH 563',
 'MECH 565',
 'MECH 573',
 'MECH 577']

In [10]:
# The course name is everything after the first two tokens of the title.
# Splitting on whitespace and re-joining with single spaces also removes
# stray line-break characters embedded in the scraped text.
course_names = [" ".join(words[2:]) for words in map(str.split, course_titles)]
course_names

['General Chemistry 1',
 'General Chemistry 2',
 'Linear Algebra and Geometry',
 'Calculus 1',
 'Calculus 2',
 'Mechanics and Waves',
 'Electromagnetism and Optics',
 'Communication in Engineering',
 'Solid Mechanics',
 'Computer Programming for Physical Sciences and Engineering',
 'Introduction to the Engineering Profession',
 'Responsibilities of the Professional Engineer',
 'Engineering Economy',
 'Engineering Professional Practice',
 'Intermediate Calculus',
 'Ordinary Differential Equations for Engineers',
 'Advanced Calculus for Engineers',
 'Linear Algebra and Partial Differential Equations',
 'Introduction to Mechanical Engineering',
 'Mechanics 1',
 'Mechanics 2',
 'Thermodynamics 1',
 'Statistics and Measurement Laboratory',
 'Design Graphics for Mechanical Engineering',
 'Design 1: Conceptual Design',
 'Numerical Methods in Mechanical Engineering',
 'Mechanics of Deformable Solids',
 'Fluid Mechanics 1',
 'Thermodynamics 2',
 'Heat Transfer',
 'Principles of Manufacturing',


# 3. Obtain course descriptions

In [11]:
# For each course entry, read the text of the first <p> inside its
# <div class="content"> and trim surrounding whitespace.
course_descs = [
    course_item.find("div", class_="content").p.text.strip()
    for course_item in containers
]
len(course_descs)

72

In [12]:
# Display the extracted descriptions for a visual sanity check.
course_descs

['Chemistry : A study of the fundamental principles of atomic structure, radiation and nuclear chemistry, valence theory, coordination chemistry, and the periodic table.',
 'Chemistry : A study of the fundamental principles of physical chemistry.',
 'Mathematics & Statistics (Sci) : Systems of linear equations, matrices, inverses, determinants; geometric vectors in three dimensions, dot product, cross product, lines and planes; introduction to vector spaces, linear dependence and independence, bases. Linear transformations. Eigenvalues and diagonalization.',
 'Mathematics & Statistics (Sci) : Review of functions and graphs. Limits, continuity, derivative. Differentiation of elementary functions. Antidifferentiation. Applications.',
 'Mathematics & Statistics (Sci) : The definite integral. Techniques of integration. Applications. Introduction to sequences and series.',
 'Physics : The basic laws and principles of Newtonian mechanics; oscillations, waves, and wave optics.',
 'Physics : T

# 4. Clean and write to CSV

In [13]:
# Put every description on a single line. The scraped text can contain
# carriage returns as well as newlines (the COMP 208 title shows "\r"), so
# split on every line-break style rather than on "\n" alone; otherwise a
# stray "\r" would end up inside the CSV cells.
course_descs = [" ".join(desc.splitlines()) for desc in course_descs]

In [14]:
import pandas as pd

# Combine the three parallel lists into one table: each list becomes a
# column, and row i holds the i-th code, name and description.
course_table = {
    "Course Number": course_codes,
    "Course Name": course_names,
    "Course Description": course_descs,
}
df = pd.DataFrame(course_table)

df

Unnamed: 0,Course Number,Course Name,Course Description
0,CHEM 110,General Chemistry 1,Chemistry : A study of the fundamental princip...
1,CHEM 120,General Chemistry 2,Chemistry : A study of the fundamental princip...
2,MATH 133,Linear Algebra and Geometry,Mathematics & Statistics (Sci) : Systems of li...
3,MATH 140,Calculus 1,Mathematics & Statistics (Sci) : Review of fun...
4,MATH 141,Calculus 2,Mathematics & Statistics (Sci) : The definite ...
...,...,...,...
67,MECH 559,Engineering Systems Optimization,Mechanical Engineering : Introduction to syste...
68,MECH 563,Biofluids and Cardiovascular Mechanics,Mechanical Engineering : Basic principles of c...
69,MECH 565,Fluid Flow and Heat Transfer Equipment,Mechanical Engineering : Pipes and piping syst...
70,MECH 573,Mechanics of Robotic Systems,Mechanical Engineering : Manipulator performan...


In [15]:
# Write the table to disk; index=False leaves the row index out of the file.
output_csv = 'McGill_Honours_MechEng_Core_and_Electives_Courses.csv'
df.to_csv(output_csv, index=False)