In [66]:
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
import time
from datetime import datetime

In [2]:
TERM = "w_22_2370"

In [3]:
UG_SUBJ = "https://www.lsa.umich.edu/cg/cg_subjectlist.aspx?termArray={}&cgtype=ug&allsections=true".format(TERM)

In [4]:
r = requests.get(UG_SUBJ)

In [5]:
soup = BeautifulSoup(r.text)

In [6]:
departments = [cell.text.strip() for cell in soup.select_one(".table.table-striped.table-condensed").select("tr > td:nth-child(1)")]

In [7]:
%%time
course_sections = []
url = "https://www.lsa.umich.edu/cg/cg_results.aspx?termArray={}&cgtype={}&department={}&allsections=true&show=40"
for GRAD in ("ug", "gr"):
    for dep in departments:
        print(GRAD, dep)
        # Get each course listing from the department
        while True:
            try:
                r = requests.get(url.format(TERM, GRAD, dep), timeout=5)
            except requests.exceptions.Timeout:
                print("Retrying")
                continue
            break
        soup = BeautifulSoup(r.text)
        results = (soup.select(".row.result") + soup.select(".row.resultalt"))
        while soup.select_one("#contentMain_hlnkNextBtm") is not None:
            print("getting next page")
            while True:
                try:
                    r = requests.get("https://www.lsa.umich.edu/cg/" + soup.select_one("#contentMain_hlnkNextBtm").get("href"), timeout=5)
                    soup = BeautifulSoup(r.text)
                    results += (soup.select(".row.result") + soup.select(".row.resultalt"))
                except requests.exceptions.Timeout:
                    print("Retrying")
                    continue
                break
        for result in results:
            # For each course listing, save the lecture course
            parts = [a.strip() for a in result.select_one("font").text.strip().split("\r\n")]
            dept = parts[0]
            number = parts[1]
            name = " ".join(parts[3:])
            section, term, credits, mode, instructor, _ = [a.text.strip() for a in result.select_one(".bottompadding_main").find_all("div", recursive=False)]
            section = " ".join(section.split())
            term = " ".join(term.split()[1:])
            credits = credits.split()[-1]
            mode = mode.split()[-1]
            course_page = result.select_one("a").get("href")
            instructor = " ".join(instructor.split()[1:])
            course_sections.append({
                "dept": dept,
                "number": number,
                "name": name,
                "section": section,
                "term": term,
                "credits": credits,
                "mode": mode,
                "instructor": instructor,
                "url": course_page,
            })

ug AAS
getting next page
ug AERO
ug AEROSP
ug AES
ug ALA
getting next page
ug AMAS
ug AMCULT
getting next page
ug ANATOMY
ug ANTHRARC
ug ANTHRBIO
ug ANTHRCUL
ug APPPHYS
ug ARABIC
ug ARCH
ug ARMENIAN
ug ARTDES
ug ARTSADMN
ug ASIAN
getting next page
ug ASIANLAN
getting next page
getting next page
ug ASIANPAM
ug ASTRO
ug AUTO
ug BA
ug BCS
ug BE
ug BIOINF
ug BIOLCHEM
ug BIOLOGY
getting next page
ug BIOMEDE
ug BIOPHYS
getting next page
ug BIOSTAT
ug CATALAN
ug CDB
ug CEE
ug CHE
ug CHEM
getting next page
ug CJS
ug CLARCH
ug CLCIV
ug CLIMATE
ug CMPLXSYS
ug COGSCI
ug COMM
ug COMP
ug COMPLIT
ug CSP
ug CZECH
ug DANCE
ug DATASCI
ug DIGITAL
ug DUTCH
ug EARTH
getting next page
getting next page
ug EAS
ug ECON
getting next page
ug EDCURINS
ug EDUC
ug EEB
ug EECS
getting next page
getting next page
getting next page
ug EHS
ug ELI
ug ENGLISH
getting next page
getting next page
getting next page
getting next page
getting next page
getting next page
ug ENGR
ug ENS
ug ENSCEN
ug ENVIRON
getting next page


In [40]:
len(course_sections)

6461

In [45]:
filtered = dict()
for c in course_sections:
    filtered[c["dept"] + " " + c["number"]] = c["url"]

In [67]:
datetime.now()

datetime.datetime(2021, 11, 14, 23, 45, 9, 523735)

In [60]:
# Get ALL of the sections
all_sections = []

for name, url in tqdm(filtered.items()):
    while True:
        try:
            r = requests.get("https://www.lsa.umich.edu/cg/" + url, timeout=5)
        except requests.exceptions.Timeout:
            print("Retrying")
            continue
        break
    soup = BeautifulSoup(r.text)
    for row in soup.select(".row.clsschedulerow"):
        row = row.select_one(".row")
        parts = [" ".join(a.text.strip().split()) for a in row.select(".col-md-1")]
        # turn into object
        obj = {"course": name, "time": datetime.now()}
        for part in parts:
            pieces = part.strip().split(":")
            key = pieces[0].strip()
            val = " ".join([p.strip() for p in pieces[1:]])
            obj[key] = val
        all_sections.append(obj)

  0%|          | 0/3388 [00:00<?, ?it/s]

Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying
Retrying


In [61]:
all_sections

[{'Section': '004 (SEM)',
  'Instruction Mode': 'In Person',
  'Class No': '34814',
  'Enroll Stat': 'Open',
  'Open Seats': '18',
  'Wait List': '-'},
 {'Section': '003 (SEM)',
  'Instruction Mode': 'Hybrid',
  'Class No': '26479',
  'Enroll Stat': 'Open',
  'Open Seats': '18',
  'Wait List': '-'},
 {'Section': '004 (SEM)',
  'Instruction Mode': 'In Person',
  'Class No': '36990',
  'Enroll Stat': 'Open',
  'Open Seats': '18',
  'Wait List': '-'},
 {'Section': '005 (SEM)',
  'Instruction Mode': 'In Person',
  'Class No': '37588',
  'Enroll Stat': 'Open',
  'Open Seats': '9',
  'Wait List': '-'},
 {'Section': '002 (REC)',
  'Instruction Mode': 'Hybrid',
  'Class No': '19498',
  'Enroll Stat': 'Open',
  'Open Seats': '18',
  'Wait List': '-'},
 {'Section': '003 (REC)',
  'Instruction Mode': 'Hybrid',
  'Class No': '24616',
  'Enroll Stat': 'Open',
  'Open Seats': '16',
  'Wait List': '-'},
 {'Section': '001 (REC)',
  'Instruction Mode': 'Hybrid',
  'Class No': '30434',
  'Enroll Stat': 

In [63]:
import pandas as pd

In [64]:
df = pd.DataFrame(all_sections)

In [65]:
df.head()

Unnamed: 0,Section,Instruction Mode,Class No,Enroll Stat,Open Seats,Wait List
0,004 (SEM),In Person,34814,Open,18,-
1,003 (SEM),Hybrid,26479,Open,18,-
2,004 (SEM),In Person,36990,Open,18,-
3,005 (SEM),In Person,37588,Open,9,-
4,002 (REC),Hybrid,19498,Open,18,-


In [153]:
df["mode"].value_counts() / df["mode"].count()

Online    0.681597
Person    0.181280
Hybrid    0.137122
Name: mode, dtype: float64

In [154]:
(df.groupby("dept")["mode"].value_counts() / df.groupby("dept")["mode"].count()).loc["ENGLISH"]

mode
Online    0.737762
Hybrid    0.150350
Person    0.111888
Name: mode, dtype: float64

In [155]:
df.groupby("mode").get_group("Person").dept.value_counts()[:10]

THTREMUS    136
STDABRD      86
PSYCH        48
ENGLISH      32
ARTDES       25
GERMAN       24
BIOLOGY      19
ENVIRON      14
AAS          14
CHEM         11
Name: dept, dtype: int64

In [156]:
df.groupby("mode").get_group("Hybrid").dept.value_counts()[:10]

ENGLISH     43
MATH        28
HISTORY     25
ASIANLAN    20
FTVM        20
AAS         19
CEE         17
MECHENG     16
ENVIRON     16
ASIAN       16
Name: dept, dtype: int64

In [157]:
df.to_csv("./undergrad_sections.csv", index=None)

In [142]:
(df.dept.unique())

array(['AAS', 'AERO', 'AEROSP', 'AES', 'ALA', 'AMCULT', 'ANATOMY',
       'ANTHRARC', 'ANTHRBIO', 'ANTHRCUL', 'APPPHYS', 'ARABAM', 'ARABIC',
       'ARCH', 'ARMENIAN', 'ARTDES', 'ARTSADMN', 'ASIAN', 'ASIANLAN',
       'ASIANPAM', 'ASTRO', 'AUTO', 'BA', 'BCS', 'BIOINF', 'BIOLCHEM',
       'BIOLOGY', 'BIOMEDE', 'BIOPHYS', 'BIOSTAT', 'CATALAN', 'CEE',
       'CHE', 'CHEM', 'CJS', 'CLARCH', 'CLCIV', 'CLIMATE', 'CMPLXSYS',
       'COGSCI', 'COMM', 'COMP', 'COMPLIT', 'CSP', 'CZECH', 'DANCE',
       'DATASCI', 'DIGITAL', 'DUTCH', 'EARTH', 'EAS', 'ECON', 'EDCURINS',
       'EDUC', 'EEB', 'EECS', 'ELI', 'ENGLISH', 'ENGR', 'ENS', 'ENSCEN',
       'ENVIRON', 'ES', 'ESENG', 'FRENCH', 'FTVM', 'GEOG', 'GERMAN',
       'GREEK', 'GREEKMOD', 'GTBOOKS', 'HEBREW', 'HISTART', 'HISTORY',
       'HONORS', 'HS', 'HUMGEN', 'INSTHUM', 'INTLSTD', 'INTMED', 'IOE',
       'ISLAM', 'ITALIAN', 'JAZZ', 'JUDAIC', 'KINESLGY', 'KRSTD', 'LACS',
       'LATIN', 'LATINOAM', 'LING', 'LSWA', 'MACROMOL', 'MATH', 'MATSCIE',
 

In [137]:
df.head()

Unnamed: 0,dept,number,name,section,term,credits,mode,instructor
0,AAS,103,First Year Social Science Seminar,Section 001 (SEM) Reading Africa: Critical Per...,FA 2020,3,Person,"Stein,Howard"
1,AAS,103,First Year Social Science Seminar,Section 003 (SEM) The City Life in Urban Africa,FA 2020,3,Online,"Murray,Martin J"
2,AAS,103,First Year Social Science Seminar,Section 005 (SEM) Race and Democracy,FA 2020,3,Hybrid,"Dillard,Angela Denise"
3,AAS,104,First Year Humanities Seminar,Section 004 (SEM) Nonviolence: From Montgomery...,FA 2020,3,Person,"Ellsworth,Scott A; homepage"
4,AAS,104,First Year Humanities Seminar,Section 006 (SEM) Say It Loud: Black Culture i...,FA 2020,3,Online,"Berrey,Stephen"


In [149]:
df.dept.value_counts().sort_values(ascending=False)

MATH       323
ENGLISH    286
SPANISH    188
EECS       170
PHYSICS    154
          ... 
GTBOOKS      1
BIOSTAT      1
MUSMETH      1
UARTS        1
MKT          1
Name: dept, Length: 171, dtype: int64

# Chart types

- force thingy chart with circles??? maybe size by # of sections
- something simpler would be a proportional bars
- searchable table

# Questions

- Is my class online?
- Which departments have the most/least online classes?
- Which subjects have the most/least online classes?
- Which professors are teaching most/least online?


How are professors preparing to teach different types of classes?