In [1]:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

In [6]:
math = pd.DataFrame()

curr_dept = "MATH"

url = f"https://www.registrar.ucla.edu/Academics/Course-Descriptions/Course-Details?SA={curr_dept}&funsel=3"

req = requests.get(url)
soup = BeautifulSoup(req.text, "lxml")

In [6]:
def get_class_name(src):
    temp = src.split(".")
    return temp[0].strip(), temp[1].strip()

In [7]:
num_dict = { 
    'one':   1, 
    'two':   2, 
    'three': 3, 
    'four':  4, 
    'five':  5, 
    'six':   6, 
    'seven': 7, 
    'eight': 8, 
    'nine':  9, 
    'zero' : 0
}

def get_hours(src):
    # first line has the hours information
    line = src.split(".")[0].lower().strip()
    
    hours = re.findall(r'\w+(?= hour)', line)
    
    total_hours = 0
    for num in hours:
        total_hours += num_dict[num]
        
    return total_hours
    

In [8]:
class_dict = {
    "course": curr_dept,
    "courses": curr_dept,
    "mathematics": "MATH",
    "statistics": "STATS",
    "philosophy": "PHILOS",
    "mechanical and aerospace engineering": "MECH&AE",
    "physics": "PHYSICS",
    "computer science": "COM+SCI",
    "materials science": "MAT+SCI",
    "program in computing": "PIC",
    "chemistry": "CHEM",
    "economics": "ECON",
    "life science": "LS"
}

def get_requisites(src, curr_dept):
    class_dict["course"] = curr_dept
    class_dict["courses"] = curr_dept
    
    line = src.split(".")[0].lower().strip()
    reqs = []
    req_dept = class_dict["course"]
    
    for i in class_dict.keys():
        line = line.replace(i, class_dict[i])
#     print(line)
    optional = False # boolean check if class is optional
    for word in line.split(" "):
        if word in class_dict.values():
            req_dept = word
        
        if "or " in word + " ":
            optional = True
        
        if any(i.isdigit() for i in word):
            class_code = word.upper().replace(',', "").replace(r")", "")
            class_name = f"{req_dept} {class_code}"
            
            if optional: # optional classes you can take instead
                try: # incase there are no requisites
                    reqs[-1] = f"{reqs[-1]}/{class_name}"
                except:
                    pass
                optional = False
            else:
                reqs.append(class_name)
                
    return reqs
            

In [9]:
def get_grade_type(src):
    line = src.rsplit(".", 2)[1].lower().strip() # gets last sentence
    if "or" in line:
        return "both"
    elif r"p/np" in line:
        return "pnp"
    else:
        return "letter"

In [10]:
for low_up_div in ("lower", "upper"):
    div_classes = soup.find("div",{"id": low_up_div})
    for ucla_class in div_classes.select(".media-body"):
        current_class = {"dept": curr_dept}

        current_class["class_id"], current_class["class_name"] = get_class_name(ucla_class.find("h3").text.strip())
        
        if(r"(honors)" in current_class["class_name"].lower()):
            continue
        
        units_desc = ucla_class.select("p")
        current_class["units"] = float(units_desc[0].text.split(":")[1].split(" ")[-1].strip())

        desc = units_desc[1].text.strip()

        while (r"(" in desc.split(".")[0].lower()) | ("preparation" in desc.split(".")[0].lower()):
            desc = desc.split(".", 1)[1]

        current_class["hours"] = get_hours(desc)
        desc = desc.split(".", 1)[1] # gets rid of first sentence for course hours

        while ("preparation" in desc.split(".")[0].lower()):
            desc = desc.split(".", 1)[1]


        if ("requisite" in desc.split(".")[0].lower()):
            current_class["requisites"] = get_requisites(desc, curr_dept)
            desc = desc.split(".", 1)[1] # gets rid of second sentence for requisites
        else:
            current_class["requisites"] = []

        if "corequisite" in desc.split(".")[0].lower():
            current_class["corequisites"] = get_requisites(desc, curr_dept)
            desc = desc.split(".", 1)[1] # gets rid of second sentence for requisites
        else:
            current_class["corequisites"] = []

        current_class["grade_type"] = get_grade_type(desc)
        desc = desc.rsplit(".", 2)[0] # removes last sentence

        current_class["desc"] = desc
        math = math.append(current_class, ignore_index=True)

In [11]:
math

Unnamed: 0,class_id,class_name,corequisites,dept,desc,grade_type,hours,requisites,units
0,1,Precalculus,[],MATH,Function concept. Linear and polynomial funct...,both,4.0,[],4.0
1,3A,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit i...,both,4.0,[MATH 48/MATH 1],4.0
2,3B,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit f...,both,4.0,[MATH 3A],4.0
3,3C,Ordinary Differential Equations with Linear Al...,[],MATH,"Multivariable modeling, matrices and vectors,...",both,4.0,[MATH 3B],4.0
4,11N,Gateway to Mathematics: Number Theory,[],MATH,Introductory number theory course for freshme...,both,4.0,"[MATH 31A, MATH 31B]",4.0
...,...,...,...,...,...,...,...,...,...
105,191H,Honors Research Seminars: Mathematics,[],MATH,Participating seminar on advanced topics in m...,both,3.0,[],4.0
106,M192A,Introduction to Collaborative Learning Theory ...,[],MATH,Training seminar for undergraduate students w...,letter,1.0,[],1.0
107,195,Community Internships in Mathematics Education,[],MATH,Limited to juniors/seniors. Internship to be ...,pnp,0.0,[],4.0
108,197,Individual Studies in Mathematics,[],MATH,Limited to juniors/seniors. At discretion of ...,both,3.0,[],4.0


In [13]:
math.to_csv("classes.csv")

In [14]:
math

Unnamed: 0,class_id,class_name,corequisites,dept,desc,grade_type,hours,requisites,units
0,1,Precalculus,[],MATH,Function concept. Linear and polynomial funct...,both,4.0,[],4.0
1,3A,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit i...,both,4.0,[MATH 48/MATH 1],4.0
2,3B,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit f...,both,4.0,[MATH 3A],4.0
3,3C,Ordinary Differential Equations with Linear Al...,[],MATH,"Multivariable modeling, matrices and vectors,...",both,4.0,[MATH 3B],4.0
4,11N,Gateway to Mathematics: Number Theory,[],MATH,Introductory number theory course for freshme...,both,4.0,"[MATH 31A, MATH 31B]",4.0
...,...,...,...,...,...,...,...,...,...
105,191H,Honors Research Seminars: Mathematics,[],MATH,Participating seminar on advanced topics in m...,both,3.0,[],4.0
106,M192A,Introduction to Collaborative Learning Theory ...,[],MATH,Training seminar for undergraduate students w...,letter,1.0,[],1.0
107,195,Community Internships in Mathematics Education,[],MATH,Limited to juniors/seniors. Internship to be ...,pnp,0.0,[],4.0
108,197,Individual Studies in Mathematics,[],MATH,Limited to juniors/seniors. At discretion of ...,both,3.0,[],4.0


In [23]:
# Weight of Required Class
def weight_r(isReq, are_Req):
    wR = 0
    if isReq and are_Req:
        wR = -100000000
    elif isReq and not are_Req: 
        wR = 50
    else:
        wR = 0
    return wR

In [24]:
# Weight of Units
def weight_u(unitsIn):
    wu = 0
    if num_classes == 3 and (total_units + unitsIn > 19 or total_units + unitsIn < 12):
        wu = -10000
    elif num_classes == 3:
        wu = 0
    elif (total_units + unitsIn < 19 and total_units + unitsIn > 12) and unitsIn == 4:
        wu = 25
    else:
        wu = 0
    return wu

In [25]:
# Weight of num of GE
def weight_ge(isGe):
    wge = 0
    if isGe and total_GE == 0:
        wge = 50
    elif isGe and total_GE == 1:
        wge = 10
    elif isGe and total_GE >= 2:
        wge = -30
    elif not isGe:
        wge = 0
    return wge

In [26]:
# Weight of class hours
def weight_h(hoursIn):
    hrw = 0
    if hoursIn == 0:
        hrw = 10
    elif hoursIn in (1, 2):
        hrw = 25
    elif hoursIn in (3, 4):
        hrw = 40
    elif hoursIn in (5, 6):
        hrw = 30
    elif hoursIn in range(7, 20):
        hrw = 10
    return hrw

In [27]:
def weight_p(ratingIn):
    wrate = 0
    ratingIn *= 10
    if ratingIn in range(0, 91):
        wrate = 1
    if ratingIn in range(91, 191):
        wrate = 2
    if ratingIn in range(191, 291):
        wrate = 3
    if ratingIn in range(291, 391):
        wrate = 4
    if ratingIn in range(391, 501):
        wrate = 5
    return wrate

In [22]:
def weight_pr(pre_reqIn):
    if pre_reqIn == 0:
        wpr = 0
    elif pre_reqIn in range(1, 4):
        wpr = 10
    elif pre_reqIn in range(4, 9):
        wpr = 20
    elif pre_reqIn in range(9, 16):
        wpr = 30
    elif pre_reqIn in range(16, 100):
        wpr = 50
    return wpr

In [None]:
for years in range(1, 5):
    for quarters in range(1, 4):
        total_units = 0
        num_classes = 0
        total_GE = 0
        total_hours = 0
        for classes in range(1, 5):
            # calc_weight for all classes available
            # sort classes by weight 
            # User chooses a class
            num_classes += 1
            # add to total_hours, total_GE, and total_Units based on the user's input


In [28]:
def calc_weight(class_id, class_name, dept, hours, num_pre_reqs, num_ge, num_units, req_class, prof_rating):
    
    return weight(df["hours"]) + weight_pr(df["prereq_scores"]) + weight_u(df["units"]) + weight_r()
    return weight_h(hours) + weight_pr(num_pre_reqs) + weight_ge(num_ge) + weight_u(num_units) + weight_r(
        is_req(class_id), check_req(req_class)) + weight_p(prof_rating)

In [86]:
def calc_num_prereqs(df):
    class_name = df["dept"] + " " + df["class_id"]
    total_reqs = 0
    
    
    total_reqs = 0
    for req in ("requisites", "corequisites"):
        for index, rows in math[req].iteritems():
            for curr_class in rows:
                if class_name in curr_class.split("/"):
                    total_reqs += 1
                
    return(total_reqs)

In [87]:
math["prereq_score"] = math.apply(calc_num_prereqs, axis=1)

In [88]:
math

Unnamed: 0,class_id,class_name,corequisites,dept,desc,grade_type,hours,requisites,units,weight,prereq_score
0,1,Precalculus,[],MATH,Function concept. Linear and polynomial funct...,both,4.0,[],4.0,0.0,1
1,3A,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit i...,both,4.0,[MATH 48/MATH 1],4.0,0.0,1
2,3B,Calculus for Life Sciences Students,[],MATH,Not open for credit to students with credit f...,both,4.0,[MATH 3A],4.0,0.0,2
3,3C,Ordinary Differential Equations with Linear Al...,[],MATH,"Multivariable modeling, matrices and vectors,...",both,4.0,[MATH 3B],4.0,0.0,1
4,11N,Gateway to Mathematics: Number Theory,[],MATH,Introductory number theory course for freshme...,both,4.0,"[MATH 31A, MATH 31B]",4.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...
105,191H,Honors Research Seminars: Mathematics,[],MATH,Participating seminar on advanced topics in m...,both,3.0,[],4.0,1.0,0
106,M192A,Introduction to Collaborative Learning Theory ...,[],MATH,Training seminar for undergraduate students w...,letter,1.0,[],1.0,0.0,0
107,195,Community Internships in Mathematics Education,[],MATH,Limited to juniors/seniors. Internship to be ...,pnp,0.0,[],4.0,4.0,0
108,197,Individual Studies in Mathematics,[],MATH,Limited to juniors/seniors. At discretion of ...,both,3.0,[],4.0,1.0,0


In [106]:
math_reqs = pd.DataFrame(columns=["requisite_type", "num_required", "course"])
math_reqs.loc[-1] = ["MATH 31A or 31AL", 2, ["MATH 31A/MATH 31AL"]]
math_reqs.reset_index(drop=True)
math_reqs.loc[1] = ["MATH 32A", 1, ["MATH 32A"]]
math_reqs.loc[2] = ["MATH 32B", 1, ["MATH 32B"]]
math_reqs.loc[3] = ["MATH 33A", 1, ["MATH 33A"]]
math_reqs.loc[4] = ["MATH 33B", 1, ["MATH 33B"]]
math_reqs.loc[5] = ["PHYSICS 1A", 1, ["PHYSICS 1A"]]
math_reqs.loc[6] = ["PIC 10A", 1, ["PIC 10A"]]
math_reqs.loc[7] = ["Select of 2", 2, ["CHEM 20A/CHEM 20B/ECON 11/LS 7A/PHILOS 31/PHILOS 13/PHYSICS 1B/PHYSICS 1C/PHYSICS 5B/PHYSICS 5C"]]
# Required
math_reqs.loc[9] = ["MATH 110A", 1, ["MATH 110A"]]
math_reqs.loc[10] = ["MATH 110B", 1, ["MATH 110B"]]
math_reqs.loc[11] = ["MATH 115A", 1, ["MATH 115A"]]
math_reqs.loc[12] = ["MATH 120A", 1, ["MATH 120A"]]
math_reqs.loc[13] = ["MATH 131A", 1, ["MATH 131A"]]
math_reqs.loc[14] = ["MATH 131B", 1, ["MATH 131B"]]

# elective
math_reqs.loc[15] = ["Upper Div Electives", 5, ["MATH 106/MATH 111/MATH 114C/MATH M114S/MATH 115B/MATH 116/MATH 117/MATH 118/MATH 120B/MATH 121/MATH 123/MATH 131C/MATH 132/MATH 133/MATH 134/MATH 135/MATH 136/MATH 142/MATH 146/MATH M148/MATH 151A/MATH 151B/MATH 155/MATH 156/MATH 164/MATH 167/MATH 168/MATH 170A/MATH 170B/MATH 170E/MATH 170S/MATH 171/MATH 174E/MATH 177/MATH 178A/MATH 178B/MATH 178C/MATH 179/MATH 180/MATH 182E/MATH 184/MATH 188SA/MATH 188SB/MATH 188SC/MATH 189/MATH 189HC/MATH 190A/MATH 190B/MATH 190C/MATH 190D/MATH 190E/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 184/MATH 191/MATH 191H/MATH M192A/MATH 195/MATH 197/MATH 199"]]

In [108]:
math_reqs.to_csv("math_reqs.csv")