# Analyzing Computer Science versus Business Management Introductory Course Professor Reviews and Their Trends Over Time

William Ingold, Erik Kelemen, Ashish Manda

## Introduction

## Grabbing Introductory Course Professors From UMD.io

In [135]:
import requests
# Endpoint of the umd.io API that lists professors; it is queried below with a course_id parameter.
professors_url = "https://api.umd.io/v1/professors"

In [157]:
import csv
from os import path

# Local CSV caches of the professor names (and the courses they teach), one file per department.
cmsc_professor_names_filepath = './data/cmsc_professor_names.csv'
bmgt_professor_names_filepath = './data/bmgt_professor_names.csv'

# Whether each cache file already exists on disk; used later to choose between
# reading the CSV and querying the API.
have_cmsc_professors = path.exists(cmsc_professor_names_filepath)
have_bmgt_professors = path.exists(bmgt_professor_names_filepath)

def read_professor_name_data(professor_filepath):
    """Reads the professor names and their courses from a CSV file.

    The file is expected to start with a header row naming the columns
    ``name`` and ``courses``, where ``courses`` holds course ids separated
    by spaces.

    Args:
        professor_filepath: String holding a filepath to the professor csv file.

    Returns:
        A dictionary of professor names to a set of courses they have taught.
    """

    professors = {}

    # newline='' leaves line-ending handling to the csv module, as its documentation recommends.
    with open(professor_filepath, mode='r', newline='') as csv_file:
        # DictReader consumes the header row itself, so every row yielded here
        # is a data row and none of them may be skipped.
        for row in csv.DictReader(csv_file):
            # split() with no argument ignores repeated or trailing spaces and
            # gives an empty set for a professor with no courses.
            professors[row['name']] = set(row['courses'].split())

    return professors

def save_professor_data(professors, filepath):
    """Saves the professor names and their courses to a CSV file.

    The file gets a header row (``name``, ``courses``) followed by one row per
    professor, with the courses joined by single spaces in sorted order.

    Args:
        professors: A dictionary of professor name keys and a set of courses for values.
        filepath: String holding the filepath of the CSV file to write.
    """

    columns = ['name', 'courses']
    try:
        # newline='' stops the platform from translating the line endings the
        # csv writer emits, which would otherwise add blank rows on Windows.
        with open(filepath, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=columns)
            writer.writeheader()

            for name, courses in professors.items():
                # Sets have no defined order; sorting keeps the file identical from run to run.
                writer.writerow({'name': name, 'courses': ' '.join(sorted(courses))})

    except IOError:
        # Best effort: a failed write is reported but does not stop the notebook.
        print("Error in writing the CSV file")

In [137]:
def get_professors_for_courses(course_ids):
    """Gets all the professors for the given course_ids, keyed by professor name.

    Args:
        course_ids: A list of course ids (e.g. ['CMSC216', 'CMSC250']).

    Returns:
        A dictionary of professor names to the set of the given courses they
        are listed for. A course whose request does not come back with status
        200 contributes nothing, so the dictionary is empty if every request
        fails.
    """

    professors = {}

    for course_id in course_ids:
        response = requests.get(professors_url, params={'course_id': course_id})

        # A failed lookup for one course should not discard the others.
        if response.status_code != 200:
            continue

        for item in response.json():
            professors.setdefault(item['name'], set()).add(course_id)

    return professors

### Computer Science Professors

In [146]:
# Override the cache checks so the cells below fetch from the API again instead of reading the CSV files.
have_bmgt_professors = False 
have_cmsc_professors = False 

In [158]:
# Introductory computer science courses whose professors are compared.
cmsc_course_ids = ["CMSC131", "CMSC132", "CMSC216", "CMSC250"]

if not have_cmsc_professors:
    cmsc_professors = get_professors_for_courses(cmsc_course_ids)

    # Only cache a successful fetch; writing an empty result would leave an
    # empty cache file that a later run could read instead of retrying the API.
    if cmsc_professors:
        save_professor_data(cmsc_professors, cmsc_professor_names_filepath)
        have_cmsc_professors = True
else:
    cmsc_professors = read_professor_name_data(cmsc_professor_names_filepath)

if not cmsc_professors:
    print("Error response from umd.io API")

# A typo of Jason Filippou from the database
cmsc_professors.pop('Iason Filippou', None)

print(cmsc_professors)

{'Fawzi Emad': {'CMSC131', 'CMSC250', 'CMSC132'}, 'Ilchul Yoon': {'CMSC131', 'CMSC216', 'CMSC132'}, 'Nelson Padua-Perez': {'CMSC131', 'CMSC216', 'CMSC132'}, 'Pedram Sadeghian': {'CMSC131', 'CMSC132'}, 'Anwar Mamat': {'CMSC132'}, 'Laurence Herman': {'CMSC216', 'CMSC132'}, 'A Shankar': {'CMSC216'}, 'Aditya Acharya': {'CMSC250'}, 'Alexander Brassel': {'CMSC250'}, 'Clyde Kruskal': {'CMSC250'}, 'David Sekora': {'CMSC250'}, 'Donald Perlis': {'CMSC250'}, 'Jason Filippou': {'CMSC250'}, 'Mohammad Nayeem Teli': {'CMSC250'}, 'Roger Eastman': {'CMSC250'}}


### Business Management Professors

In [126]:
# Introductory business management courses whose professors are compared.
bmgt_course_ids = ["BMGT110", "BMGT220", "BMGT221", "BMGT230"]

if not have_bmgt_professors:
    bmgt_professors = get_professors_for_courses(bmgt_course_ids)

    # Only cache a successful fetch; writing an empty result would leave an
    # empty cache file that a later run could read instead of retrying the API.
    if bmgt_professors:
        save_professor_data(bmgt_professors, bmgt_professor_names_filepath)
        have_bmgt_professors = True
else:
    bmgt_professors = read_professor_name_data(bmgt_professor_names_filepath)

if not bmgt_professors:
    print("Error response from umd.io API")

print(bmgt_professors)

{'Hugh Turner': {'BMGT110'}, 'Jeff Miller': {'BMGT110'}, 'Cody Hyman': {'BMGT220'}, 'Laurel Mazur': {'BMGT221', 'BMGT220'}, 'Progyan Basu': {'BMGT220'}, 'Viktoriya Zotova': {'BMGT220'}, 'Gary Bulmash': {'BMGT221'}, 'Gerald Ward': {'BMGT221'}, 'Ai Ren': {'BMGT230'}, 'Daehoon Noh': {'BMGT230'}, 'Erich Studer-Ellis': {'BMGT230'}, 'Huan Cao': {'BMGT230'}, 'Radu Lazar': {'BMGT230'}, 'Shubham Akshat': {'BMGT230'}, 'Ziwei Cao': {'BMGT230'}}


## Grabbing Reviews

## From RateMyProfessors

In [127]:
from bs4 import BeautifulSoup

# Search endpoint on RateMyProfessors used to look a professor up by name.
ratemyprofessor_url = "https://www.ratemyprofessors.com/search.jsp"

# Base query parameters for the search; the professor's name is supplied under 'query' for each request.
# NOTE(review): requests URL-encodes parameter values, so the '+' characters in schoolName are
# presumably sent as literal plus signs rather than spaces — confirm this is intended.
params = {'queryoption':'HEADER', 'schoolID':'1270', 'queryBy':'teacherName', 'schoolName':'University+of+Maryland'}

# Headers sent with the RateMyProfessors requests; the User-Agent imitates a desktop Firefox browser.
# NOTE(review): the Access-Control-Allow-* entries are normally response headers set by a server —
# confirm they are needed on outgoing requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0",
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Headers": "Content-Type",
    "Access-Control-Allow-Methods": "GET"
}

In [128]:
def find_rmp_professor_url(html_doc):
    """Finds the professor's URL on the search page and returns it.

    Args:
        html_doc: A string containing an HTML document.

    Returns:
        The full URL for the professor's page, or None when the search page
        has no professor listing or the listing contains no link.

    """

    soup = BeautifulSoup(html_doc, 'html.parser')

    # A search without results has no listing item; find() then returns None,
    # so it must be checked before looking for the link inside it.
    listing = soup.find('li', class_='listing PROFESSOR')
    if listing is None:
        return None

    partial_url = listing.find('a', href=True)
    if partial_url is None:
        return None

    # The listing links are relative to the site root.
    main_url = "https://www.ratemyprofessors.com"
    return main_url + partial_url['href']

In [129]:
def query_rmp_for_professor_url(professor_name, headers, params):
    """Queries RateMyProfessor for the professor, given the parameters and headers.

    Args:
        professor_name: The <first name> <last name> of the professor.
        headers: Dictionary of headers for the get request.
        params: Dictionary of parameters for the get request. It is not
            modified; the professor's name is added to a copy under 'query'.

    Returns:
        The full URL for the professor's page after searching for it, or None
        when the search request does not return status 200 or no professor is
        found on the result page.

    """

    # Work on a copy so the caller's (shared) parameter dictionary does not
    # keep the name of the last professor that was searched for.
    query_params = dict(params)
    query_params['query'] = professor_name

    response = requests.get(ratemyprofessor_url, headers=headers, params=query_params)

    if response.status_code != 200:
        return None

    return find_rmp_professor_url(response.text)

In [130]:
def get_rmp_prof_stats(page_text):
    """Extracts the headline statistics from a professor's RateMyProfessors page.

    Args:
        page_text: An HTML document of the professor's page.

    Returns:
        A dictionary with the keys 'rating', 'take_again', 'difficulty' and
        'rating_count'. All values are strings taken from the page;
        'rating_count' keeps only the digit characters of the ratings link text.
    """

    page = BeautifulSoup(page_text, 'html.parser')

    numerator = page.select('div[class*="RatingValue__Numerator"]')[0]
    overall_rating = numerator.text

    # The feedback box holds the "would take again" figure first and the difficulty second.
    feedback_box = page.select('div[class*="TeacherFeedback__StyledTeacherFeedback"]')[0]
    feedback_numbers = feedback_box.select('div[class*="FeedbackItem__FeedbackNumber"]')
    would_take_again = feedback_numbers[0].text
    level_of_difficulty = feedback_numbers[1].text

    ratings_link = page.select('div[class*="RatingValue__NumRatings"]')[0].select('a')[0]
    digits_only = ''.join(filter(str.isdigit, ratings_link.text))

    return {
        'rating': overall_rating,
        'take_again': would_take_again,
        'difficulty': level_of_difficulty,
        'rating_count': digits_only,
    }

In [131]:
def get_rmp_prof_top_tags(page_text):
    """Collects the top tags shown on a professor's RateMyProfessors page.

    Args:
        page_text: An HTML document of the professor's page.

    Returns:
        A list with the text of every span inside the first tags container.
    """

    page = BeautifulSoup(page_text, 'html.parser')
    tags_container = page.select('div[class*="TeacherTags__TagsContainer"]')[0]

    return [span.text for span in tags_container.select('span')]

#### Selenium is needed because JavaScript hides additional reviews

In [132]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Starts a Firefox session through the geckodriver binary under ./bin; the review loader below uses this driver.
# NOTE(review): the executable_path argument is deprecated in newer Selenium releases — confirm the
# installed version still accepts it.
driver = webdriver.Firefox(executable_path='./bin/geckodriver.exe')

In [133]:
def load_all_rmp_reviews(page_url):
    """Loads all the reviews for a given professor and returns the element holding them.

    Opens the page in the module-level Selenium driver, dismisses the cookie
    pop up if present, and clicks "Load More Ratings" until the button is gone.

    Args:
        page_url: The URL for the professor's page.

    Returns:
        The Selenium element with the id 'ratingsList', which contains every
        loaded review. Its HTML is available as a string through
        get_attribute('outerHTML').
    """

    driver.get(page_url)

    # RateMyProfessors has a cookies pop up that overlays the website, it needs to be closed first
    time.sleep(0.5)
    close_cookies = driver.find_elements(By.XPATH, '//button[text()="Close"]')

    if close_cookies:
        close_cookies[0].click()

    load_more_xpath = '//button[text()="Load More Ratings"]'
    load_more = driver.find_elements(By.XPATH, load_more_xpath)

    # RateMyProfessors paginates the reviews via Javascript, so we must continually load more while the button is available
    while load_more:
        load_more[0].click()
        time.sleep(1)  # give the next batch of reviews time to be added to the page
        load_more = driver.find_elements(By.XPATH, load_more_xpath)

    # Same By-based locator style as the lookups above, instead of the
    # find_element_by_id shortcut.
    return driver.find_element(By.ID, 'ratingsList')

In [134]:
def parse_rating_header(soup):
    """Reads the course and date out of a review's rating header.

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A dictionary with the whitespace-stripped 'course' and 'date' texts of
        the first rating header found.
    """

    header = soup.select('div[class*="RatingHeader__StyledHeader"]')[0]

    course_div = header.select('div[class*="RatingHeader__StyledClass"]')[0]
    course_name = course_div.text.strip()

    timestamp_div = header.select('div[class*="TimeStamp__StyledTimeStamp"]')[0]
    review_date = timestamp_div.text.strip()

    return {'course': course_name, 'date': review_date}

def parse_meta_data(soup):
    """Parses and returns the meta data for a single review.

    Each div inside the course meta container is read as a "name: value"
    pair, e.g. "Would Take Again: Yes".

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A dictionary containing the meta data (e.g. Would Take Again) for the review.
        Names and values are stripped of surrounding whitespace; divs without
        a colon are left out.
    """

    course_meta = soup.select('div[class*="CourseMeta__StyledCourseMeta"]')[0]
    review_meta_data = {}

    for meta_div in course_meta.select('div'):
        # Split on the first colon only so a value that itself contains a colon stays whole.
        meta_name, separator, meta_value = meta_div.text.partition(':')

        # A div without a "name: value" pair carries no meta data; skip it rather than fail.
        if not separator:
            continue

        review_meta_data[meta_name.strip()] = meta_value.strip()

    return review_meta_data

def parse_rating_data(soup):
    """Reads the quality and difficulty scores of a single review.

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A dictionary with the text of the first rating value under 'quality'
        and the text of the second under 'difficulty'.
    """

    values_box = soup.select('div[class*="RatingValues__StyledRatingValues"]')[0]
    value_divs = values_box.select('div[class*="RatingValues__RatingValue"]')

    quality_text = value_divs[0].text
    difficulty_text = value_divs[1].text

    return {'quality': quality_text, 'difficulty': difficulty_text}

def parse_review_tags(soup):
    """Parses and returns the tags for a single review.

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A list containing the tags for the review; the list is empty when the
        review has no tag container.
    """

    tag_container = soup.select('div[class*="RatingTags__StyledTags"]')

    # Not all reviews add tags; return an empty list so the result is always a list.
    if not tag_container:
        return []

    return [tag.text for tag in tag_container[0].select('span')]
    
def parse_thumb_scoring(soup):
    """Reads the helpful / not helpful vote counts of a single review.

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A dictionary with the text of the first help total under 'thumb-up'
        and the text of the second under 'thumb-down'. The texts are returned
        as found, without stripping whitespace.
    """

    footer = soup.select('div[class*="RatingFooter__StyledRatingFooter"]')[0]
    help_totals = footer.select('div[class*="RatingFooter__HelpTotal"]')

    up_votes = help_totals[0].text
    down_votes = help_totals[1].text

    return {'thumb-up': up_votes, 'thumb-down': down_votes}

def parse_review_text(soup):
    """Reads the written comment of a single review.

    Args:
        soup: An initialized BeautifulSoup object for the professor's page.

    Returns:
        A string with the text of the first comments element.
    """

    comment_divs = soup.select('div[class*="Comments__StyledComments"]')
    first_comment = comment_divs[0]

    return first_comment.text
    
def parse_single_rmp_review(review_item, courses):
    """Parses one review, provided it was written for one of the wanted courses.

    Args:
        review_item: A single review list item containing all the appropriate HTML.
        courses: The course ids to keep; the review's course must be contained in it.

    Returns:
        A dictionary with the keys 'meta_data', 'rating_data', 'tags',
        'thumb_scoring' and 'review_text' for the review, or None when the
        review's course is not in courses.
    """

    soup = BeautifulSoup(review_item, 'html.parser')

    # TODO: Loses course reviews like 'CMSC131CMSC132' where students combined multiple courses they took
    reviewed_course = parse_rating_header(soup)['course']
    if reviewed_course not in courses:
        return None

    return {
        'meta_data': parse_meta_data(soup),
        'rating_data': parse_rating_data(soup),
        'tags': parse_review_tags(soup),
        'thumb_scoring': parse_thumb_scoring(soup),
        'review_text': parse_review_text(soup),
    }

In [12]:
def get_all_rmp_reviews(reviews_list_html):
    """Placeholder for parsing every review out of the reviews list HTML.

    Currently it only builds a BeautifulSoup object from the input and does
    nothing further with it.

    Args:
        reviews_list_html: Markup handed to BeautifulSoup with the 'html.parser' parser.

    Returns:
        Always None; the parsing itself is not implemented yet.
    """
    soup = BeautifulSoup(reviews_list_html, 'html.parser')

    
    return None

### Computer Science Professors

In [7]:
# Look up Fawzi Emad's RateMyProfessors page and fetch it once; the response is reused by the next cell.
fawzi_url = query_rmp_for_professor_url('Fawzi Emad', headers, params)
print(fawzi_url)

# NOTE(review): query_rmp_for_professor_url returns None when the search does not succeed, which
# requests.get would presumably reject — confirm whether a guard is wanted here.
response = requests.get(fawzi_url, headers=headers)

https://www.ratemyprofessors.com/ShowRatings.jsp?tid=313062


This cell is split from the previous one for testing purposes, so that the page does not have to be queried multiple times.

In [87]:
# Parse the summary statistics and top tags out of the page fetched in the previous cell.
if response.status_code == 200:
    print(get_rmp_prof_stats(response.text))
    print(get_rmp_prof_top_tags(response.text))
    
else:
    print('error')

{'rating': '4.4', 'take_again': '83%', 'difficulty': '3.1', 'rating_count': '114'}
['Amazing lectures', 'Respected', "Skip class? You won't pass.", 'Beware of pop quizzes', 'Hilarious']


In [94]:
single_review = """
<div class="Rating__RatingBody-sc-1rhvpxz-0 dGrvXb"><div class="RatingHeader__StyledHeader-sc-1dlkqw1-0 uBHCj"><div class="RatingHeader__ClassInfoWrapper-sc-1dlkqw1-1 jxOApy"><div class="RatingHeader__StyledClass-sc-1dlkqw1-2 gxDIt"><img src="/static/media/computer-icon.17c26169.svg" alt="Computer Icon" data-for="GLOBAL_TOOLTIP" data-tip="Online Class" data-tooltip="true" class="OnlineCourseLogo__StyledLogo-qyf3kt-0 gemNec" currentitem="false"> CMSC131</div><div class="EmotionLabel__StyledEmotionLabel-sc-1u525uj-0 cJfJJi"><span role="img" aria-label="Sunglasses">😎</span>awesome</div></div><div class="TimeStamp__StyledTimeStamp-sc-9q2r30-0 bXQmMr RatingHeader__RatingTimeStamp-sc-1dlkqw1-3 BlaCV">Dec 3rd, 2020</div></div><div class="RatingValues__StyledRatingValues-sc-6dc747-0 bJSTHc"><div class="RatingValues__RatingContainer-sc-6dc747-1 DObVa"><div class="RatingValues__RatingLabel-sc-6dc747-2 gLxTSP">Quality</div><div class="RatingValues__RatingValue-sc-6dc747-3 kLWEWI">5.0</div></div><div class="RatingValues__RatingContainer-sc-6dc747-1 DObVa"><div class="RatingValues__RatingLabel-sc-6dc747-2 gLxTSP">Difficulty</div><div class="RatingValues__RatingValue-sc-6dc747-3 jILzuI">1.0</div></div></div><div class="Rating__RatingInfo-sc-1rhvpxz-3 kEVEoU"><div class="RatingHeader__StyledHeader-sc-1dlkqw1-0 fUEMJm"><div class="RatingHeader__ClassInfoWrapper-sc-1dlkqw1-1 jxOApy"><div class="RatingHeader__StyledClass-sc-1dlkqw1-2 gxDIt"><img src="/static/media/computer-icon.17c26169.svg" alt="Computer Icon" data-for="GLOBAL_TOOLTIP" data-tip="Online Class" data-tooltip="true" class="OnlineCourseLogo__StyledLogo-qyf3kt-0 gemNec" currentitem="false"> CMSC131</div><div class="EmotionLabel__StyledEmotionLabel-sc-1u525uj-0 cJfJJi"><span role="img" aria-label="Sunglasses">😎</span>awesome</div></div><div class="TimeStamp__StyledTimeStamp-sc-9q2r30-0 bXQmMr RatingHeader__RatingTimeStamp-sc-1dlkqw1-3 BlaCV">Dec 3rd, 2020</div></div><div class="CourseMeta__StyledCourseMeta-x344ms-0 
fPJDHT"><div class="MetaItem__StyledMetaItem-y0ixml-0 LXClX">Would Take Again: <span>Yes</span></div><div class="MetaItem__StyledMetaItem-y0ixml-0 LXClX">Grade: <span>A</span></div><div class="MetaItem__StyledMetaItem-y0ixml-0 LXClX">Textbook: <span>No</span></div><div class="MetaItem__StyledMetaItem-y0ixml-0 LXClX">Online Class: <span>Yes</span></div></div><div class="Comments__StyledComments-dzzyvm-0 gRjWel">Fawzi is a legend</div><div class="RatingTags__StyledTags-sc-1boeqx2-0 eLpnFv"><span class="Tag-bs9vf4-0 hHOVKF">Hilarious</span><span class="Tag-bs9vf4-0 hHOVKF">Lecture heavy</span></div><div class="RatingFooter__StyledRatingFooter-ciwspm-0 dbULCX"><div class="RatingFooter__ButtonWrapper-ciwspm-1 cwcCIQ"><div class="RatingFooter__HelpTotal-ciwspm-2 kAVFzA"><img src="/static/media/thumbs-up-black.eddae738.svg" class="VoteThumb__StyledVoteThumb-p2gtch-0 jCbELu" data-tooltip="true" data-tip="Helpful" data-for="GLOBAL_TOOLTIP" alt="Thumbs up"> 0</div><div class="RatingFooter__HelpTotal-ciwspm-2 kAVFzA"><img src="/static/media/thumbs-down-black.bd601b36.svg" class="VoteThumb__StyledVoteThumb-p2gtch-0 jCbELu" data-tooltip="true" data-tip="Not helpful" data-for="GLOBAL_TOOLTIP" alt="Thumbs down"> 0</div></div><div class="RatingFooter__ButtonWrapper-ciwspm-1 cwcCIQ"><a href="https://www.ratemyprofessors.com//flagTeacherRating.jsp?rid=33939742" class="ReportFlag__StyledReportFlag-sc-1c42epr-0 hjlYuE" data-tooltip="true" data-tip="Report this rating" data-for="GLOBAL_TOOLTIP" data-testid="reportflag_test_id" aria-disabled="false" currentitem="false"><div class="ReportFlag__FlagWrapper-sc-1c42epr-1 kVglhF"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><defs><path id="prefix__a" d="M3.93 10c1.417 0 2.383.276 4.371 1.072 1.762.704 2.546.928 3.629.928 1.189 0 2.094-.165 2.754-.428.095-.039.177-.075.246-.108v-9.86c-.82.253-1.814.396-3 .396-1.417 0-2.383-.276-4.371-1.072C5.797.224 5.013 0 3.93 0 2.741 0 1.836.165 1.176.428A4.094 4.094 0 
00.93.536v9.86c.82-.253 1.814-.396 3-.396z"></path></defs><g fill="none" fill-rule="evenodd"><path fill="#151515" fill-rule="nonzero" d="M3 3a1 1 0 01.293-.707c.22-.22.614-.483 1.21-.721C5.407 1.21 6.564 1 8 1c1.417 0 2.383.276 4.371 1.072C14.133 2.776 14.917 3 16 3c1.189 0 2.094-.165 2.754-.428.341-.137.508-.249.539-.28C19.923 1.663 21 2.11 21 3v12a1 1 0 01-.293.707c-.22.22-.614.483-1.21.721-.903.362-2.06.572-3.497.572-1.417 0-2.383-.276-4.371-1.072C9.867 15.224 9.083 15 8 15c-1.189 0-2.094.165-2.754.428a4.09 4.09 0 00-.247.108L5 22a1 1 0 01-2 0V3zm5 0c-1.189 0-2.094.165-2.754.428A4.094 4.094 0 005 3.536v9.86C5.82 13.143 6.814 13 8 13c1.417 0 2.383.276 4.371 1.072 1.762.704 2.546.928 3.629.928 1.189 0 2.094-.165 2.754-.428.095-.039.177-.075.246-.108v-9.86c-.82.253-1.814.396-3 .396-1.417 0-2.383-.276-4.371-1.072C9.867 3.224 9.083 3 8 3z"></path><g transform="translate(4.07 3)"><mask id="prefix__b" fill="#fff"><use xlink:href="#prefix__a"></use></mask><use fill="none" fill-rule="nonzero" xlink:href="#prefix__a"></use><g fill="none" mask="url(#prefix__b)"><path d="M0 0H64V64H0z" transform="translate(-25 -27)"></path></g></g></g></svg></div></a></div></div></div></div>
"""

# Smoke test: parse the captured review above, which is only returned if its course is CMSC131.
print(parse_single_rmp_review(single_review, ["CMSC131"]))

{'meta_data': {'Would Take Again': 'Yes', 'Grade': 'A', 'Textbook': 'No', 'Online Class': 'Yes'}, 'rating_data': {'quality': '5.0', 'difficulty': '1.0'}, 'tags': ['Hilarious', 'Lecture heavy'], 'thumb_scoring': {'thumb-up': ' 0', 'thumb-down': ' 0'}, 'review_text': 'Fawzi is a legend'}


### Business Management Professors

## From PlanetTerp

### Computer Science Professors

### Business Management Professors