In [46]:
import time
import re
import pandas as pd
from tqdm.notebook import tqdm

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [47]:
# Initiate a Chrome web driver.
# The module-level `driver` is the one browser session that every helper
# function defined in the cells below reads as a global.
driver = webdriver.Chrome()

In [48]:
# Key page URLs
# Page opened by the login cell below.
login_page_url = 'https://monash.csod.com/samldefault.aspx'
# Training catalogue page; loading_class_list_page() navigates here.
class_list_page_url = 'https://monash.csod.com/LMS/BrowseTraining/BrowseTraining.aspx'

# Login by Okta

In [49]:
# Open the login page in the Selenium-controlled browser. This notebook never
# types credentials itself, so the sign-in presumably has to be completed by
# hand in that window before the following cells are run -- TODO confirm.
driver.get(login_page_url)

# Load online class list page

In [50]:
def click_button(button, retry_interval=1, timeout=None):
    """Click ``button``, retrying until the click succeeds.

    The page is rendered dynamically, so an element may exist before it is
    clickable; failed clicks are therefore retried after a short pause.

    Args:
        button: Any object exposing a ``click()`` method (a Selenium element).
        retry_interval: Seconds to sleep between attempts. Defaults to 1.
        timeout: Maximum number of seconds to keep retrying. ``None`` (the
            default) retries indefinitely.

    Raises:
        Exception: Whatever ``button.click()`` raised on the last attempt,
            once ``timeout`` seconds have elapsed.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        try:
            button.click()
            return
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate so a stuck click can be interrupted
            # from the notebook.
            if deadline is not None and time.monotonic() >= deadline:
                raise
            time.sleep(retry_interval)

In [51]:
def loading_class_list_page():
    """Open the class list page and block until it has rendered.

    Navigates the global ``driver`` to ``class_list_page_url`` and then polls
    once per second until the "clear type" link is present in the DOM, which
    is used as the signal that the page has finished rendering.

    Uses ``find_elements(By.XPATH, ...)``; the ``find_elements_by_xpath``
    shortcut was removed from Selenium in 4.3.
    """
    print('[....] Loading the online class list page', end='\r')
    driver.get(class_list_page_url)
    clear_type_xpath = "//a[@data-bind='click: vmLoType.clearSelectedLoType']"
    # find_elements returns an empty list (rather than raising) while the
    # link has not been rendered yet.
    while not driver.find_elements(By.XPATH, clear_type_xpath):
        time.sleep(1)
    print('[Done]')

In [52]:
def get_class_list_page():
    """Load the class list page and switch it to "Online course" / list view.

    After the page has rendered, three controls are clicked in order: the
    "clear type" link, the "Online course" type filter, and the "List view"
    toggle. Each click goes through ``click_button`` so that it is retried
    until it succeeds.

    Uses ``find_element(By.XPATH, ...)``; the ``find_element_by_xpath``
    shortcut was removed from Selenium in 4.3.
    """
    loading_class_list_page()
    # (progress message, XPath of the control to click), in click order.
    steps = (
        ('[....] Clear classes type',
         "//a[@data-bind='click: vmLoType.clearSelectedLoType']"),
        ('[....] Choose online course type',
         "//span[@role='presentation']/a[@data-original-title='Online course']"),
        ('[....] Change to list view',
         "//a[@title='List view']"),
    )
    for message, xpath in steps:
        print(message, end='\r')
        # Look the control up only when its turn comes: earlier clicks may
        # re-render the page.
        click_button(driver.find_element(By.XPATH, xpath))
        print('[Done]')

In [53]:
# Open the catalogue, select the "Online course" type and switch to list view.
get_class_list_page()

[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button


# Get the total number of classes

In [54]:
def get_num_classes():
    """Return the number of classes reported in the result-count heading.

    Reads the text of the result-count ``<span>`` inside the page's ``<h1>``
    and parses its first whitespace-separated token as an integer.

    Uses ``find_element(By.XPATH, ...)``; the ``find_element_by_xpath``
    shortcut was removed from Selenium in 4.3.

    Returns:
        int: The class count shown on the page.

    Raises:
        IndexError: If the span's text is empty.
        ValueError: If the first token is not an integer.
    """
    num_results_span = driver.find_element(
        By.XPATH,
        "//h1/span[@data-bind='text: totalResultString(), visible: !isLoadingResults()']")
    count_token = num_results_span.text.split()[0]
    # Drop thousands separators so a count rendered like "1,234" still parses.
    return int(count_token.replace(',', ''))

In [55]:
# Read the result count once; the scraping loop further down iterates
# over range(num_classes).
num_classes = get_num_classes()
print('Total number of available classes:', num_classes)

Total number of available classes: 89


# Fetch class data

In [56]:
def get_class_name_and_link(i):
    """Return the name and link element of the ``i``-th class in the list.

    While fewer than ``i + 1`` class links are present in the DOM, the page
    is scrolled to the bottom and re-queried after one second; scrolling
    presumably makes the page load more entries -- TODO confirm.

    Uses ``find_elements(By.XPATH, ...)``; the ``find_elements_by_xpath``
    shortcut was removed from Selenium in 4.3.

    Args:
        i: Zero-based position of the class in the list.

    Returns:
        tuple: ``(class_name, class_link)`` where ``class_name`` is the
        link's text and ``class_link`` is the element itself.
    """
    class_link_xpath = "//h3/a[@class='ellipsis']"
    print('[....] Searching for the class link', end='\r')
    class_list = driver.find_elements(By.XPATH, class_link_xpath)
    # NOTE: this loop never exits if the page stops adding entries before
    # index ``i`` becomes available.
    while i >= len(class_list):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)
        class_list = driver.find_elements(By.XPATH, class_link_xpath)
    print('[Done]')
    class_link = class_list[i]
    return class_link.text, class_link

In [57]:
def loading_class_page(page_link):
    """Open a class page from its list link and expand its details.

    Clicks ``page_link``, polls once per second until the class title
    heading is present, then clicks the "show more" button through
    ``click_button``.

    Uses ``find_element(s)(By.XPATH, ...)``; the ``find_element(s)_by_xpath``
    shortcuts were removed from Selenium in 4.3.

    Args:
        page_link: Link element of the class, as returned by
            ``get_class_name_and_link``.
    """
    # Loading the class page
    print('[....] Loading the class page', end='\r')
    page_link.click()
    # The title heading is used as the signal that the page has rendered.
    while not driver.find_elements(By.XPATH, "//h1[@data-test='LD_Title']"):
        time.sleep(1)
    print('[Done]')
    # Click the show more button
    print('[....] Click the show more button', end='\r')
    # NOTE(review): these class names look auto-generated and may change
    # between site builds -- confirm there is no stabler attribute to use.
    show_more_button = driver.find_element(
        By.XPATH, "//button[@class='sc-gtfDJT eikTzr sc-hgHYgh qtyAb']")
    click_button(show_more_button)
    print('[Done]')

In [72]:
def fetch_credit_type():
    """Classify the open class page by keywords in its details section.

    Uses ``find_element(By.XPATH, ...)``; the ``find_element_by_xpath``
    shortcut was removed from Selenium in 4.3.

    Returns:
        str: ``"Professional"`` if the details text contains
        "Professionalism", otherwise ``"Research"`` if it contains
        "Excellence in Research & Teaching", otherwise ``"Unsure"``.
    """
    print('[....] Fetch credit type data', end='\r')
    details_text = driver.find_element(
        By.XPATH, "//section[@data-test='Details_Section']").text
    # Checked in order; the first marker found in the text wins.
    markers = (
        ("Professionalism", "Professional"),
        ("Excellence in Research & Teaching", "Research"),
    )
    credit_type = "Unsure"
    for marker, label in markers:
        if marker in details_text:
            credit_type = label
            break
    print('[Done]')
    return credit_type

In [59]:
def fetch_credit_hours():
    """Return the credit-hours text shown on the open class page.

    Takes the last ``<div>`` matching the XPath below and returns the second
    line of its text.

    Uses ``find_elements(By.XPATH, ...)``; the ``find_elements_by_xpath``
    shortcut was removed from Selenium in 4.3.

    Returns:
        str: The second line of the matched block's text, unparsed.

    Raises:
        IndexError: If no block matches or its text has fewer than two lines.
    """
    print('[....] Fetch credit hours data', end='\r')
    # NOTE(review): these class names look auto-generated and may change
    # between site builds -- confirm there is no stabler attribute to use.
    blocks = driver.find_elements(By.XPATH, "//div[@class='sc-ccSCjj bfWTHi']")
    text = blocks[-1].text
    credit_hours = text.split('\n')[1]
    print('[Done]')
    return credit_hours

In [60]:
def fetch_additional_hours():
    """Return the "additional" hours mentioned in the class details.

    Searches the details section text for the word ``additional`` followed
    (after an optional space) by a number, and returns the first such number.

    The number pattern accepts any count of integer and fractional digits.
    The previous pattern ``\\d\\.?\\d?`` captured at most one digit on each
    side of the decimal point, so ``10.5`` was read as ``10`` and ``1.25``
    as ``1.2``.

    Uses ``find_element(By.XPATH, ...)``; the ``find_element_by_xpath``
    shortcut was removed from Selenium in 4.3.

    Returns:
        str: The matched number as text, or ``'0'`` when none is mentioned.
    """
    print('[....] Fetch additional hours data', end='\r')
    details_text = driver.find_element(
        By.XPATH, "//section[@data-test='Details_Section']").text
    match = re.search(r'additional ?(\d+(?:\.\d+)?)', details_text)
    additional_hours = match.group(1) if match else '0'
    print('[Done]')
    return additional_hours

In [78]:
# Column layout of the result table; one row is collected per MGR class.
df_columns = ['Class Name', 'Type', 'Graduate Education Hour', 'Additional Hour', 'URL']
# Rows are accumulated in a plain list and turned into a DataFrame once,
# instead of growing the DataFrame one row at a time with df.loc[len(df)].
class_records = []
# Fetching data
try:
    for i in tqdm(range(num_classes)):
        print('- Starting to fetch No.%d class data ...' % i)
        # Start from the list page: after visiting a class page the browser
        # is no longer on the catalogue, so reload and re-filter it.
        if 'BrowseTraining' not in driver.current_url:
            get_class_list_page()
        # Only consider the MGR Online classes
        class_name, class_link = get_class_name_and_link(i)
        if not class_name.startswith('MGR'):
            continue
        # Load the i-th class page
        loading_class_page(class_link)
        # Fetch data
        credit_type = fetch_credit_type()
        credit_hours = fetch_credit_hours()
        additional_hours = fetch_additional_hours()
        class_page_url = driver.current_url
        # Store data into dataframe
        print('[....] Store data into dataframe', end='\r')
        class_records.append([class_name, credit_type, credit_hours, additional_hours, class_page_url])
        print('[Done]')
finally:
    # Build the table even if the loop is interrupted or a page fails, so
    # the rows collected so far are still available in `df`.
    df = pd.DataFrame(class_records, columns=df_columns)

HBox(children=(IntProgress(value=0, max=89), HTML(value='')))

- Starting to fetch No.0 class data ...
[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button
[Done] Searching for the class link
- Starting to fetch No.1 class data ...
[Done] Searching for the class link
- Starting to fetch No.2 class data ...
[Done] Searching for the class link
- Starting to fetch No.3 class data ...
[Done] Searching for the class link
- Starting to fetch No.4 class data ...
[Done] Searching for the class link
- Starting to fetch No.5 class data ...
[Done] Searching for the class link
- Starting to fetch No.6 class data ...
[Done] Searching for the class link
- Starting to fetch No.7 class data ...
[Done] Searching for the class link
- Starting to fetch No.8 class data ...
[Done] Searching for the class link
- Starting to fetch No.9 class data ...
[Done] Searching for the class link
- Starting to fetch No.10 class data ...
[Done] Searching for the class link
- Startin

[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button
[Done] Searching for the class link
[Done] Loading the class page
[Done] Click the show more button
[Done] Fetch credit type data
[Done] Fetch credit hours data
[Done] Fetch additional hours data
[Done] Store data into dataframe
- Starting to fetch No.33 class data ...
[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button
[Done] Searching for the class link
[Done] Loading the class page
[Done] Click the show more button
[Done] Fetch credit type data
[Done] Fetch credit hours data
[Done] Fetch additional hours data
[Done] Store data into dataframe
- Starting to fetch No.34 class data ...
[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view butto

[Done] Searching for the class link
[Done] Loading the class page
[Done] Click the show more button
[Done] Fetch credit type data
[Done] Fetch credit hours data
[Done] Fetch additional hours data
[Done] Store data into dataframe
- Starting to fetch No.52 class data ...
[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button
[Done] Searching for the class link
[Done] Loading the class page
[Done] Click the show more button
[Done] Fetch credit type data
[Done] Fetch credit hours data
[Done] Fetch additional hours data
[Done] Store data into dataframe
- Starting to fetch No.53 class data ...
[Done] Loading the online class list page
[Done] Click the clear type button
[Done] Click the online course type button
[Done] Click the list view button
[Done] Searching for the class link
[Done] Loading the class page
[Done] Click the show more button
[Done] Fetch credit type data
[Done] Fetch credit ho

[....] Searching for the class link[Done]
- Starting to fetch No.82 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.83 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.84 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.85 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.86 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.87 class data ...
[....] Searching for the class link[Done]
- Starting to fetch No.88 class data ...
[....] Searching for the class link[Done]



In [80]:
# Sanity check: number of rows collected, followed by a preview of the first rows.
print(len(df))
df.head()

55


Unnamed: 0,Class Name,Type,Graduate Education Hour,Additional Hour,URL
0,MGR Online: Body Language for Leaders,Professional,1.0,0,https://monash.csod.com/ui/lms-learning-detail...
1,MGR Online: Building The Consulting Skill Set,Professional,1.5,1,https://monash.csod.com/ui/lms-learning-detail...
2,MGR Online: Communicating with Confidence,Professional,1.5,0,https://monash.csod.com/ui/lms-learning-detail...
3,MGR Online: Communicating with Empathy,Professional,1.5,0,https://monash.csod.com/ui/lms-learning-detail...
4,MGR Online: Communication Foundations,Professional,2.0,0,https://monash.csod.com/ui/lms-learning-detail...


In [81]:
# Persist the scraped table; the path is relative, so the file lands in the
# current working directory. index=False leaves the row index out of the CSV.
df.to_csv('MGR_classes_credits.csv', index=False)

In [83]:
# The hour columns are scraped as text, so convert them to numbers before
# sorting; otherwise the values are compared as strings (e.g. '10.0' sorts
# before '2.0'). Doing the conversion here keeps this cell reproducible on a
# fresh kernel instead of relying on an earlier, no-longer-present conversion.
# Values that cannot be parsed become NaN. `df` itself is left untouched.
hour_columns = ['Graduate Education Hour', 'Additional Hour']
df_numeric = df.assign(
    **{col: pd.to_numeric(df[col], errors='coerce') for col in hour_columns}
)
df_numeric.sort_values(['Type', 'Graduate Education Hour', 'Class Name'])

Unnamed: 0,Class Name,Type,Graduate Education Hour,Additional Hour,URL
0,MGR Online: Body Language for Leaders,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
5,MGR Online: Effective Listening,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
9,MGR Online: From Idea to Business Opportunity,Professional,1.0,1.0,https://monash.csod.com/ui/lms-learning-detail...
10,MGR Online: Having Difficult Conversations,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
16,MGR Online: Leading with Emotional Intelligence,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
25,MGR Online: Project Management Foundations: Bu...,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
27,MGR Online: Project Management Foundations: Co...,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
29,MGR Online: Project Management Foundations: In...,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
34,MGR Online: Project Management Foundations: Risk,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
37,MGR Online: Project Management Foundations: St...,Professional,1.0,0.0,https://monash.csod.com/ui/lms-learning-detail...
