## Tools we need to get started

In [2]:
""" Tools that we need to run this program """
from time import sleep
import csv
import sys
import copy
from bs4 import BeautifulSoup
import requests

## Scraper function

In [3]:
# Extracts information on every single section of each course offered in the
# selected semester at UIUC from the Course Explorer API and writes it, one row
# per section, to '<semester><year>_courses.csv' in the current directory.
#
# Settings:
#     year (string): academic year
#     semester (string): choose from spring, summer, fall, and winter
semester = 'fall'
year = '2022'

# Seconds to wait before re-sending a request that failed.
RETRY_DELAY_SECONDS = 5
# Per-request timeout, so a stalled connection gets retried instead of hanging.
REQUEST_TIMEOUT_SECONDS = 30


def _fetch_xml(href):
    """Download ``href`` and return the response parsed as XML.

    Sending a burst of HTTP requests to the API occasionally ends in a
    connection error; rather than letting that stop the whole program, wait
    for a bit and try again until the request goes through.

    Args:
        href (string): URL of the XML document to download

    Returns:
        BeautifulSoup: the parsed document
    """
    while True:
        try:
            content = requests.get(href, timeout=REQUEST_TIMEOUT_SECONDS).content
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            sleep(RETRY_DELAY_SECONDS)
        else:
            return BeautifulSoup(content, 'xml')


def _text_of(parent, tag_name):
    """Return the string of the first ``tag_name`` element found under ``parent``.

    Args:
        parent: parsed element to search in, or None
        tag_name (string): name of the element to look for

    Returns:
        string: the element's string, or '' when ``parent`` is None, the
            element is missing, or the element has no string
    """
    if parent is None:
        return ''
    tag = parent.find(tag_name)
    if tag is None or tag.string is None:
        return ''
    return str(tag.string)


csv_header = ['Year','Semester','SubjectName','SubjectId','CourseNumber','CourseName'
                ,'Description','CreditHours','CRN','SectionNumber','SectionType','StartTime',
                'EndTime','DaysOfWeek','RoomNumber','BuildingName','MainInstructor']
url = f'https://courses.illinois.edu/cisapp/explorer/schedule/{year}/{semester}.xml'
# open/create a csv file; newline='' is what the csv module expects (it avoids
# blank lines between rows on Windows) and the `with` block closes the file
# even when the scrape is interrupted by an error
with open(f'{semester}{year}_courses.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # write the csv header to the csv file
    writer.writerow(csv_header)
    # connect to the UIUC Course Explorer API; this first request is not
    # retried, so an unreachable API is reported right away
    xml_data = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS).content
    soup = BeautifulSoup(xml_data, 'xml')
    # traverse the subject list
    for subject in soup.find_all('subject'):
        row = [year, semester, subject.string, subject['id']]
        # traverse a course list
        for course in _fetch_xml(subject['href']).find_all('course'):
            sections_data = _fetch_xml(course['href'])
            # NOTE(review): replace('.', '') strips every period, so a
            # fractional value such as '1.5 hours.' would lose its decimal
            # point - confirm whether only the trailing period should go.
            course_row = row + [
                course['id'],
                course.string,
                _text_of(sections_data, 'description'),
                _text_of(sections_data, 'creditHours').replace('.', ''),
            ]
            for section in sections_data.find_all('section'):
                # Some courses have an invalid link which causes the whole
                # program to stop and we'd never let this happen
                if 'http://cis.local' in section['href']:
                    continue
                # Only the first meeting of a section is recorded. A section
                # without meeting details still gets a row, with the
                # meeting columns left empty.
                meeting = _fetch_xml(section['href']).find('meeting')
                instructors = None
                if meeting is not None:
                    instructors = meeting.find('instructors')
                # write the complete information on a section of a course to the csv file
                writer.writerow(course_row + [
                    section['id'],
                    section.string,
                    _text_of(meeting, 'type'),
                    _text_of(meeting, 'start'),
                    _text_of(meeting, 'end'),
                    _text_of(meeting, 'daysOfTheWeek').replace(' ', ''),
                    _text_of(meeting, 'roomNumber'),
                    _text_of(meeting, 'buildingName'),
                    # main instructor = first instructor listed, if any
                    _text_of(instructors, 'instructor'),
                ])