# UTS Course Data Script
The goal of this python script is to scrape the UTS handbook for degree, course and subject data. This is so that it can be stored in our database and the necessary algorithms can be run on it, so that we can recommend subjects automatically.

## Library Imports
* requests library: Uses HTTP to request the webpage from a server with a given URL (in this case https://www.handbook.uts.edu.au/).
* BeautifulSoup library: Parsers HTML files so that it can be searched and filtered.
* json library: Reading, writing and handling JSON files.

In [21]:
import requests
from bs4 import BeautifulSoup
import json

In [22]:
baseUrl = 'https://www.handbook.uts.edu.au/'

In [23]:
def getPageHTML(url):
    return BeautifulSoup(requests.get(url).content, 'html.parser')

In [24]:
def getDegrees(courseArea, graduateType):
    degrees = []
    degreesPage = BeautifulSoup(requests.get(f"{baseUrl}{courseArea}/{graduateType}/index.html").content, 'html.parser')
    tables = degreesPage.find(id='content').find(class_='ie-images').find_all('table')
    
    if len(tables) <= 1:
        return None
    else:
        for table in tables:
            tableCells = table.find_all("td")
            for cellNumber in range(1, len(tableCells)):
                isDegreeCode = tableCells[cellNumber-1].text.strip()[0] == "C"
                degrees.append({
                    'name': tableCells[cellNumber if isDegreeCode else cellNumber-1].text.strip(),
                    'degreeCode': (tableCells[cellNumber-1 if isDegreeCode else cellNumber].text).lower().strip(),
                    'subjects': {}
                })
                
    return degrees

In [25]:
indexPage = requests.get(baseUrl)

soup = BeautifulSoup(indexPage.content, 'html.parser')
# print(soup.find(class_='toc').find_all('a', href=True)[0]['href'])

# links to the pages for undergraduate and post graduate degrees in all course areas
courseAreas = {}

for courseArea in soup.find(class_='toc').find_all('a', href=True):
    abbreviation = courseArea['href'][1:4]
    
    courseAreas[courseArea.text] = {
        'abbreviation': abbreviation,
        'url': f"https://www.handbook.uts.edu.au{courseArea['href']}",
        'ug_degrees': getDegrees(abbreviation, 'ug'),
        'pg_degrees': getDegrees(abbreviation, 'pg')
    }

print(courseAreas)

{'Analytics and Data Science': {'abbreviation': 'ads', 'url': 'https://www.handbook.uts.edu.au/ads/index.html', 'ug_degrees': None, 'pg_degrees': [{'name': 'Doctor of Philosophy', 'degreeCode': 'c02062', 'subjects': {}}, {'name': 'Doctor of Philosophy', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Learning Analytics (Research)', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c04372', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}, {'name': 'Graduate Certificate in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}]}, 'Business': {'abbreviation': 'bus', 'url': 'https://www.handbook.uts.edu.au/bus/index.html', 'ug_degrees': [{'name': 'Bac

## Scraping subject data seperately
Since many degrees share common subjects, I will be scraping the subject data seperately so I can normalise the json. Which in turn will reduce the data redundancy of the database.

In [29]:
subjects = {}
majors = {}

def subjectMajorIndex(text):
    containsMajor = tableCellText.find("major")
    containsMajors = tableCellText.find("majors")
    
    if containsMajor != -1:
        return containsMajor
    if containsMajors != -1:
        return containsMajors
    
    return False

for key in courseAreas:
    courseArea = courseAreas[key]
    if courseArea['ug_degrees'] != None:
        for index, degree in enumerate(courseArea['ug_degrees']):
#             print("degree", degree)
            tables = getPageHTML(f"{baseUrl}courses/{degree['degreeCode']}.html").find_all('table')
            currentYear = None
            currentSem = None
            major = None
            
#             print(len(tables))
            if len(tables) > 0:
                for table in tables:
                    for tableCell in table:
                        try:
                            tableCellText = tableCell.text.strip()
                            majorIndex = subjectMajorIndex(tableCellText)
                            
                            if majorIndex != -1 and "commencing" in tableCellText:
                                major = tableCellText[:majorIndex].strip()
                                majors[major] = degree['degreeCode']
                            elif tableCellText[0:4] == "Year":
                                currentYear = tableCellText
                            elif tableCellText[-7:] == "session":
                                currentSem = tableCellText[:-7].strip()
                            elif tableCell.text[0:5].strip().isdigit(): # tableCell.text.strip()[0:5].isdigit()
                                subjectCode = tableCell.text[0:6].strip() # tableCell.text[0:6].strip()
#                                 print("here")
#                                 print("x", subjectCode)
                                if subjectCode not in subjects:
                                    subjects[subjectCode] = {
                                        'name': tableCell.text.strip()[5:-3].strip(),
                                        'degreeCodes': {}
                                    }
                                else:
                                    subjects[subjectCode]['degreeCodes'][degree['degreeCode']] = True
                        except: 
                            pass

print(majors)
print(subjects)

{'Accounting and Finance': 'c10026', 'Marketing and Management': 'c10026', 'Economics': 'c10026', 'HRM': 'c10026', 'Events': 'c10355', 'Tourism': 'c10355', 'Sport Business': 'c10432', 'Digital Creative Enterprise': 'c10432', '': 'c09122', 'Applied Physics': 'c10162', 'Biomedical Science': 'c10162', 'Biotechnology': 'c10162', 'Chemistry': 'c10442', 'Environmental Biology': 'c10162', 'Marine Biology': 'c10162', 'Mathematics': 'c10162', 'Medical Science': 'c10162', 'Nanotechnology': 'c10162', 'Human Resource Management': 'c10169', 'Business Information Systems and Finance': 'c10278', 'Sports': 'c10355', 'Sport': 'c10412', 'Biology': 'c10442', 'CSI': 'c10442', 'Digital Forensics': 'c10442', 'English': 'c10405', 'TAS': 'c10406', 'Primary': 'c10444', 'Secondary': 'c10444', 'Biomedical Engineering': 'c09067', 'Civil Engineering': 'c09067', 'Civil and Environmental Engineering': 'c09067', 'Electrical Engineering': 'c09128', 'Electronic Engineering': 'c09067', 'Mechanical Engineering': 'c09067'

In [31]:
del majors[""]

In [7]:
print(len(subjects))

967


In [8]:
print(courseAreas)

{'Analytics and Data Science': {'abbreviation': 'ads', 'url': 'https://www.handbook.uts.edu.au/ads/index.html', 'ug_degrees': None, 'pg_degrees': [{'name': 'Doctor of Philosophy', 'degreeCode': 'c02062', 'subjects': {}}, {'name': 'Doctor of Philosophy', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Learning Analytics (Research)', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c04372', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}, {'name': 'Graduate Certificate in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}]}, 'Business': {'abbreviation': 'bus', 'url': 'https://www.handbook.uts.edu.au/bus/index.html', 'ug_degrees': [{'name': 'Bac

## Logic error with appending subjectCode to subjects with in courseAreas (Only run below cell if you want to attempt to store subject codes in courseAreas dictionary, not necessary now though)
Not sure why but it saves a single subject code 3 times (only for some subject codes).
- course code minus the last digit
- course code with line break (\n) at the start and minus the last digit
- the correct course code

Also there are duplicates.

Cannot find the course of this error so I will just be fixing it by doing a second parse of the dictionary which removes invalid course codes.

In [9]:
for courseName in courseAreas:
    if courseAreas[courseName]["ug_degrees"] != None:
        for degree_index in range(len(courseAreas[courseName]["ug_degrees"])):
            for year in courseAreas[courseName]["ug_degrees"][degree_index]["subjects"]:
                for sem in courseAreas[courseName]["ug_degrees"][degree_index]["subjects"][year]:
                    for subject in courseAreas[courseName]["ug_degrees"][degree_index]["subjects"][year][sem]:
                        print(subject.isdigit(), subject)
                        if len(subject) < 5 or not subject.isdigit():
                            courseAreas[courseName]["ug_degrees"][degree_index]["subjects"][year][sem].remove(subject)

True 26100
True 22107
True 23115
True 26134
True 22107
True 26100
True 22207
True 21129
True 24108
True 25300
True 26100
True 22107
True 23115
True 26134
True 23115
True 26134
True 22107
True 26100
True 22420
True 22321
True 25556
True 25624
True 22207
True 21129
True 24108
True 25300
True 21129
True 22207
True 23115
True 26134
True 22320
True 79014
True 25503
True 25557
True 24202
True 24309
True 21510
True 21512
True 25300
True 24108
True 21129
True 22207
True 22522
True 79017
True 25410
True 24210
True 24415
True 21511
True 21440
True 23566
True 25556
True 25300
True 24108
True 22421
True 22319
True 21513
True 21504
True 23571
True 21510
True 21555
True 21036
True 24100
True 23568
True 23567
True 21407
True 21037
True 25624
True 21511
True 21658
True 21440
True 25503
True 21510
True 21512
True 21656
True 23580
True 21646
True 21228
True 21657
True 21505
True 22107
True 22207
True 22605
True 25300
True 23115
True 26100
True 26134
True 22157
True 24108
True 22321
True 22320
True 79017

True 22207
True 97600
True 97601
True 97602
True 97603
True 97604
True 97642
True 97742
True 97842
True 26100
True 22107
True 26134
True 23115
True 21129
True 24108
True 25300
True 22207
True 97600
True 97601
True 97602
True 97603
True 97604
True 97642
True 97742
True 97842
True 21129
True 22107
True 23115
True 26100
True 24108
True 26134
True 70102
True 70103
True 70102
True 70103
True 26100
True 22107
True 22207
True 70311
True 70114
True 21129
True 24108
True 26134
True 23115
True 70211
True 70616
True 25300
True 22207
True 70311
True 70114
True 70104
True 21555
True 21510
True 70109
True 70211
True 70616
True 25300
True 70327
True 21440
True 70617
True 70108
True 70104
True 21555
True 21510
True 70109
True 70317
True 70517
True 21037
True 70327
True 21440
True 70617
True 70108
True 21036
True 21407
True 21512
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 70317
True 70517
True 21037
True 71116
True 70417
True 21505
True 21036
True 21407

True 21407
True 21505
True 91176
True 91709
True 21510
True 91705
True 91176
True 91709
True 21407
True 21505
True 21512
True 21037
True 22107
True 26100
True 91161
True 65111
True 21129
True 24108
True 91400
True 65212
True 22107
True 26100
True 91161
True 65111
True 23115
True 26134
True 91703
True 91314
True 21129
True 24108
True 91314
True 91707
True 25300
True 68041
True 22207
True 91239
True 23115
True 26134
True 91400
True 65212
True 21555
True 21510
True 91707
True 91320
True 25300
True 22207
True 91320
True 91703
True 21440
True 91705
True 21036
True 91175
True 21555
True 91175
True 91239
True 68041
True 21512
True 21037
True 91706
True 91403
True 21440
True 91706
True 91403
True 21036
True 21407
True 21505
True 91176
True 91709
True 21510
True 91705
True 91176
True 91709
True 21407
True 21505
True 21512
True 21037
True 22107
True 26100
True 60001
True 65111
True 21129
True 24108
True 91400
True 65212
True 22107
True 26100
True 60001
True 65111
True 23115
True 26134
True 91161

True 21510
True 21227
True 21510
True 21648
True 21651
True 21648
True 21651
True 21643
True 21638
True 21643
True 21634
True 21644
True 21635
True 23115
True 21663
True 81539
True 81540
True 81538
True 81539
True 81538
True 94663
True 94663
True 94657
True 94665
True 94657
True 94665
True 94662
True 94662
True 94658
True 94658
True 48510
True 68037
True 33130
True 48230
True 81511
True 33230
True 65111
True 48441
True 48622
True 48023
True 48221
True 81512
True 48240
True 91161
True 31271
True 91400
True 41035
True 81513
True 48250
True 48520
True 91703
True 81514
True 48260
True 91705
True 91403
True 91706
True 48623
True 48560
True 31005
True 31256
True 31250
True 81515
True 41029
True 48270
True 91403
True 91706
True 48623
True 48560
True 31005
True 31256
True 31250
True 81516
True 81522
True 41030
True 91403
True 91706
True 48623
True 48560
True 31005
True 31256
True 31250
True 42001
True 49274
True 49275
True 49261
True 41028
True 81521
True 81531
True 41055
True 81524
True 81532

True 93219
True 81532
True 65111
True 60001
True 91314
True 33116
True 65111
True 60001
True 91314
True 33116
True 81511
True 81511
True 65213
True 68041
True 91400
True 91161
True 65213
True 68041
True 91400
True 91161
True 81512
True 81512
True 91703
True 91320
True 91500
True 91703
True 91320
True 65202
True 81513
True 81513
True 91239
True 91812
True 91401
True 91239
True 91812
True 65001
True 81514
True 81514
True 91707
True 91706
True 91813
True 99584
True 91707
True 91813
True 99584
True 81515
True 81515
True 91709
True 91708
True 91709
True 91708
True 91330
True 81516
True 81522
True 81516
True 81522
True 81521
True 81531
True 81531
True 81525
True 81523
True 81541
True 81524
True 81532
True 81525
True 81523
True 81528
True 81524
True 81532
True 81528
True 81521
True 65111
True 60001
True 91314
True 33116
True 65111
True 60001
True 91314
True 33116
True 81511
True 81511
True 65213
True 68041
True 91400
True 91161
True 65213
True 68041
True 91400
True 91161
True 81512
True 81512

True 83724
True 83341
True 83723
True 83119
True 83621
True 83622
True 83722
True 83000
True 85202
True 83721
True 83231
True 83882
True 83233
True 83821
True 85302
True 88834
True 83724
True 83341
True 83723
True 83822
True 83823
True 83722
True 83000
True 85202
True 83721
True 83821
True 85302
True 88834
True 83822
True 83823
True 87631
True 87100
True 85502
True 87632
True 87222
True 85503
True 85502
True 85503
True 87731
True 87441
True 87631
True 87100
True 87443
True 87009
True 85202
True 87632
True 87222
True 87832
True 85302
True 87012
True 87731
True 87441
True 87011
True 87010
True 87443
True 87009
True 85202
True 87832
True 85302
True 87012
True 87011
True 87010
True 87631
True 87100
True 85502
True 87632
True 87222
True 85503
True 85502
True 85503
True 87731
True 87441
True 87631
True 87100
True 87443
True 87009
True 85202
True 87632
True 87222
True 87832
True 85302
True 87012
True 87731
True 87441
True 87011
True 87010
True 87443
True 87009
True 85202
True 87832
True 85302

True 16233
True 16232
True 16658
True 16237
True 33130
True 33190
True 65111
True 68101
True 60001
True 65111
True 68101
True 60001
True 33130
True 33190
True 33116
True 65111
True 91161
True 60001
True 33230
True 33290
True 65212
True 68201
True 60101
True 68070
True 33230
True 33290
True 65212
True 68201
True 60101
True 68070
True 91123
True 65212
True 68041
True 91400
True 33360
True 01323
True 01004
True 01004
True 01324
True 01004
True 01004
True 91154
True 01324
True 01004
True 01004
True 68413
True 68206
True 65411
True 01004
True 65411
True 68206
True 01004
True 91132
True 65621
True 01004
True 01004
True 37234
True 37233
True 01324
True 33360
True 37233
True 01323
True 91314
True 91121
True 91320
True 01317
True 37335
True 01324
True 01004
True 68413
True 01324
True 01004
True 91352
True 91178
True 01324
True 01317
True 01323
True 01340
True 01340
True 01317
True 01324
True 01324
True 01340
True 01340
True 01340
True 01324
True 01323
True 01318
True 01324
True 01340
True 01340

True 48023
True 33130
True 48230
True 48510
True 48430
True 33130
True 48230
True 68037
True 48510
True 33230
True 48023
True 41160
True 91400
True 33230
True 48321
True 48230
True 48320
True 60101
True 33230
True 48321
True 48230
True 48320
True 60101
True 33230
True 48321
True 48230
True 48320
True 60101
True 33230
True 48321
True 48320
True 48230
True 33230
True 48441
True 48521
True 48520
True 33230
True 48510
True 48620
True 60101
True 33230
True 48510
True 48620
True 48621
True 33230
True 48510
True 48620
True 48621
True 33230
True 41084
True 48230
True 41092
True 31250
True 33230
True 48024
True 31282
True 31271
True 33230
True 48441
True 41033
True 41084
True 33230
True 48520
True 41033
True 41084
True 48240
True 41090
True 31061
True 91703
True 65111
True 41162
True 41035
True 48240
True 48221
True 48331
True 48340
True 41035
True 48240
True 48221
True 48331
True 48340
True 41035
True 48240
True 48221
True 48331
True 48340
True 41035
True 48240
True 48331
True 48840
True 48221

True 48260
True 91142
True 91320
True 48580
True 48434
True 48561
True 48582
True 48260
True 91309
True 91120
True 48580
True 48434
True 48561
True 48582
True 91703
True 48260
True 48580
True 48434
True 48561
True 48582
True 48270
True 65411
True 48560
True 48570
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 48270
True 68413
True 48560
True 48570
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 91326
True 91330
True 48560
True 48570
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 48270
True 48560
True 48570
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 91145
True 91126
True 48560
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 48570
True 48270
True 91705
True 48560
True 48570
True 48451
True 48571
True 48572
True 49274
True 48450
True 48550
True 48583
True 41055
True 65509
True 41029
True 67305
True 67509
True 68075

True 32144
True 31276
True 31280
True 32144
True 41183
True 41026
True 32144
True 32144
True 31272
True 32144
True 37335
True 37363
True 32144
True 31275
True 31272
True 31338
True 48450
True 32144
True 37343
True 37344
True 32144
True 41171
True 41174
True 41172
True 41175
True 37161
True 31250
True 31005
True 42028
True 37262
True 41052
True 32144
True 37253
True 37357
True 32931
True 31272
True 41004
True 42913
True 31256
True 32146
True 42028
True 41077
True 41042
True 41043
True 41041
True 31005
True 41170
True 31243
True 32931
True 31272
True 31245
True 32931
True 31272
True 41184
True 41180
True 32931
True 31777
True 31242
True 41889
True 31927
True 41001
True 31253
True 41113
True 41114
True 41052
True 31272
True 32931
True 41021
True 32931
True 31272
True 37234
True 32931
True 31261
True 31274
True 41903
True 41905
True 32009
True 32011
True 42037
True 42036
True 41890
True 41891
True 48436
True 48033
True 32931
True 31272
True 37345
True 37262
True 32931
True 31272
True 41171

True 25579
True 25575
True 25999
True 31265
True 31266
True 48023
True 31268
True 81511
True 31269
True 41092
True 81512
True 31271
True 81513
True 81514
True 31272
True 81515
True 81516
True 81522
True 81521
True 81531
True 81524
True 81532
True 81525
True 81523
True 81528
True 31265
True 31266
True 48023
True 31268
True 81511
True 31269
True 41092
True 81512
True 31271
True 81513
True 81514
True 31272
True 81515
True 81516
True 81522
True 81521
True 81531
True 81524
True 81532
True 81525
True 81523
True 81528
True 31265
True 31266
True 48023
True 31268
True 31269
True 41092
True 31271
True 97201
True 97600
True 97202
True 97203
True 97204
True 97621
True 99218
True 99221
True 31272
True 31265
True 31266
True 48023
True 31268
True 31269
True 41092
True 31271
True 97201
True 97600
True 97202
True 97203
True 97204
True 97621
True 99218
True 99221
True 31272
True 70102
True 70103
True 70114
True 70102
True 70103
True 70211
True 70311
True 70616
True 70311
True 70114
True 70317
True 70104

True 70211
True 70616
True 23592
True 70104
True 70327
True 23572
True 23002
True 70317
True 71116
True 70109
True 70617
True 70517
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 70417
True 23003
True 70108
True 76090
True 76040
True 23000
True 23565
True 26134
True 70102
True 23567
True 23001
True 23571
True 70103
True 23568
True 70311
True 70114
True 70211
True 70616
True 23592
True 70104
True 70327
True 23572
True 23002
True 70317
True 71116
True 70109
True 70617
True 70517
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 70417
True 23003
True 70108
True 76090
True 76040
True 70102
True 70103
True 65242
True 65111
True 35255
True 70311
True 70114
True 65212
True 70211
True 91161
True 65342
True 33116
True 70616
True 70327
True 70104
True 65312
True 70317
True 71116
True 91314
True 70517
True 70617
True 91132
True 70417
True 70109
True 65316
True 91137
True 65313
True 70108
True 76008
True 76033

True 76081
True 78039
True 76902
True 76012
True 76068
True 70102
True 70103
True 70114
True 81511
True 70211
True 70311
True 70616
True 81512
True 70327
True 70104
True 70109
True 81513
True 70317
True 70517
True 70617
True 70108
True 81514
True 71116
True 70417
True 81515
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 81516
True 81531
True 81522
True 81521
True 81528
True 81524
True 81532
True 81525
True 81523
True 70102
True 70103
True 70114
True 81511
True 70211
True 70311
True 70616
True 81512
True 70327
True 70104
True 70109
True 81513
True 70317
True 70517
True 70617
True 70108
True 81514
True 71116
True 70417
True 81515
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 81516
True 81531
True 81522
True 81521
True 81528
True 81524
True 81532
True 81525
True 81523
True 54000
True 54070
True 70102
True 54001
True 54071
True 70103
True 54002
True 54072
True 70114
True 70211
True 54073
True 70311

True 37141
True 60001
True 37131
True 37151
True 65111
True 91161
True 33116
True 60001
True 65111
True 91161
True 33116
True 60001
True 33190
True 65111
True 68101
True 60001
True 60001
True 65111
True 91107
True 33116
True 37141
True 60001
True 37131
True 37151
True 65212
True 33290
True 68201
True 91161
True 68070
True 91400
True 65621
True 65111
True 68101
True 60001
True 33130
True 65212
True 33290
True 68070
True 68201
True 65111
True 60001
True 68101
True 33130
True 65212
True 91123
True 91400
True 68041
True 65111
True 91400
True 91123
True 68041
True 65212
True 91123
True 91400
True 68041
True 65111
True 91400
True 91161
True 60001
True 37252
True 37132
True 37161
True 60001
True 37131
True 37151
True 65212
True 91123
True 91400
True 68041
True 65111
True 91400
True 91123
True 68041
True 65212
True 91123
True 91400
True 68041
True 68101
True 33130
True 68070
True 65111
True 65212
True 33290
True 68070
True 68201
True 37132
True 60001
True 37131
True 37151
True 91161
True 68041

True 65242
True 91161
True 65312
True 65212
True 91161
True 35255
True 65312
True 65111
True 65242
True 60001
True 65312
True 65212
True 91161
True 35255
True 65312
True 65111
True 65242
True 60001
True 65312
True 48023
True 31250
True 35255
True 65312
True 65212
True 65212
True 65212
True 65342
True 91314
True 65316
True 91320
True 65342
True 91314
True 60001
True 33116
True 65342
True 65202
True 65306
True 65316
True 65202
True 65342
True 65316
True 65306
True 65342
True 68041
True 65316
True 65202
True 65342
True 68041
True 65202
True 65316
True 65342
True 48730
True 65316
True 65325
True 91132
True 91137
True 91138
True 65320
True 91140
True 91326
True 91330
True 91400
True 35255
True 91132
True 91137
True 65320
True 91140
True 91330
True 91400
True 91138
True 91326
True 65409
True 65508
True 65544
True 65644
True 65411
True 65001
True 68070
True 91400
True 91161
True 35255
True 65508
True 65409
True 65321
True 65322
True 91138
True 65544
True 65644
True 65322
True 91138
True 65321

True 76012
True 76068
True 70108
True 76090
True 91145
True 91309
True 70108
True 91118
True 76090
True 91156
True 70108
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 76090
True 70108
True 91706
True 76090
True 91403
True 76040
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 76040
True 91338
True 91177
True 91180
True 76040
True 76040
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 76040
True 76040
True 76008
True 76033
True 76057
True 76081
True 78039
True 76902
True 76012
True 76068
True 76040
True 70102
True 70103
True 91161
True 65111
True 70311
True 70114
True 91400
True 70211
True 70616
True 33116
True 70104
True 65212
True 68041
True 91239
True 70327
True 70317
True 71116
True 91703
True 70109
True 91705
True 70617
True 91175
True 91707
True 91320
True 70517

True 91154
True 21129
True 24108
True 91107
True 33116
True 23115
True 26134
True 37141
True 37233
True 21129
True 24108
True 37141
True 37233
True 23115
True 26134
True 91703
True 91314
True 21129
True 24108
True 91707
True 91314
True 23115
True 26134
True 68101
True 33360
True 21129
True 24108
True 33230
True 68101
True 23115
True 26134
True 68101
True 33360
True 23115
True 26134
True 65212
True 68201
True 25300
True 91132
True 91330
True 22207
True 23115
True 26134
True 91400
True 65212
True 25300
True 91400
True 91401
True 22207
True 23115
True 26134
True 65212
True 91400
True 25300
True 68201
True 65409
True 22207
True 23115
True 26134
True 65212
True 68201
True 25300
True 91363
True 91161
True 22207
True 23115
True 26134
True 65621
True 91363
True 25300
True 91161
True 91170
True 22207
True 23115
True 26134
True 65621
True 91161
True 25300
True 22207
True 37171
True 37181
True 23115
True 26134
True 37132
True 37161
True 25300
True 68041
True 22207
True 91239
True 23115
True 26134

True 91563
True 91314
True 91320
True 91142
True 91707
True 91320
True 91314
True 91703
True 91707
True 37233
True 37242
True 37262
True 37233
True 37242
True 37262
True 91154
True 91110
True 91168
True 91121
True 81513
True 81513
True 81513
True 81513
True 81513
True 81513
True 81513
True 81513
True 65508
True 65411
True 65409
True 68412
True 68413
True 68206
True 91132
True 91326
True 91330
True 91401
True 91132
True 91326
True 91330
True 91401
True 91132
True 91239
True 91330
True 91401
True 91148
True 37171
True 37335
True 37181
True 37253
True 37336
True 37343
True 37344
True 37357
True 37363
True 37171
True 37335
True 37253
True 37357
True 37363
True 91159
True 91363
True 91157
True 91270
True 91170
True 91155
True 81514
True 81514
True 81514
True 81514
True 81514
True 81514
True 81514
True 81514
True 65509
True 65545
True 65606
True 67510
True 65005
True 68316
True 68606
True 68416
True 68414
True 37234
True 37345
True 37356
True 37373
True 37234
True 37356
True 37373
True 91309

True 81522
True 81521
True 81531
True 81521
True 81531
True 81521
True 81531
True 81521
True 81531
True 81524
True 81532
True 81525
True 81523
True 81528
True 81524
True 81532
True 81525
True 81523
True 81528
True 81524
True 81532
True 81525
True 81523
True 81528
True 81524
True 81532
True 81525
True 81523
True 81528
True 70102
True 70103
True 65242
True 33116
True 70102
True 70103
True 65242
True 65111
True 70102
True 70103
True 65242
True 65111
True 70102
True 70103
True 65242
True 65111
True 35255
True 70311
True 70114
True 65312
True 65212
True 70311
True 70114
True 65312
True 65312
True 70311
True 70114
True 65312
True 70311
True 70114
True 65212
True 65212
True 70211
True 65316
True 65325
True 41092
True 70211
True 91161
True 33116
True 68041
True 70211
True 65342
True 33116
True 70211
True 65342
True 65202
True 33116
True 70616
True 70104
True 48023
True 70109
True 70616
True 70104
True 35255
True 70109
True 70616
True 70104
True 91161
True 70109
True 70616
True 70104
True 35255

True 37233
True 97601
True 24309
True 37253
True 97602
True 31005
True 31253
True 97602
True 31253
True 37253
True 97602
True 31253
True 37335
True 97602
True 31253
True 31250
True 97603
True 97600
True 31250
True 97603
True 97600
True 37262
True 97603
True 97600
True 37262
True 97603
True 97600
True 97604
True 97642
True 31005
True 31253
True 97604
True 97642
True 31005
True 31253
True 97604
True 97642
True 97604
True 97642
True 99218
True 99218
True 99218
True 99218
True 99221
True 99221
True 99221
True 99221
True 37356
True 37373
True 37242
True 37373
True 37253
True 37262
True 31250
True 37356
True 37373
True 31250
True 37234
True 37242
True 31253
True 37373
True 37357
True 37399
True 37343
True 37344
True 37399
True 37357
True 37399
True 31005
True 31253
True 37363
True 37399
True 33116
True 65111
True 65242
True 60001
True 33116
True 65111
True 65242
True 60001
True 33116
True 65111
True 65242
True 60001
True 33116
True 31268
True 41092
True 65242
True 65212
True 91161
True 35255

In [10]:
print(courseAreas)

{'Analytics and Data Science': {'abbreviation': 'ads', 'url': 'https://www.handbook.uts.edu.au/ads/index.html', 'ug_degrees': None, 'pg_degrees': [{'name': 'Doctor of Philosophy', 'degreeCode': 'c02062', 'subjects': {}}, {'name': 'Doctor of Philosophy', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Learning Analytics (Research)', 'degreeCode': 'c03064', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c04372', 'subjects': {}}, {'name': 'Master of Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c06124', 'subjects': {}}, {'name': 'Graduate Diploma in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}, {'name': 'Graduate Certificate in Data Science and Innovation', 'degreeCode': 'c11274', 'subjects': {}}]}, 'Business': {'abbreviation': 'bus', 'url': 'https://www.handbook.uts.edu.au/bus/index.html', 'ug_degrees': [{'name': 'Bac

### Degree Data is written to JSON file

In [27]:
with open('courseData.json', 'w') as fp:
    json.dump(courseAreas, fp, indent = 4)

In [32]:
with open('courseMajorsData.json', 'w') as fp:
    json.dump(majors, fp, indent = 4)

In [20]:
with open('subjectData.json', 'w') as fp:
    json.dump(subjects, fp, indent = 4)