#  CAO Points Analysis


https://www.independent.ie/life/family/learning/understanding-your-cao-course-guide-26505318.html


The mid-point is the points score of the applicant in the middle of the list of offerees when that list is placed in points score order.


Where the letters AQA appear instead of points, it means that all qualified applicants for a course were offered a place. AQA sometimes suggests that the points of the last qualified applicant being offered a place were less than 100.

matriculation - http://www2.cao.ie/downloads/documents/Guidelines-EU-EFTA.pdf


# Imports

In [218]:
# Regular expressions 
import re
# Convenient HTTP requests
import requests as rq
# Dates and times
import datetime as dt
# Import numpy module
import numpy as np
# For downloading.
import urllib.request as urlrq
import urllib.parse as urlpar
# Import tabula to read table in pdf
import tabula as tb
# Import matplotlib
import matplotlib.pyplot as plt
# Import time
import time
# import pandas
import pandas as pd 
# import seaborn
import seaborn as sns
# import warnings
import warnings
warnings.filterwarnings("ignore")

# Error Checks

In [219]:
########################################################################
# Global timestamp, referenced by every file name saved in this run
########################################################################
# Capture the moment this cell is executed
now = dt.datetime.now()

# Render it as YYYYMMDD_HHMMSS so it can be embedded in file names
nowstr = f"{now:%Y%m%d_%H%M%S}"

print(nowstr)

20211221_031223


In [220]:
########################################################################
# Function to test URLs
# https://pytutorial.com/check-url-is-reachable
########################################################################
def url_checker(url, timeout=30):
    """Report whether *url* answers an HTTP GET with a success status.

    A line saying whether the URL is reachable is printed either way.

    Parameters
    ----------
    url : str
        Address to test.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that an
        unresponsive host cannot hang the run indefinitely.

    Returns
    -------
    bool
        True when the response status is below 400, False otherwise.

    Raises
    ------
    SystemExit
        When the request itself fails (connection error, timeout, ...).
        The exception carries the error text, so the failure is shown and
        the process exit status is non-zero.
    """
    # Keep the try body to the one call that can raise
    try:
        resp = rq.get(url, timeout=timeout)
    except rq.exceptions.RequestException as e:
        # Pass the message itself to SystemExit; wrapping it in print()
        # would hand SystemExit the value None, i.e. a "success" exit.
        raise SystemExit(f"{url}: is Not reachable \nErr: {e}") from e

    # Any 4xx/5xx answer means the resource is not usable, not just 404
    if resp.ok:
        print(f"{url}: is reachable")
        return True

    print(f"{url}: is not reachable (HTTP {resp.status_code})")
    return False

# Save functions

In [221]:
########################################################################
# Function to save CAO Webpages 
########################################################################

def htmlcopy(url, timeout=30):
    """Save the HTML served at *url* to a timestamped file under ``data/``.

    The file is named ``data/<nowstr>_CAO_Webpage_<tag>.html``. ``<tag>`` is
    the value of the URL's ``p`` query parameter; when the URL has no ``p``
    parameter the last four characters of the URL are used instead.

    Parameters
    ----------
    url : str
        Page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    None
        Nothing is written when the response status is not 200.
    """
    # Local import: only needed to make sure the output folder exists
    import os

    # Fetch the CAO points URL
    resp = rq.get(url, timeout=timeout)

    # Only a '<Response [200]>' is saved
    if resp.status_code != 200:
        print(f"{url}: is not reachable")
        return
    print(f"{url}: is reachable")

    # Take the tag from the query string rather than from a fixed slice
    query = urlpar.parse_qs(urlpar.urlsplit(url).query)
    tag = query['p'][0] if 'p' in query else url[-4:]

    path = 'data/' + nowstr + '_CAO_Webpage_' + tag + '.html'
    print(str(path))

    # Opening the file fails if the folder is missing
    os.makedirs('data', exist_ok=True)

    # Explicit encoding: the platform default may be unable to encode
    # every character in the page text.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(resp.text)
    

In [222]:
########################################################################
# Function to save files
########################################################################

def caosavefile(url):
    """Download *url* into ``data/`` under a timestamped file name.

    The saved name is ``data/<nowstr>_CAO_file_<name>`` where ``<name>`` is
    the last component of the URL's path. The destination path is printed
    before the download starts.
    """
    # Last path component of the URL, e.g. 'lvl8_19.pdf'
    filename = urlpar.urlsplit(url).path.rsplit("/", 1)[-1]
    path = f"data/{nowstr}_CAO_file_{filename}"
    print(str(path))
    urlrq.urlretrieve(url, path)

# CAO Webpage links

In [223]:
########################################################################
# CAO points web pages, one per year (2018 to 2021)
########################################################################

caopointshtml = [
    'https://www.cao.ie/index.php?page=points&p=2018',
    'https://www.cao.ie/index.php?page=points&p=2019',
    'https://www.cao.ie/index.php?page=points&p=2020',
    'https://www.cao.ie/index.php?page=points&p=2021',
]

# Keep a name per year as well as the list
html2018, html2019, html2020, html2021 = caopointshtml

# Save a timestamped copy of every page
for url in caopointshtml:
    htmlcopy(url)

https://www.cao.ie/index.php?page=points&p=2018: is reachable
data/20211221_031223_CAO_Webpage_2018.html
https://www.cao.ie/index.php?page=points&p=2019: is reachable
data/20211221_031223_CAO_Webpage_2019.html
https://www.cao.ie/index.php?page=points&p=2020: is reachable
data/20211221_031223_CAO_Webpage_2020.html
https://www.cao.ie/index.php?page=points&p=2021: is reachable
data/20211221_031223_CAO_Webpage_2021.html


# CAO Points location

In [224]:
########################################################################
# CAO points files
#   2021, 2020 : one Excel workbook per year
#   2019, 2018 : two PDF files per year (lvl8 and lvl76)
########################################################################
CAO2021, CAO2020 = (
    'http://www2.cao.ie/points/CAOPointsCharts2021.xlsx',
    'http://www2.cao.ie/points/CAOPointsCharts2020.xlsx',
)
CAO2019_8, CAO2019_76 = (
    'http://www2.cao.ie/points/lvl8_19.pdf',
    'http://www2.cao.ie/points/lvl76_19.pdf',
)
CAO2018_8, CAO2018_76 = (
    'http://www2.cao.ie/points/lvl8_18.pdf',
    'http://www2.cao.ie/points/lvl76_18.pdf',
)

########################################################################
# List of URLs
# Each one is checked with url_checker and then downloaded
########################################################################
caopointslist = [
    CAO2021,
    CAO2020,
    CAO2019_8,
    CAO2019_76,
    CAO2018_8,
    CAO2018_76,
]

for url in caopointslist:
    url_checker(url)
    caosavefile(url)
    

http://www2.cao.ie/points/CAOPointsCharts2021.xlsx: is reachable
data/20211221_031223_CAO_file_CAOPointsCharts2021.xlsx
http://www2.cao.ie/points/CAOPointsCharts2020.xlsx: is reachable
data/20211221_031223_CAO_file_CAOPointsCharts2020.xlsx
http://www2.cao.ie/points/lvl8_19.pdf: is reachable
data/20211221_031223_CAO_file_lvl8_19.pdf
http://www2.cao.ie/points/lvl76_19.pdf: is reachable
data/20211221_031223_CAO_file_lvl76_19.pdf
http://www2.cao.ie/points/lvl8_18.pdf: is reachable
data/20211221_031223_CAO_file_lvl8_18.pdf
http://www2.cao.ie/points/lvl76_18.pdf: is reachable
data/20211221_031223_CAO_file_lvl76_18.pdf


# Set up data 

# 2021 Points
# Load the spreadsheet using pandas

https://www.cao.ie/index.php?page=points&p=2021

In [225]:
# Download and parse the 2021 excel spreadsheet.
# 1451 rows × 15 columns
# The first 11 rows are skipped (skiprows=11); the row after them
# supplies the column names.
# https://stackoverflow.com/questions/11346283/renaming-column-names-in-pandas
df = pd.read_excel(CAO2021, skiprows=11)

# Spreadsheet column -> short name, listed in the order the columns are kept.
# Note that 'R2 Points ' is selected with its trailing space.
cols2021 = {
    'Course Code': 'Code',
    'Course Title': 'Title',
    'R1 Points': 'R1 Points',
    'R2 Points ': 'R2 Points',
    'EOS Points': 'EOS',
    'EOS Midpoints': 'Mid',
    'CATEGORY (ISCED Description)': 'Category',
    'HEI': 'College',
}

# Keep only the mapped columns, then apply the short names
DF2021 = df[list(cols2021)].rename(columns=cols2021)

DF2021

Unnamed: 0,Code,Title,R1 Points,R2 Points,EOS,Mid,Category,College
0,AL605,Music and Instrument Technology,211,,211,319,Engineering and engineering trades,Athlone Institute of Technology
1,AL630,Pharmacy Technician,308,,308,409,Health,Athlone Institute of Technology
2,AL631,Dental Nursing,311,,311,400,Health,Athlone Institute of Technology
3,AL632,Applied Science,297,,297,454,Biological and related sciences,Athlone Institute of Technology
4,AL650,Business,AQA,AQA,AQA,351,Business and administration,Athlone Institute of Technology
...,...,...,...,...,...,...,...,...
1446,WD211,Creative Computing,270,,270,392,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1447,WD212,Recreation and Sport Management,262,,262,304,Personal services,Waterford Institute of Technology
1448,WD230,Mechanical and Manufacturing Engineering,230,230,230,361,Engineering and engineering trades,Waterford Institute of Technology
1449,WD231,Early Childhood Care and Education,266,,266,366,Welfare,Waterford Institute of Technology


# 2020 Points
# Load the spreadsheet using pandas

https://www.cao.ie/index.php?page=points&p=2020


In [226]:
# Download and parse the 2020 excel spreadsheet.
# 1464 rows × 23 columns
# The first 10 rows are skipped; the row after them supplies the column names.
df = pd.read_excel(CAO2020, skiprows=10)

# Spreadsheet column -> short name, listed in the order the columns are kept.
cols2020 = {
    'COURSE CODE2': 'Code',
    'COURSE TITLE': 'Title',
    'R1 POINTS': 'R1 Points',
    'R2 POINTS': 'R2 Points',
    'EOS': 'EOS',
    'EOS Mid-point': 'Mid',
    'CATEGORY (i.e.ISCED description)': 'Category',
    'HEI': 'College',
}

# Keep only the mapped columns, then apply the short names
DF2020 = df[list(cols2020)].rename(columns=cols2020)

DF2020

Unnamed: 0,Code,Title,R1 Points,R2 Points,EOS,Mid,Category,College
0,AC120,International Business,209,,209,280,Business and administration,American College
1,AC137,Liberal Arts,252,,252,270,Humanities (except languages),American College
2,AD101,"First Year Art & Design (Common Entry,portfolio)",#+matric,,#+matric,#+matric,Arts,National College of Art and Design
3,AD102,Graphic Design and Moving Image Design (portfo...,#+matric,,#+matric,#+matric,Arts,National College of Art and Design
4,AD103,Textile & Surface Design and Jewellery & Objec...,#+matric,,#+matric,#+matric,Arts,National College of Art and Design
...,...,...,...,...,...,...,...,...
1459,WD208,Manufacturing Engineering,188,,188,339,Manufacturing and processing,Waterford Institute of Technology
1460,WD210,Software Systems Development,279,,279,337,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1461,WD211,Creative Computing,271,,271,318,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1462,WD212,Recreation and Sport Management,270,,270,349,Personal services,Waterford Institute of Technology


# 2019 Points
# Load the points data published as PDF

https://www.cao.ie/index.php?page=points&p=2019

- CAO2019_8 = 'http://www2.cao.ie/points/lvl8_19.pdf' - 930 rows × 4 columns
- CAO2019_76 = 'http://www2.cao.ie/points/lvl76_19.pdf' - 461 rows × 4 columns


https://stackoverflow.com/questions/37826926/how-to-trim-starting-spaces-of-entire-column-in-libreoffice-or-google-sheets

Remove all special characters



In [227]:
# Load lvl8_19.csv into a pandas dataframe (comma is the default separator)
df2019lvl8 = pd.read_csv('lvl8_19.csv')

df2019lvl8

Unnamed: 0,Code,Title,EOS,Mid
0,AL801,Software Design with Virtual Reality and Gaming,304,328.0
1,AL802,Software Design with Cloud Computing,301,306.0
2,AL803,Software Design with Mobile Apps and Connected...,309,337.0
3,AL805,Network Management and Cloud Infrastructure,329,442.0
4,AL810,Quantity Surveying,307,349.0
...,...,...,...,...
925,WD200,Arts options,221,296.0
926,WD210,Software Systems Development,271,329.0
927,WD211,Creative Computing,275,322.0
928,WD212,Recreation and Sport Management,274,311.0


In [228]:
# Load lvl76_19.csv into a pandas dataframe (comma is the default separator)
df2019lvl76 = pd.read_csv('lvl76_19.csv')

df2019lvl76

Unnamed: 0,Code,Title,EOS,Mid
0,AL600,Software Design,205,306.0
1,AL601,Computer Engineering,196,272.0
2,AL602,Mechanical Engineering,258,424.0
3,AL604,Civil Engineering,252,360.0
4,AL630,Pharmacy Technician,306,366.0
...,...,...,...,...
456,WD188,Applied Health Care,206,339.0
457,WD205,Molecular Biology with Biopharmaceutical Science,208,441.0
458,WD206,Electronic Engineering,191,322.0
459,WD207,Mechanical Engineering,179,330.0


# 2018 Points
# Load the points data published as PDF

https://www.cao.ie/index.php?page=points&p=2018

- CAO2018_8 = 'http://www2.cao.ie/points/lvl8_18.pdf' - 914 rows × 4 columns
- CAO2018_76 = 'http://www2.cao.ie/points/lvl76_18.pdf' - 471 rows × 4 columns

In [229]:
# Load lvl8_18.csv into a pandas dataframe (comma is the default separator)
df2018lvl8 = pd.read_csv('lvl8_18.csv')

df2018lvl8

Unnamed: 0,Code,Title,EOS,Mid
0,AL801,Software Design Game Development or Cloud Comp...,295,326.0
1,AL810,Quantity Surveying,300,340.0
2,AL820,Mechanical and Polymer Engineering,299,371.0
3,AL830,General Nursing,418,440.0
4,AL832,Psychiatric Nursing,377,388.0
...,...,...,...,...
909,WD197,The Internet of Things,260,329.0
910,WD200,Arts,220,299.0
911,WD210,Software Systems Development,289,327.0
912,WD211,Creative Computing,265,326.0


In [230]:
# Load lvl76_18.csv into a pandas dataframe (comma is the default separator)
df2018lvl76 = pd.read_csv('lvl76_18.csv')

df2018lvl76

Unnamed: 0,Code,Title,EOS,Mid
0,AL601,Electronics and Computer Engineering,240,321.0
1,AL602,Mechanical Engineering,201,299.0
2,AL604,Civil Engineering,243,320.0
3,AL630,Pharmacy Technician,306,388.0
4,AL631,Dental Nursing,307,348.0
...,...,...,...,...
466,WD205,Molecular Biology with Biopharmaceutical Science,217,398.0
467,WD206,Electronic Engineering,175,330.0
468,WD207,Mechanical Engineering,182,362.0
469,WD208,Manufacturing Engineering,180,298.0


# Join all Dataframes

DF2021 [['Code', 'Title', 'R1 Points', 'R2 Points', 'EOS', 'Mid', 'Category', 'College']]

DF2020 [['Code', 'Title', 'R1 Points', 'R2 Points', 'EOS', 'Mid', 'Category', 'College']]

df2019lvl8 [['Code', 'Title',  'EOS', 'Mid']]

df2019lvl76 [['Code', 'Title',  'EOS', 'Mid']]

df2018lvl8 [['Code', 'Title',  'EOS', 'Mid']]

df2018lvl76  [['Code', 'Title',  'EOS', 'Mid']]





Estimate = 1874 courses

In [231]:
# Display the 2021 frame again for reference before building the course list
DF2021

Unnamed: 0,Code,Title,R1 Points,R2 Points,EOS,Mid,Category,College
0,AL605,Music and Instrument Technology,211,,211,319,Engineering and engineering trades,Athlone Institute of Technology
1,AL630,Pharmacy Technician,308,,308,409,Health,Athlone Institute of Technology
2,AL631,Dental Nursing,311,,311,400,Health,Athlone Institute of Technology
3,AL632,Applied Science,297,,297,454,Biological and related sciences,Athlone Institute of Technology
4,AL650,Business,AQA,AQA,AQA,351,Business and administration,Athlone Institute of Technology
...,...,...,...,...,...,...,...,...
1446,WD211,Creative Computing,270,,270,392,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1447,WD212,Recreation and Sport Management,262,,262,304,Personal services,Waterford Institute of Technology
1448,WD230,Mechanical and Manufacturing Engineering,230,230,230,361,Engineering and engineering trades,Waterford Institute of Technology
1449,WD231,Early Childhood Care and Education,266,,266,366,Welfare,Waterford Institute of Technology


In [232]:

# Reduce every yearly frame to the same two columns so they can be stacked
# into one list of course codes and titles.
lvl76_18 = df2018lvl76[['Code', 'Title']]

lvl8_18 = df2018lvl8[['Code', 'Title']]

lvl76_19 = df2019lvl76[['Code', 'Title']]

# 'Title' must be kept here as well: without it, a course that is first
# seen in this frame ends up with a missing title once the frames are
# stacked and de-duplicated on 'Code'.
lvl8_19 = df2019lvl8[['Code', 'Title']]

lvl_20 = DF2020[['Code', 'Title']]

lvl_21 = DF2021[['Code', 'Title']]


In [233]:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html
# Estimate = 1874 individual course codes

# Per-year Code/Title frames, in the order they are stacked
frames = [
    lvl76_18,
    lvl8_18,
    lvl76_19,
    lvl8_19,
    lvl_20,
    lvl_21,
]

# Stack them row-wise; each frame keeps its own row labels
Codes = pd.concat(objs=frames)

Codes

Unnamed: 0,Code,Title
0,AL601,Electronics and Computer Engineering
1,AL602,Mechanical Engineering
2,AL604,Civil Engineering
3,AL630,Pharmacy Technician
4,AL631,Dental Nursing
...,...,...
1446,WD211,Creative Computing
1447,WD212,Recreation and Sport Management
1448,WD230,Mechanical and Manufacturing Engineering
1449,WD231,Early Childhood Care and Education


In [234]:
# One row per course code: keep the first occurrence, renumber rows from 0
AllCourses = Codes.drop_duplicates(subset='Code', keep='first').reset_index(drop=True)

AllCourses

Unnamed: 0,Code,Title
0,AL601,Electronics and Computer Engineering
1,AL602,Mechanical Engineering
2,AL604,Civil Engineering
3,AL630,Pharmacy Technician
4,AL631,Dental Nursing
...,...,...
1869,SG350,Robotics and Automation
1870,TL774,Inclusive Sport and Physical Activity
1871,TL874,Inclusive Sport and Physical Activity
1872,WD231,Early Childhood Care and Education


In [235]:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html

# Drop repeated course codes, then use 'Code' as the index of the result
AllCourses = (
    Codes.drop_duplicates(subset='Code')
    .reset_index(drop=True)
    .set_index('Code')
)

# Index the 2021 frame by 'Code' too, so the two frames align on it
DF2021.set_index('Code', inplace=True)

AllCourses

Unnamed: 0_level_0,Title
Code,Unnamed: 1_level_1
AL601,Electronics and Computer Engineering
AL602,Mechanical Engineering
AL604,Civil Engineering
AL630,Pharmacy Technician
AL631,Dental Nursing
...,...
SG350,Robotics and Automation
TL774,Inclusive Sport and Physical Activity
TL874,Inclusive Sport and Physical Activity
WD231,Early Childhood Care and Education


DF2021 [['Code', 'Title', 'R1 Points', 'R2 Points', 'EOS', 'Mid', 'Category', 'College']]

DF2020 [['Code', 'Title', 'R1 Points', 'R2 Points', 'EOS', 'Mid', 'Category', 'College']]

df2019lvl8 [['Code', 'Title',  'EOS', 'Mid']]

df2019lvl76 [['Code', 'Title',  'EOS', 'Mid']]

df2018lvl8 [['Code', 'Title',  'EOS', 'Mid']]

df2018lvl76  [['Code', 'Title',  'EOS', 'Mid']]

In [236]:
# Attach the 2021 points columns to the course list.
# Both frames are indexed by 'Code'; a left join keeps every course and
# leaves the points empty where a course has no 2021 row.
points_2021 = DF2021[['R1 Points', 'R2 Points', 'EOS', 'Mid']]
AllCourses = AllCourses.join(points_2021, how='left')

AllCourses

Unnamed: 0_level_0,Title,R1 Points,R2 Points,EOS,Mid
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL601,Electronics and Computer Engineering,,,,
AL602,Mechanical Engineering,,,,
AL604,Civil Engineering,,,,
AL630,Pharmacy Technician,308,,308,409
AL631,Dental Nursing,311,,311,400
...,...,...,...,...,...
SG350,Robotics and Automation,384,,384,478
TL774,Inclusive Sport and Physical Activity,121,121,121,378
TL874,Inclusive Sport and Physical Activity,341,,341,476
WD231,Early Childhood Care and Education,266,,266,366


In [None]:
# Index the 2020 frame by course code so the join aligns on 'Code'
DF2020.set_index('Code', inplace=True)

# AllCourses is already indexed by 'Code', so it is not re-indexed here:
# calling set_index('Code') on it again raises KeyError because 'Code' is
# no longer a column.

# Tag the columns with the year. The 2021 columns already in AllCourses
# use the same names, and DataFrame.join raises ValueError on overlapping
# column names when no suffix is given.
AllCourses = AllCourses.join(
    DF2020[['R1 Points', 'R2 Points', 'EOS', 'Mid']].add_suffix('_2020')
)


In [None]:
# Index the 2019 lvl8 frame by course code so the join aligns on 'Code'
df2019lvl8.set_index('Code', inplace=True)

# Tag the columns with year and file. Every yearly frame names its columns
# 'EOS' and 'Mid', and DataFrame.join raises ValueError on overlapping
# column names when no suffix is given.
AllCourses = AllCourses.join(df2019lvl8[['EOS', 'Mid']].add_suffix('_2019_lvl8'))


In [None]:

# Index the 2019 lvl76 frame by course code so the join aligns on 'Code'
df2019lvl76.set_index('Code', inplace=True)

# Tag the columns with year and file. Every yearly frame names its columns
# 'EOS' and 'Mid', and DataFrame.join raises ValueError on overlapping
# column names when no suffix is given.
AllCourses = AllCourses.join(df2019lvl76[['EOS', 'Mid']].add_suffix('_2019_lvl76'))


In [None]:
# Index the 2018 lvl76 frame by course code so the join aligns on 'Code'
df2018lvl76.set_index('Code', inplace=True)

# Join the frame that was indexed in this cell (df2018lvl76), so the rows
# are matched on 'Code'. The columns are tagged with year and file because
# DataFrame.join raises ValueError on overlapping column names when no
# suffix is given.
AllCourses = AllCourses.join(df2018lvl76[['EOS', 'Mid']].add_suffix('_2018_lvl76'))


In [None]:
# Index the 2018 lvl8 frame by course code so the join aligns on 'Code'
df2018lvl8.set_index('Code', inplace=True)

# Join the frame that was indexed in this cell (df2018lvl8), so the rows
# are matched on 'Code'. The columns are tagged with year and file because
# DataFrame.join raises ValueError on overlapping column names when no
# suffix is given.
AllCourses = AllCourses.join(df2018lvl8[['EOS', 'Mid']].add_suffix('_2018_lvl8'))

AllCourses

# End