#  CAO Points Analysis


https://www.independent.ie/life/family/learning/understanding-your-cao-course-guide-26505318.html


The mid point is the points score of the applicant in the middle of the list of offerees when that list is sorted by points score.


Where the letters AQA appear instead of points, it means that all qualified applicants for a course were offered a place. AQA sometimes suggests that the points of the last qualified applicant being offered a place were less than 100.

matriculation - http://www2.cao.ie/downloads/documents/Guidelines-EU-EFTA.pdf


# Imports

In [2]:
# Regular expressions 
import re
# Convenient HTTP requests
import requests as rq
# Dates and times
import datetime as dt
# Import numpy module
import numpy as np
# For downloading.
import urllib.request as urlrq
import urllib.parse as urlpar
# Import tabula to read table in pdf
import tabula as tb
# Import matplotlib
import matplotlib.pyplot as plt
# Import time
import time
# import pandas
import pandas as pd 
# import seaborn
import seaborn as sns
# import warnings
import warnings
warnings.filterwarnings("ignore")

# Error Checks

In [3]:
########################################################################
# Global run timestamp, reused below as a prefix for saved file names
########################################################################
# Capture the current date and time once so every file saved in this
# run shares the same stamp
now = dt.datetime.now()

# Render it as YYYYMMDD_HHMMSS
nowstr = format(now, '%Y%m%d_%H%M%S')

print(nowstr)

20211221_012429


In [4]:
########################################################################
# Function to test URLs
# https://pytutorial.com/check-url-is-reachable
########################################################################
def url_checker(url, timeout=30):
    """Report whether *url* answers an HTTP GET with a non-error status.

    Prints ``"<url>: is reachable"`` or ``"<url>: is not reachable"``.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True when the response status is below 400, otherwise False.

    Raises:
        SystemExit: If the request itself fails (e.g. connection error
            or timeout). The exit message carries the URL and the
            underlying error.
    """
    try:
        resp = rq.get(url, timeout=timeout)
    except rq.exceptions.RequestException as e:
        # Pass the message to SystemExit directly: print() returns None,
        # so SystemExit(print(...)) would exit with a "success" status.
        raise SystemExit(f"{url}: is Not reachable \nErr: {e}") from e

    # Any 4xx/5xx answer means the resource cannot be fetched, not only 404.
    if resp.ok:
        print(f"{url}: is reachable")
        return True

    print(f"{url}: is not reachable")
    return False

# Save functions

In [5]:
########################################################################
# Function to save CAO Webpages 
########################################################################

def htmlcopy(url, timeout=30):
    """Download a web page and save a timestamped copy under ``data/``.

    The file is named ``data/<nowstr>_CAO_Webpage_<last 4 chars of url>.html``;
    for the CAO points URLs used in this notebook those four characters
    are the year. Progress is reported with ``print``.

    Args:
        url: Page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Nothing is written unless the response status is 200.
    """
    # Local import keeps this cell self-contained; os is standard library.
    import os

    resp = rq.get(url, timeout=timeout)

    # Only a plain 200 response is treated as a page worth saving.
    if resp.status_code != 200:
        print(f"{url}: is not reachable")
        return

    print(f"{url}: is reachable")

    path = 'data/' + nowstr + '_CAO_Webpage_' + url[-4:] + '.html'
    print(str(path))

    # A fresh checkout may not have the output folder yet.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Explicit UTF-8 so the write does not depend on the platform's
    # default encoding, which can fail on non-ASCII page content.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(resp.text)
    

In [6]:
########################################################################
# Function to save files
########################################################################

def caosavefile(url):
    """Download the file at *url* into ``data/`` under a timestamped name.

    The local name is ``data/<nowstr>_CAO_file_<last path segment of url>``.
    The chosen path is printed before the download starts.
    """
    # Last segment of the URL path, e.g. 'lvl8_19.pdf'
    filename = urlpar.urlsplit(url).path.rsplit("/", 1)[-1]
    path = f"data/{nowstr}_CAO_file_{filename}"
    print(path)
    urlrq.urlretrieve(url, path)

# CAO Webpage links

In [7]:
########################################################################
# CAO web pages
# https://www.cao.ie/index.php?page=points&p=2018
# https://www.cao.ie/index.php?page=points&p=2019
# https://www.cao.ie/index.php?page=points&p=2020
# https://www.cao.ie/index.php?page=points&p=2021
########################################################################

# The four yearly points pages differ only in the value of the p= parameter
html2018, html2019, html2020, html2021 = (
    f'https://www.cao.ie/index.php?page=points&p={year}'
    for year in (2018, 2019, 2020, 2021)
)

caopointshtml = [html2018, html2019, html2020, html2021]

# Keep a timestamped local copy of every page
for url in caopointshtml:
    htmlcopy(url)

https://www.cao.ie/index.php?page=points&p=2018: is reachable
data/20211221_012429_CAO_Webpage_2018.html
https://www.cao.ie/index.php?page=points&p=2019: is reachable
data/20211221_012429_CAO_Webpage_2019.html
https://www.cao.ie/index.php?page=points&p=2020: is reachable
data/20211221_012429_CAO_Webpage_2020.html
https://www.cao.ie/index.php?page=points&p=2021: is reachable
data/20211221_012429_CAO_Webpage_2021.html


# CAO Points location

In [8]:
########################################################################
# CAO points files
# http://www2.cao.ie/points/lvl8_18.pdf
# http://www2.cao.ie/points/lvl76_18.pdf
# http://www2.cao.ie/points/lvl8_19.pdf
# http://www2.cao.ie/points/lvl76_19.pdf
# http://www2.cao.ie/points/CAOPointsCharts2020.xlsx
# http://www2.cao.ie/points/CAOPointsCharts2021.xlsx
########################################################################
# Every points file lives under the same folder on www2.cao.ie
CAO2021, CAO2020, CAO2019_8, CAO2019_76, CAO2018_8, CAO2018_76 = (
    'http://www2.cao.ie/points/' + filename
    for filename in (
        'CAOPointsCharts2021.xlsx',
        'CAOPointsCharts2020.xlsx',
        'lvl8_19.pdf',
        'lvl76_19.pdf',
        'lvl8_18.pdf',
        'lvl76_18.pdf',
    )
)

########################################################################
# All points files, newest first
# Each one is probed with url_checker and then downloaded
########################################################################
caopointslist = [
    CAO2021,
    CAO2020,
    CAO2019_8,
    CAO2019_76,
    CAO2018_8,
    CAO2018_76,
]

for url in caopointslist:
    url_checker(url)
    caosavefile(url)
    
    

http://www2.cao.ie/points/CAOPointsCharts2021.xlsx: is reachable
data/20211221_012429_CAO_file_CAOPointsCharts2021.xlsx
http://www2.cao.ie/points/CAOPointsCharts2020.xlsx: is reachable
data/20211221_012429_CAO_file_CAOPointsCharts2020.xlsx
http://www2.cao.ie/points/lvl8_19.pdf: is reachable
data/20211221_012429_CAO_file_lvl8_19.pdf
http://www2.cao.ie/points/lvl76_19.pdf: is reachable
data/20211221_012429_CAO_file_lvl76_19.pdf
http://www2.cao.ie/points/lvl8_18.pdf: is reachable
data/20211221_012429_CAO_file_lvl8_18.pdf
http://www2.cao.ie/points/lvl76_18.pdf: is reachable
data/20211221_012429_CAO_file_lvl76_18.pdf


# Set up data 

# 2021 Points
# Load the spreadsheet using pandas

https://www.cao.ie/index.php?page=points&p=2021

In [9]:
# Read the 2021 points workbook directly from the CAO site.
# Raw sheet: 1451 rows × 15 columns
# The first 11 rows are skipped so the column header row is read as the header
# https://stackoverflow.com/questions/11346283/renaming-column-names-in-pandas
df = pd.read_excel(CAO2021, skiprows=11)


# Keep only the columns of interest and give them the short names shared
# by the other years. Note: 'R2 Points ' has a trailing space in the
# source header.
DF2021 = df.loc[:, [
    'Course Code',
    'Course Title',
    'R1 Points',
    'R2 Points ',
    'EOS Points',
    'EOS Midpoints',
    'CATEGORY (ISCED Description)',
    'HEI',
]].rename(columns={
    'Course Code': 'Code',
    'Course Title': 'Title',
    'R2 Points ': 'R2 Points',
    'EOS Points': 'EOS',
    'EOS Midpoints': 'Mid',
    'CATEGORY (ISCED Description)': 'Category',
    'HEI': 'College',
})

DF2021

Unnamed: 0,Code,Title,R1 Points,R2 Points,EOS,Mid,Category,College
0,AL605,Music and Instrument Technology,211,,211,319,Engineering and engineering trades,Athlone Institute of Technology
1,AL630,Pharmacy Technician,308,,308,409,Health,Athlone Institute of Technology
2,AL631,Dental Nursing,311,,311,400,Health,Athlone Institute of Technology
3,AL632,Applied Science,297,,297,454,Biological and related sciences,Athlone Institute of Technology
4,AL650,Business,AQA,AQA,AQA,351,Business and administration,Athlone Institute of Technology
...,...,...,...,...,...,...,...,...
1446,WD211,Creative Computing,270,,270,392,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1447,WD212,Recreation and Sport Management,262,,262,304,Personal services,Waterford Institute of Technology
1448,WD230,Mechanical and Manufacturing Engineering,230,230,230,361,Engineering and engineering trades,Waterford Institute of Technology
1449,WD231,Early Childhood Care and Education,266,,266,366,Welfare,Waterford Institute of Technology


# 2020 Points
# Load the spreadsheet using pandas

https://www.cao.ie/index.php?page=points&p=2020


In [10]:
# Read the 2020 points workbook directly from the CAO site.
# Raw sheet: 1464 rows × 23 columns
# The first 10 rows are skipped so the column header row is read as the header
df = pd.read_excel(CAO2020, skiprows=10)


# Keep only the columns of interest and give them the short names shared
# by the other years ('EOS' already has its final name).
DF2020 = df.loc[:, [
    'COURSE CODE2',
    'COURSE TITLE',
    'R1 POINTS',
    'R2 POINTS',
    'EOS',
    'EOS Mid-point',
    'CATEGORY (i.e.ISCED description)',
    'HEI',
]].rename(columns={
    'COURSE CODE2': 'Code',
    'COURSE TITLE': 'Title',
    'R1 POINTS': 'R1 Points',
    'R2 POINTS': 'R2 Points',
    'EOS Mid-point': 'Mid',
    'CATEGORY (i.e.ISCED description)': 'Category',
    'HEI': 'College',
})

DF2020

Unnamed: 0,Code,Title,R1 Points,R2 Points,EOS,Mid,Category,College
0,AC120,International Business,209,,209,280,Business and administration,American College
1,AC137,Liberal Arts,252,,252,270,Humanities (except languages),American College
2,AD101,"First Year Art & Design (Common Entry,portfolio)",#+matric,,#+matric,#+matric,Arts,National College of Art and Design
3,AD102,Graphic Design and Moving Image Design (portfo...,#+matric,,#+matric,#+matric,Arts,National College of Art and Design
4,AD103,Textile & Surface Design and Jewellery & Objec...,#+matric,,#+matric,#+matric,Arts,National College of Art and Design
...,...,...,...,...,...,...,...,...
1459,WD208,Manufacturing Engineering,188,,188,339,Manufacturing and processing,Waterford Institute of Technology
1460,WD210,Software Systems Development,279,,279,337,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1461,WD211,Creative Computing,271,,271,318,Information and Communication Technologies (ICTs),Waterford Institute of Technology
1462,WD212,Recreation and Sport Management,270,,270,349,Personal services,Waterford Institute of Technology


# 2019 Points
# Load the points tables extracted from the PDFs

https://www.cao.ie/index.php?page=points&p=2019

- CAO2019_8 = 'http://www2.cao.ie/points/lvl8_19.pdf' - 930 rows × 4 columns
- CAO2019_76 = 'http://www2.cao.ie/points/lvl76_19.pdf' - 461 rows × 4 columns


https://stackoverflow.com/questions/37826926/how-to-trim-starting-spaces-of-entire-column-in-libreoffice-or-google-sheets

Remove all special characters.



In [18]:
# Load the 2019 level 8 points from the local CSV (comma is the default separator)
df2019lvl8 = pd.read_csv('lvl8_19.csv')

df2019lvl8

Unnamed: 0,Code,Course,EOS,Mid
0,AL801,Software Design with Virtual Reality and Gaming,304,328.0
1,AL802,Software Design with Cloud Computing,301,306.0
2,AL803,Software Design with Mobile Apps and Connected...,309,337.0
3,AL805,Network Management and Cloud Infrastructure,329,442.0
4,AL810,Quantity Surveying,307,349.0
...,...,...,...,...
925,WD200,Arts options,221,296.0
926,WD210,Software Systems Development,271,329.0
927,WD211,Creative Computing,275,322.0
928,WD212,Recreation and Sport Management,274,311.0


In [19]:
# Load the 2019 level 7/6 points from the local CSV (comma is the default separator)
df2019lvl76 = pd.read_csv('lvl76_19.csv')

df2019lvl76

Unnamed: 0,Code,Course,EOS,Mid
0,AL600,Software Design,205,306.0
1,AL601,Computer Engineering,196,272.0
2,AL602,Mechanical Engineering,258,424.0
3,AL604,Civil Engineering,252,360.0
4,AL630,Pharmacy Technician,306,366.0
...,...,...,...,...
456,WD188,Applied Health Care,206,339.0
457,WD205,Molecular Biology with Biopharmaceutical Science,208,441.0
458,WD206,Electronic Engineering,191,322.0
459,WD207,Mechanical Engineering,179,330.0


# 2018 Points
# Load the points tables extracted from the PDFs

https://www.cao.ie/index.php?page=points&p=2018

- CAO2018_8 = 'http://www2.cao.ie/points/lvl8_18.pdf' - 914 rows × 4 columns
- CAO2018_76 = 'http://www2.cao.ie/points/lvl76_18.pdf' - 471 rows × 4 columns

In [20]:
# Load the 2018 level 8 points from the local CSV (comma is the default separator)
df2018lvl8 = pd.read_csv('lvl8_18.csv')

df2018lvl8

Unnamed: 0,Code,Course,EOS,Mid
0,AL801,Software Design Game Development or Cloud Comp...,295,326.0
1,AL810,Quantity Surveying,300,340.0
2,AL820,Mechanical and Polymer Engineering,299,371.0
3,AL830,General Nursing,418,440.0
4,AL832,Psychiatric Nursing,377,388.0
...,...,...,...,...
909,WD197,The Internet of Things,260,329.0
910,WD200,Arts,220,299.0
911,WD210,Software Systems Development,289,327.0
912,WD211,Creative Computing,265,326.0


In [21]:
# Load the 2018 level 7/6 points from the local CSV (comma is the default separator)
df2018lvl76 = pd.read_csv('lvl76_18.csv')

df2018lvl76

Unnamed: 0,Code,Course,EOS,Mid
0,AL601,Electronics and Computer Engineering,240,321.0
1,AL602,Mechanical Engineering,201,299.0
2,AL604,Civil Engineering,243,320.0
3,AL630,Pharmacy Technician,306,388.0
4,AL631,Dental Nursing,307,348.0
...,...,...,...,...
466,WD205,Molecular Biology with Biopharmaceutical Science,217,398.0
467,WD206,Electronic Engineering,175,330.0
468,WD207,Mechanical Engineering,182,362.0
469,WD208,Manufacturing Engineering,180,298.0


# Join all Dataframes

df2018lvl76

df2018lvl8

df2019lvl76

df2019lvl8

DF2020

DF2021

In [24]:

# Join 2021, 2020 and 2019 (level 8) on course code; only codes present in
# all three tables survive (pandas' default inner join).
# Clashing column names get pandas' default suffixes: '_x' marks the 2021
# columns and '_y' the 2020 ones. The 2019 columns ('Course', 'EOS', 'Mid')
# do not clash with the suffixed names, so they stay unsuffixed.
NewDF = pd.merge(
    pd.merge(DF2021, DF2020, on='Code'),
    df2019lvl8,
    on='Code',
)

NewDF

Unnamed: 0,Code,Title_x,R1 Points_x,R2 Points_x,EOS_x,Mid_x,Category_x,College_x,Title_y,R1 Points_y,R2 Points_y,EOS_y,Mid_y,Category_y,College_y,Course,EOS,Mid
0,AL801,Software Design for Virtual Reality and Gaming,300,,300,359,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Virtual Reality and Gaming,303,,303,367,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Virtual Reality and Gaming,304,328.0
1,AL802,Software Design in Artificial Intelligence for...,313,,313,381,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Artificial Intelligence f...,332,,332,382,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Cloud Computing,301,306.0
2,AL803,Software Design for Mobile Apps and Connected ...,350,,350,398,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Mobile Apps and Connected...,337,,337,360,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Software Design with Mobile Apps and Connected...,309,337.0
3,AL805,Computer Engineering for Network Infrastructure,321,,321,381,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Computer Engineering with Network Infrastructure,333,,333,360,Information and Communication Technologies (ICTs),Athlone Institute of Technology,Network Management and Cloud Infrastructure,329,442.0
4,AL810,Quantity Surveying,328,,328,377,Architecture and construction,Athlone Institute of Technology,Quantity Surveying,319,,326,352,Architecture and construction,Athlone Institute of Technology,Quantity Surveying,307,349.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
746,WD200,Arts (options),201,201,201,376,Arts,Waterford Institute of Technology,Arts (options),AQA,AQA,AQA,336,Arts,Waterford Institute of Technology,Arts options,221,296.0
747,WD210,Software Systems Development,260,,260,373,Information and Communication Technologies (ICTs),Waterford Institute of Technology,Software Systems Development,279,,279,337,Information and Communication Technologies (ICTs),Waterford Institute of Technology,Software Systems Development,271,329.0
748,WD211,Creative Computing,270,,270,392,Information and Communication Technologies (ICTs),Waterford Institute of Technology,Creative Computing,271,,271,318,Information and Communication Technologies (ICTs),Waterford Institute of Technology,Creative Computing,275,322.0
749,WD212,Recreation and Sport Management,262,,262,304,Personal services,Waterford Institute of Technology,Recreation and Sport Management,270,,270,349,Personal services,Waterford Institute of Technology,Recreation and Sport Management,274,311.0


# End