# CAO Points Analysis

This file includes a detailed comparison of CAO points in 2019, 2020 and 2021 using the functionality in Pandas,
plus appropriate plots.

***

### Link to CAO Points Website
https://www.cao.ie/index.php?page=points&p=2021

***

In [1]:
# Reading and writing CSV files with correct quoting.
import csv

# Dates and times.
import datetime as dt

# Regular expression.
import re

# Convenient package for making HTTP requests.
import requests as rq

In [2]:
# Fetch CAO points from the website.
# The timeout stops the notebook from hanging forever if the server never answers.
resp = rq.get("http://www2.cao.ie/points/l8.php", timeout=30)
# Fail loudly on an HTTP error status instead of saving and parsing an error page.
resp.raise_for_status()
# Uncomment the line below to inspect the response object.
# resp

### Save original CAO data set to an HTML file.

***

In [3]:
# Take one snapshot of the current local date and time.
now = dt.datetime.now()
# Render it as a compact, sortable stamp (year, month, day, underscore, time).
nowstr = format(now, '%Y%m%d_%H%M%S')
# Embed the stamp in the name of the HTML file to be written.
path = f'data/cao2021_{nowstr}.html'

In [4]:
# Remember the encoding the response object currently reports, before overriding it.
original_encoding = resp.encoding
# Per the original note, the server-declared encoding is wrong; tell requests
# to decode the body as cp1252 from here on (this affects resp.text).
resp.encoding = "cp1252"

In [5]:
# Save the original html file.
# An explicit encoding makes the bytes on disk independent of the platform's
# default text encoding, and UTF-8 can represent every character in resp.text.
with open(path, 'w', encoding='utf-8') as f:
    f.write(resp.text)

### Use a regular expression to select the lines we want.

***

In [6]:
# Shape of a course row: a code made of two capital letters and three digits,
# exactly two spaces, free text, a three-digit number, an optional '*',
# and then any number of trailing spaces.
# NOTE(review): when used with fullmatch, only rows ending in a three-digit
# number (optionally starred) can match - confirm no course rows are lost.
course_pattern = r'([A-Z]{2}[0-9]{3})  (.*)([0-9]{3})(\*?) *'
re_course = re.compile(course_pattern)

### Loop through the lines of the response.
***

In [9]:
# Keep track of how many courses we process.
no_lines = 0

# Path of the CSV file that receives the extracted course rows.
path = 'data/cao2021_csv_' + nowstr + '.csv'

# newline='' lets the csv module control the row endings, and the explicit
# encoding keeps the output independent of the platform default.
with open(path, 'w', newline='', encoding='utf-8') as f:
    # csv.writer quotes any field that contains a comma, so a course title
    # with a comma in it stays in a single column.
    writer = csv.writer(f, lineterminator='\n')
    # The loop itself.
    for line in resp.iter_lines():
        # iter_lines() yields bytes here; decode them as cp1252, the encoding
        # this notebook uses for the page.
        dline = line.decode('cp1252')
        # Skip everything that is not a course line.
        if not re_course.fullmatch(dline):
            continue
        # Add one to the lines counter.
        no_lines += 1
        # Uncomment line below to check if chosen lines are correct.
        # print(line)
        # Drop trailing spaces first (the pattern allows them) so they cannot
        # produce an empty extra column, then split on two or more spaces.
        writer.writerow(re.split('  +', dline.rstrip(' ')))


# Print total number of processed lines.
print(f"Total number of lines is {no_lines}.")

Total number of lines is 922.


### REFERENCES:
    1.
    2.
    3.
    