In [1]:
# = = = = = = = = = = = = = 
# Created:       15:09:2020
# Last Updated:  21:09:2020
#
# Description:  Web scraper that reads the table of fuel prices published by AIP and preserves them in CSV format
#               Focused on using Requests library
# = = = = = = = = = = = = = 

In [2]:
import datetime
import os
import csv

# Focusing on only using Requests
import requests

In [3]:
# Input/s:  String, datetime
# Output/s: Boolean
#
# Description: Checks if the file exists and whether the current week has already been recorded. Returns True if the current week is recorded, else False. If the file does not exist it is created with only the header row.
#
def check_week(fFileName, fCurr_time):
    """Report whether the week identified by ``fCurr_time`` is already in the CSV.

    If ``fFileName`` does not exist, it is created containing only the header
    row and False is returned.

    Args:
        fFileName: Path of the CSV file to inspect (or create).
        fCurr_time: ``datetime.datetime`` whose ``timestamp()`` is compared
            with the first column of every data row.

    Returns:
        True if some data row's first column equals ``fCurr_time.timestamp()``,
        otherwise False.
    """
    # Create CSV if not exists
    if not os.path.exists(fFileName):
        with open(fFileName, 'w', newline='') as fd:
            writer = csv.writer(fd)
            # Write Headers
            writer.writerow(["Timestamp (Unix)","Date (YYYY-MM-DD)","Location", "Weekly Average", "Weekly Change", "Variation", "Weekly Low", "Weekly High"])
        return False

    # Loop-invariant: compute the timestamp we are looking for only once
    target = fCurr_time.timestamp()

    with open(fFileName, newline='') as fd:
        reader = csv.reader(fd, delimiter=',')
        # Skip header line; the default keeps an empty file from raising StopIteration
        next(reader, None)
        for element in reader:
            # csv.reader yields [] for blank lines -- there is nothing to compare
            if not element:
                continue
            try:
                recorded = float(element[0])
            except ValueError:
                # A non-numeric first column cannot match any timestamp
                continue
            if recorded == target:
                # This week has been recorded before
                return True

    # Timestamp is not present in the file
    return False
    

In [4]:
# Fuel data CSV
fileName = "fuel_data.csv"

# Target cities, lower-case (compared against the lower-cased 'location' field)
targets = ['canberra', 'sydney', 'batemans bay', 'cooma', 'goulburn', 'coffs harbour', 'wollongong', 'yass']

# HTTP response for the JSON endpoint that contains the weekly values.
# NOTE: despite its name, `url` holds the Response object, not a URL string;
# the name is kept because a later cell calls `url.json()`.
# The timeout stops the notebook from hanging forever if the server never answers.
url = requests.get('https://aip.com.au/aip-api-request?api-path=public/api&call=nswUlpTable&fuelType=undefined',
                   timeout=30)
# Fail here with a clear HTTP error rather than later with a confusing JSON decode error
url.raise_for_status()

In [10]:
# Today's calendar date
curr_day = datetime.date.today()

# Number of days to step back to reach Sunday (isoweekday: Mon=1 .. Sun=7, so Sunday -> 0)
idx = curr_day.isoweekday() % 7

# datetime.date of the most recent Sunday (today itself when today is Sunday)
curr_week = curr_day - datetime.timedelta(days=idx)

# Same day as a datetime object at midnight
curr_time = datetime.datetime.combine(curr_week, datetime.time.min)

0


-9.0


In [23]:
class Weekly_Price:
    """One location's weekly fuel-price record, mirroring one CSV row.

    Column order of the CSV row:
    "Timestamp (Unix)", "Date (YYYY-MM-DD)", "Location", "Weekly Average",
    "Weekly Change", "Variation", "Weekly Low", "Weekly High", "flagged (y/n)"
    """

    def __init__(self, time, date, loc, avg, chng, var, low, high, flag):
        """Build a record from raw values (strings from the CSV or numbers).

        Args:
            time: Timestamp; converted with ``float``.
            date: Date text, stored unchanged.
            loc: Location name, stored unchanged.
            avg: Weekly average; converted with ``float``.
            chng: Weekly change; may be "-" (no value), which is stored as 0.0.
            var: Variation; may be "-" (no value), which is stored as 0.0.
            low: Weekly low; converted with ``float``.
            high: Weekly high; converted with ``float``.
            flag: Flag value, stored unchanged.

        Raises:
            ValueError: If ``time``, ``avg``, ``low`` or ``high`` is not numeric.
        """
        self.time = float(time)
        self.date = date
        self.loc = loc
        self.avg = float(avg)

        self.low = float(low)
        self.high = float(high)

        self.flag = flag

        # Below values have the potential to be "-".
        # Values read from the CSV are strings, so an isinstance(..., float)
        # check would always discard them; attempt the conversion instead.
        self.chng = self._float_or_zero(chng)
        self.var = self._float_or_zero(var)

    @staticmethod
    def _float_or_zero(value):
        """Return ``float(value)``, or 0.0 when value is not numeric (e.g. "-")."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    # Convert object to a useable array to be written to file
    def Write_Array(self):
        """Return the record as a list in CSV column order.

        Numeric fields are rendered with ``str``; date, location and flag are
        returned as stored.
        """
        return [str(self.time),
                self.date,
                self.loc,
                str(self.avg),
                str(self.chng),
                str(self.var),
                str(self.low),
                str(self.high),
                self.flag]
        

In [26]:
# Every data row of the CSV, parsed into Weekly_Price objects
array_stuff = []

if os.path.exists(fileName):
    with open(fileName, newline='') as fd:
        # Read CSV
        reader = csv.reader(fd, delimiter=',')
        # Skip header line; the default keeps an empty file from raising StopIteration
        next(reader, None)
        for element in reader:
            # csv.reader yields [] for blank lines -- nothing to parse
            if not element:
                continue

            # Rows that do not have exactly nine columns get the default flag 'n'
            flag = element[8] if len(element) == 9 else 'n'

            # First eight columns map one-to-one onto the constructor arguments
            array_stuff.append(Weekly_Price(*element[:8], flag))

# Create CSV if not exists
else:
    # Use the module-level fileName here: fFileName is not defined in this
    # cell and would raise NameError.
    with open(fileName, 'w', newline='') as fd:
        writer = csv.writer(fd)
        # Write Headers
        writer.writerow(["Timestamp (Unix)","Date (YYYY-MM-DD)","Location", "Weekly Average", "Weekly Change", "Variation", "Weekly Low", "Weekly High", "flagged (y/n)"])

for element in array_stuff:
    print (element.time, element.loc, element.avg)

1599919200.0 Sydney 124.4
1599919200.0 Canberra 122.2
1599919200.0 Batemans Bay 122.6
1599919200.0 Coffs Harbour 119.5
1599919200.0 Cooma 123.5
1599919200.0 Goulburn 119.0
1599919200.0 Wollongong 123.9
1599919200.0 Yass 120.0
1600524000.0 Sydney 124.4
1600524000.0 Canberra 122.2
1600524000.0 Batemans Bay 122.6
1600524000.0 Coffs Harbour 119.5
1600524000.0 Cooma 123.5
1600524000.0 Goulburn 119.0
1600524000.0 Wollongong 123.9
1600524000.0 Yass 120.0
1601128800.0 Sydney 113.8
1601128800.0 Canberra 122.0
1601128800.0 Batemans Bay 122.6
1601128800.0 Coffs Harbour 119.3
1601128800.0 Cooma 123.1
1601128800.0 Goulburn 117.3
1601128800.0 Wollongong 122.2
1601128800.0 Yass 120.8
1601733600.0 Sydney 114.1
1601733600.0 Canberra 120.8
1601733600.0 Batemans Bay 122.6
1601733600.0 Coffs Harbour 119.5
1601733600.0 Cooma 122.6
1601733600.0 Goulburn 116.6
1601733600.0 Wollongong 119.9
1601733600.0 Yass 119.8
1602334800.0 Sydney 132.0
1602334800.0 Canberra 120.9
1602334800.0 Batemans Bay 122.7
1602334800

In [21]:
# Skipped entirely when this week's values are already in the CSV
already_recorded = check_week(fileName, curr_time)
if not already_recorded:
    ## The response holds the raw information used for populating the table
    ## (request found in the Network tab of inspect element).
    ## Its body is JSON, so it is simply decoded as JSON.
    data = url.json()

    with open(fileName, 'a', newline='') as fd:
        writer = csv.writer(fd)
        ## Iterate through JSON objects
        for item in data:
            record = data[item]
            # Ignore locations we are not tracking
            if record['location'].lower() not in targets:
                continue

            # Row layout: timestamp, date, then the JSON fields in column order
            element = [curr_time.timestamp(), curr_time.strftime("%Y-%m-%d")]
            element.extend(
                record[field]
                for field in ('location', 'weeklyAverage', 'weeklyChange',
                              'diff', 'weeklyLow', 'weeklyHigh')
            )

            writer.writerow(element)