In [5]:
import json
import urllib.error
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd

# Base URL and resource ID of the datastore that holds the wave records
base_url = 'https://www.data.qld.gov.au/api/3/action/datastore_search'
resource_id = '2bbef99e-9974-49b9-a316-57402b00609c'

# Query restricting the results to the site "Mooloolaba"
filters = {
    "Site": "Mooloolaba"
}

# Request parameters: resource ID, row limit, site query and ordering
params = {
    'resource_id': resource_id,
    'limit': 24,  # 24 records = 12 hours at 30 min intervals
    'q': json.dumps(filters),  # Convert the filters dictionary to a JSON string
    'sort': '_id desc'  # Sort by record ID in descending order so the limit keeps the latest records
}

# Encode the parameters and create the full URL
url = f"{base_url}?{urllib.parse.urlencode(params)}"

# Containers for the results; defined up front so both names exist (empty)
# even when the request below fails
result_dict = {}
df = pd.DataFrame()

# Make the request
try:
    # The context manager closes the HTTP response even if reading/parsing fails;
    # the timeout keeps the cell from hanging on an unresponsive server
    with urllib.request.urlopen(url, timeout=10) as fileobj:
        response = fileobj.read()
    data = json.loads(response)

    # Store the records in the dictionary, keyed by record ID
    for record in data.get('result', {}).get('records', []):
        record_id = record.get('_id')
        result_dict[record_id] = record

    df = pd.DataFrame.from_dict(result_dict, orient='index')

except urllib.error.HTTPError as e:
    print(f"HTTPError: {e.code} - {e.reason}")
except Exception as e:
    print(f"Error: {e}")


In [18]:
def fetch_data(site="Mooloolaba", limit=48, timeout=30):
    """Fetch the most recent wave records for one site from the Queensland open-data API.

    Parameters
    ----------
    site : str, default "Mooloolaba"
        Value queried against the ``Site`` field.
    limit : int, default 48
        Maximum number of records to request (48 records = 24 hours at
        30 min intervals).
    timeout : float, default 30
        Seconds to wait on the HTTP request before giving up.

    Returns
    -------
    dict
        Records keyed by their ``_id``, in the order returned by the API
        (``_id`` descending). Empty if the request or parsing failed; the
        error is printed rather than raised.

    NOTE(review): ``q`` is sent as a JSON dictionary, which CKAN treats as a
    full-text query; ``filters`` may be the intended exact-match parameter --
    confirm against the datastore_search documentation.
    """
    # Base URL and resource ID
    base_url = 'https://www.data.qld.gov.au/api/3/action/datastore_search'
    resource_id = '2bbef99e-9974-49b9-a316-57402b00609c'

    # Request parameters, including the resource ID and the site query
    params = {
        'resource_id': resource_id,
        'limit': limit,
        'q': json.dumps({"Site": site}),  # Convert the query dictionary to a JSON string
        'sort': '_id desc'  # Sort by record ID in descending order to get the latest records
    }

    # Encode the parameters and create the full URL
    url = f"{base_url}?{urllib.parse.urlencode(params)}"

    # Initialize the dictionary to store results
    result_dict = {}

    # Make the request
    try:
        # The context manager closes the HTTP response even when reading or
        # parsing raises; the timeout prevents an indefinite hang
        with urllib.request.urlopen(url, timeout=timeout) as fileobj:
            data = json.loads(fileobj.read())

        # Store the records in the dictionary, keyed by record ID
        for record in data.get('result', {}).get('records', []):
            result_dict[record.get('_id')] = record

    except urllib.error.HTTPError as e:
        print(f"HTTPError: {e.code} - {e.reason}")
    except Exception as e:
        # Deliberate best-effort: report the problem and return what we have
        print(f"Error: {e}")

    return result_dict

In [19]:
# Fetch the records via fetch_data(); the bare name on the last line displays the dict as the cell output
result_dict = fetch_data()
result_dict

{1126: {'_id': 1126,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1724061600',
  'DateTime': '2024-08-19T20:00:00',
  'Latitude': '-26.56615',
  'Longitude': '153.18368',
  'Hsig': '1.089',
  'Hmax': '1.660',
  'Tp': '4.350',
  'Tz': '4.301',
  'SST': '21.25',
  'Direction': '140.60',
  'Current Speed': '-99.90',
  'Current Direction': '-99.90',
  'rank Site': 0.057308756},
 1125: {'_id': 1125,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1724059800',
  'DateTime': '2024-08-19T19:30:00',
  'Latitude': '-26.56619',
  'Longitude': '153.18368',
  'Hsig': '1.056',
  'Hmax': '1.770',
  'Tp': '7.690',
  'Tz': '4.444',
  'SST': '21.30',
  'Direction': '126.60',
  'Current Speed': '-99.90',
  'Current Direction': '-99.90',
  'rank Site': 0.057308756},
 1124: {'_id': 1124,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1724058000',
  'DateTime': '2024-08-19T19:00:00',
  'Latitude': '-26.56614',
  'Longitude': '153.18366',
  'Hsig': '1.143',
  'Hmax': '1.9

In [20]:
def preprocess_data(result_dict):
    """Turn fetched records into a regular 30-minute frame of the model's input variables.

    Parameters
    ----------
    result_dict : dict
        Records keyed by ID. Each record must carry the ``DateTime``,
        ``Hmax``, ``Tz`` and ``Direction`` fields.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``datetime`` in ascending order at a 30-minute frequency,
        with float columns ``wave_height``, ``wave_period`` and
        ``wave_direction``. Missing time slots and the ``-99.9`` sentinel
        appear as NaN.

    Raises
    ------
    KeyError
        If the records lack one of the required fields (including when
        ``result_dict`` is empty).
    """
    target_vars = ['wave_height', 'wave_period', 'wave_direction']

    # Conversion to dataframe, renaming to the names the model expects
    df = pd.DataFrame.from_dict(result_dict, orient='index').rename(columns={
        'DateTime': 'datetime',
        'Hmax': 'wave_height',
        'Tz': 'wave_period',
        'Direction': 'wave_direction'
    })

    # Timestamp index in chronological order
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime').sort_index()

    # Keep only desired variables. The fetched records carry measurements as
    # strings, so convert them to numbers first; otherwise the sentinel
    # replacement below never matches and the columns stay object dtype.
    df = df[target_vars].apply(pd.to_numeric, errors='coerce')

    # Null values: -99.9 is replaced with NaN
    df = df.replace(-99.9, np.nan)

    # Regular 30-minute grid; '30min' is the non-deprecated spelling of '30T'
    df = df.asfreq('30min')

    return df

In [21]:
# Build the model input frame from the fetched records and display it as the cell output
data = preprocess_data(result_dict)
data 

Unnamed: 0_level_0,wave_height,wave_period,wave_direction
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-08-18 20:30:00,0.87,5.714,105.5
2024-08-18 21:00:00,1.02,5.714,95.6
2024-08-18 21:30:00,0.83,5.882,99.8
2024-08-18 22:00:00,0.79,5.797,95.6
2024-08-18 22:30:00,0.89,6.061,104.1
2024-08-18 23:00:00,0.91,5.97,95.6
2024-08-18 23:30:00,0.89,5.714,98.4
2024-08-19 00:00:00,0.94,5.882,104.1
2024-08-19 00:30:00,0.95,5.882,104.1
2024-08-19 01:00:00,1.25,6.061,105.5


## Inference

In [22]:
import pickle 

# Load the trained model from its pickle file
def load_model(model_path):
    """Return the object unpickled from the file at ``model_path``.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at model files from a trusted source.
    """
    with open(model_path, 'rb') as handle:
        return pickle.load(handle)

# Make predictions using the loaded model
def make_predictions(model, processed_data, steps=24):
    """Forecast ``steps`` future values from the most recent observations.

    Parameters
    ----------
    model : object
        Loaded forecaster exposing ``predict(steps=..., last_window=...)``.
    processed_data : object
        Recent observations passed to the model as ``last_window``.
    steps : int, default 24
        Number of future steps to forecast; 24 keeps the previous
        hard-coded horizon.

    Returns
    -------
    Whatever ``model.predict`` returns.
    """
    return model.predict(steps=steps, last_window=processed_data)

In [23]:
# Load the pickled forecaster and forecast from the preprocessed observations in `data`
model = load_model('../../Models/mooloolaba/forecaster_mool.pkl')
preds = make_predictions(model, data)



In [24]:
# Inspect the timestamps covered by the forecast
preds.index

DatetimeIndex(['2024-08-19 20:30:00', '2024-08-19 21:00:00',
               '2024-08-19 21:30:00', '2024-08-19 22:00:00',
               '2024-08-19 22:30:00', '2024-08-19 23:00:00',
               '2024-08-19 23:30:00', '2024-08-20 00:00:00',
               '2024-08-20 00:30:00', '2024-08-20 01:00:00',
               '2024-08-20 01:30:00', '2024-08-20 02:00:00',
               '2024-08-20 02:30:00', '2024-08-20 03:00:00',
               '2024-08-20 03:30:00', '2024-08-20 04:00:00',
               '2024-08-20 04:30:00', '2024-08-20 05:00:00',
               '2024-08-20 05:30:00', '2024-08-20 06:00:00',
               '2024-08-20 06:30:00', '2024-08-20 07:00:00',
               '2024-08-20 07:30:00', '2024-08-20 08:00:00'],
              dtype='datetime64[ns]', freq='30T')

In [25]:
# Display the forecast table as the cell output
preds

Unnamed: 0,wave_direction,wave_height,wave_period
2024-08-19 20:30:00,131.877779,1.766191,4.411009
2024-08-19 21:00:00,133.015773,1.775648,4.42998
2024-08-19 21:30:00,132.59812,1.766395,4.442225
2024-08-19 22:00:00,132.140536,1.762294,4.461894
2024-08-19 22:30:00,131.766408,1.762882,4.487052
2024-08-19 23:00:00,132.004712,1.759738,4.511823
2024-08-19 23:30:00,132.156774,1.75111,4.548699
2024-08-20 00:00:00,132.120199,1.755396,4.57098
2024-08-20 00:30:00,132.53489,1.751724,4.606757
2024-08-20 01:00:00,131.497044,1.756342,4.627883
