In [1]:
import urllib.request
import urllib.parse
import json
import pandas as pd
import numpy as np

# CKAN datastore search endpoint of the Queensland open-data portal and the
# resource (dataset table) that is queried.
base_url = 'https://www.data.qld.gov.au/api/3/action/datastore_search'
resource_id = '2bbef99e-9974-49b9-a316-57402b00609c'

# Search terms: restrict the query to the "Mooloolaba" site
filters = {
    "Site": "Mooloolaba"
}

# Request parameters.
# NOTE(review): the terms are sent through `q` (CKAN's full-text query), not
# through the exact-match `filters` parameter, and no `sort` is sent, so which
# 24 records come back depends on the API's default ordering -- confirm these
# are the most recent ones.
params = {
    'resource_id': resource_id,
    'limit': 24,  # 24 records = 12 hours at 30 min intervals
    'q': json.dumps(filters)  # Serialise the search terms as a JSON string
}

# Encode the parameters and create the full URL
url = f"{base_url}?{urllib.parse.urlencode(params)}"

# Records keyed by their `_id`; stays empty if the request fails
result_dict = {}

# Make the request
try:
    # The context manager closes the HTTP response even when reading fails,
    # so the connection is not leaked.
    with urllib.request.urlopen(url) as fileobj:
        response = fileobj.read()
    data = json.loads(response)

    # Store the records in the dictionary
    for record in data.get('result', {}).get('records', []):
        record_id = record.get('_id')
        result_dict[record_id] = record

    df = pd.DataFrame.from_dict(result_dict, orient='index')

except urllib.error.HTTPError as e:
    print(f"HTTPError: {e.code} - {e.reason}")
except Exception as e:
    # Best-effort cell: report the problem and leave `result_dict` as it is
    print(f"Error: {e}")


In [62]:
def fetch_data(site="Mooloolaba", limit=48, timeout=30):
    """Fetch wave-monitoring records for one site from the Queensland open-data API.

    Parameters
    ----------
    site : str, default "Mooloolaba"
        Value searched for in the ``Site`` field.
    limit : int, default 48
        Maximum number of records requested (48 records = 24 hours at
        30 min intervals).
    timeout : float, default 30
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    dict
        The returned records keyed by their ``_id`` value. The dictionary is
        empty when the request or the parsing fails; in that case the error is
        printed instead of raised.
    """
    # CKAN datastore search endpoint and the resource that is queried
    base_url = 'https://www.data.qld.gov.au/api/3/action/datastore_search'
    resource_id = '2bbef99e-9974-49b9-a316-57402b00609c'

    # Search terms for the requested site
    filters = {
        "Site": site
    }

    # NOTE(review): the terms are sent through `q` (CKAN's full-text query),
    # not through the exact-match `filters` parameter, and no `sort` is sent,
    # so the records returned depend on the API's default ordering -- confirm
    # they are the most recent ones.
    params = {
        'resource_id': resource_id,
        'limit': limit,
        'q': json.dumps(filters)  # Serialise the search terms as a JSON string
    }

    # Encode the parameters and create the full URL
    url = f"{base_url}?{urllib.parse.urlencode(params)}"

    # Records keyed by `_id`; returned empty if anything below fails
    result_dict = {}

    try:
        # The context manager closes the HTTP response even when reading
        # fails; the timeout keeps a stalled connection from hanging the kernel.
        with urllib.request.urlopen(url, timeout=timeout) as fileobj:
            response = fileobj.read()
        data = json.loads(response)

        # Store the records in the dictionary
        for record in data.get('result', {}).get('records', []):
            record_id = record.get('_id')
            result_dict[record_id] = record

    except urllib.error.HTTPError as e:
        print(f"HTTPError: {e.code} - {e.reason}")
    except Exception as e:
        # Deliberately best-effort: report and fall through to the return
        print(f"Error: {e}")

    return result_dict

In [63]:
# Query the API through fetch_data() and display the returned records
# (a dict keyed by each record's `_id`).
result_dict = fetch_data()
result_dict

{732: {'_id': 732,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1723039200',
  'DateTime': '2024-08-08T00:00:00',
  'Latitude': '-26.56525',
  'Longitude': '153.18415',
  'Hsig': '1.112',
  'Hmax': '2.000',
  'Tp': '9.090',
  'Tz': '4.598',
  'SST': '20.85',
  'Direction': '99.80',
  'Current Speed': '-99.90',
  'Current Direction': '-99.90',
  'rank Site': 0.057308756},
 733: {'_id': 733,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1723041000',
  'DateTime': '2024-08-08T00:30:00',
  'Latitude': '-26.56517',
  'Longitude': '153.18439',
  'Hsig': '1.123',
  'Hmax': '1.880',
  'Tp': '6.250',
  'Tz': '4.598',
  'SST': '20.85',
  'Direction': '119.50',
  'Current Speed': '-99.90',
  'Current Direction': '-99.90',
  'rank Site': 0.057308756},
 734: {'_id': 734,
  'Site': 'Mooloolaba',
  'SiteNumber': '4',
  'Seconds': '1723042800',
  'DateTime': '2024-08-08T01:00:00',
  'Latitude': '-26.56513',
  'Longitude': '153.18448',
  'Hsig': '1.108',
  'Hmax': '1.990',
  

In [64]:
def preprocess_data(result_dict):
    """Turn the raw API records into a regular 30-minute frame of wave variables.

    Parameters
    ----------
    result_dict : dict
        Records keyed by id, each a dict holding at least the ``DateTime``,
        ``Hmax``, ``Tz`` and ``Direction`` fields.

    Returns
    -------
    pandas.DataFrame
        Float columns ``wave_height`` (from ``Hmax``), ``wave_period`` (from
        ``Tz``) and ``wave_direction`` (from ``Direction``), indexed by a
        ``datetime`` index with a 30-minute frequency. Missing time slots and
        the ``-99.9`` sentinel are returned as NaN.

    Raises
    ------
    KeyError
        If the expected fields are absent, e.g. when ``result_dict`` is empty.
    """
    target_vars = ['wave_height', 'wave_period', 'wave_direction']

    # Conversion to dataframe (one row per record) and renaming
    df = pd.DataFrame.from_dict(result_dict, orient='index').rename(columns={
        'DateTime': 'datetime',
        'Hmax': 'wave_height',
        'Tz': 'wave_period',
        'Direction': 'wave_direction'
    })

    # Timestamp format, then keep only desired variables
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime')[target_vars]

    # The API delivers measurements as strings ('2.000', '-99.90'); convert
    # them to floats first, otherwise the sentinel replacement below can never
    # match. Unparseable values become NaN.
    df = df.apply(pd.to_numeric, errors='coerce')

    # Null values: -99.9 marks a missing measurement
    df = df.replace(-99.9, np.nan)

    # Regular 30-minute grid; gaps become NaN rows. '30min' is used instead of
    # the deprecated '30T' alias. Done last so the returned index carries its
    # frequency.
    df = df.asfreq('30min')

    return df

In [65]:
# Convert the fetched records into the datetime-indexed frame of wave
# variables and display it.
data = preprocess_data(result_dict)
data 

Unnamed: 0_level_0,wave_height,wave_period,wave_direction
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-08-08 00:00:00,2.0,4.598,99.8
2024-08-08 00:30:00,1.88,4.598,119.5
2024-08-08 01:00:00,1.99,4.598,99.8
2024-08-08 01:30:00,2.27,4.494,113.9
2024-08-08 02:00:00,1.9,4.651,108.3
2024-08-08 02:30:00,2.42,4.494,108.3
2024-08-08 03:00:00,2.0,4.878,122.3
2024-08-08 03:30:00,1.91,4.651,112.5
2024-08-08 04:00:00,1.88,4.598,106.9
2024-08-08 04:30:00,1.84,4.545,119.5


## Inference

In [70]:
import pickle 

# Load the trained model from the pickle file
def load_model(model_path):
    """Return the object unpickled from the file at ``model_path``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only pass paths to trusted files.
    """
    with open(model_path, 'rb') as model_file:
        return pickle.load(model_file)

# Make predictions using the loaded model
def make_predictions(model, processed_data, steps=24):
    """Forecast ``steps`` periods ahead from the most recent observations.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(steps=..., last_window=...)``.
        NOTE(review): presumably a skforecast forecaster -- confirm.
    processed_data : pandas.DataFrame
        Passed unchanged to the model as ``last_window``.
    steps : int, default 24
        Number of periods to forecast; the default keeps the previously
        hard-coded horizon.

    Returns
    -------
    Whatever ``model.predict`` returns.
    """
    predictions = model.predict(steps=steps, last_window=processed_data)
    return predictions

In [72]:
# Unpickle the stored model (the path is relative to the notebook's working
# directory) and forecast from the preprocessed frame `data`.
model = load_model('../../Models/mooloolaba/forecaster_mool.pkl')
preds = make_predictions(model, data)





In [73]:
# Inspect the index of the predictions.
preds.index

DatetimeIndex(['2024-08-09 00:30:00', '2024-08-09 01:00:00',
               '2024-08-09 01:30:00', '2024-08-09 02:00:00',
               '2024-08-09 02:30:00', '2024-08-09 03:00:00',
               '2024-08-09 03:30:00', '2024-08-09 04:00:00',
               '2024-08-09 04:30:00', '2024-08-09 05:00:00',
               '2024-08-09 05:30:00', '2024-08-09 06:00:00',
               '2024-08-09 06:30:00', '2024-08-09 07:00:00',
               '2024-08-09 07:30:00', '2024-08-09 08:00:00',
               '2024-08-09 08:30:00', '2024-08-09 09:00:00',
               '2024-08-09 09:30:00', '2024-08-09 10:00:00',
               '2024-08-09 10:30:00', '2024-08-09 11:00:00',
               '2024-08-09 11:30:00', '2024-08-09 12:00:00'],
              dtype='datetime64[ns]', freq='30T')