## Visualise Predictions and actuals on Map
Two kinds of result are supported: the regression prediction of the ground ozone value, and a categorical prediction encoded as low = 0, medium = 1, high = 2 and very high = 4.

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import geopandas as gpd
import geodatasets, os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy.interpolate import griddata
from scipy.spatial import cKDTree
from datetime import datetime, timedelta

### Declare Constants

In [2]:
# Where the input data lives
dataset_folder = 'datasets/UK/'
model_dataset = 'aurn_cam'
# NOTE(review): not referenced by the cells visible here — confirm it is still needed
shapely_file = 'datasets/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'

# One results folder is expected per entry; the main loop iterates over this list
models = ['lstm', 'bi_lstm', 'gru', 'bi_gru', 'ensemble']

# Plotting window: `period` consecutive days beginning at `start_date` (YYYY-MM-DD)
start_date = "2024-01-05"
period = 7  # in days

# Longitude/latitude bounding box used as the map extent
lon_min, lon_max = -8.5, 1.8
lat_min, lat_max = 49.5, 60.8

### Create Output Directory

In [3]:
def create_output_dir(path):
    """Ensure `path` exists as a directory, creating missing parents as needed."""
    # Nothing to do when the directory is already there
    if os.path.isdir(path):
        return
    os.makedirs(path, exist_ok=True)

### The Function that Plots the Prediction

In [4]:
def plot_go3(df, out_column=None, output_dir=None, output_file=None, vmin=1, vmax=238, plot_v2=False):
    """
    Scatter-plot one column of `df` on a map and save the figure as an image.

    Parameters
    ----------
    df : DataFrame with "Longitude" and "Latitude" columns plus `out_column`.
        It is not modified by this function.
    out_column : name of the column whose values colour the points.
    output_dir : directory (str or Path) the image is written to; created if missing.
    output_file : file name of the image inside `output_dir`.
    vmin, vmax : limits of the colour scale. Must be integers when `plot_v2`
        is True because they are used to build the colorbar ticks.
    plot_v2 : when True the values are cast to int and the colorbar shows one
        integer tick per step between `vmin` and `vmax`.
    """
    # Work on a local Series so the caller's DataFrame is never modified
    # (the caller typically passes a filtered slice of a larger frame).
    values = df[out_column]
    if plot_v2:
        values = values.astype(int)

    # Accept both str and Path for the output directory
    target_dir = Path(output_dir)

    fig, ax = plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()})
    try:
        # Restrict the map to the configured longitude/latitude bounding box
        ax.set_extent([lon_min, lon_max, lat_min, lat_max])

        # Add map features
        ax.add_feature(cfeature.COASTLINE, edgecolor='gray')
        ax.add_feature(cfeature.BORDERS, linestyle=":", edgecolor='gray')

        # One filled circle per row, coloured by value on the reversed rainbow colormap
        scatter = ax.scatter(
            df["Longitude"], df["Latitude"],
            c=values, cmap='rainbow_r',
            linewidth=0.8, vmin=vmin, vmax=vmax, s=100,
            edgecolors='none'  # no outline, so each marker is a single solid colour
        )

        # Create colorbar attached to the figure
        cbar = fig.colorbar(scatter, ax=ax, orientation="vertical")
        cbar.set_label("GO3 (Ozone)", rotation=270, labelpad=20)

        # Integer ticks for the categorical (version 2) plot
        if plot_v2:
            cbar.set_ticks(range(vmin, vmax + 1, 1))
            cbar.ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{int(x)}'))

        # Create output directory if not exists
        create_output_dir(target_dir)

        # Save through the figure object rather than pyplot's implicit current figure
        fig.savefig(target_dir / output_file, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    print(f"Plot saved as {output_dir}/{output_file}")



### The Function that Processes the Model Data for Plotting

In [5]:
def process_model_data(model, start_date, period):
    """
    Plot one model's predictions and actual values for each day of a period.

    Reads `<dataset_folder>/<model_dataset>/results/<model>/<model>_predictions.csv`
    and, for each of `period` consecutive days starting at `start_date`, writes
    four images under `.../results/<model>/visualise/`:
    `Predicted/`, `Actual/`, `Predicted_Decile/` and `Actual_Decile/`, each
    named `<YYYY-MM-DD>.png`.

    Parameters
    ----------
    model : model name; selects the results folder and CSV file.
    start_date : first day to plot, as a 'YYYY-MM-DD' string, a datetime, or a date.
    period : number of consecutive days to plot.
    """
    model_results = Path(dataset_folder) / model_dataset / 'results' / f"{model}"
    results_csv = model_results / f"{model}_predictions.csv"
    output_dir = model_results / 'visualise'

    # Create the base output directory
    create_output_dir(output_dir)

    # Load the results CSV into a DataFrame
    df = pd.read_csv(results_csv, parse_dates=['Time'], dayfirst=True)

    # Reduce timestamps to calendar dates once, up front; this does not depend
    # on the day being plotted, so it must not be repeated inside the loop.
    df['Time'] = pd.to_datetime(df['Time']).dt.date

    # Normalise start_date to a plain date (datetime is a subclass of date,
    # so check for datetime explicitly before falling back to the value as-is).
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
    first_day = start_date.date() if isinstance(start_date, datetime) else start_date

    # (column to plot, output sub-folder, vmin, vmax, integer-tick variant)
    plot_specs = (
        ("Predicted_Value", 'Predicted', 1, 238, False),
        ("True_Value", 'Actual', 1, 238, False),
        ("Predicted_Value_Decile", 'Predicted_Decile', 0, 4, True),
        ("True_Value_Decile", 'Actual_Decile', 0, 4, True),
    )

    for offset in range(period):
        day = first_day + timedelta(days=offset)

        # Rows for this day only; rows with a NaN in any column are dropped
        filtered_df = df[df['Time'] == day].dropna()

        for column, subdir, vmin, vmax, as_int in plot_specs:
            plot_go3(filtered_df, out_column=column, output_dir=output_dir / subdir,
                     output_file=f"{day}.png", vmin=vmin, vmax=vmax, plot_v2=as_int)


### The Main Loop to Generate all the plots for the various models

In [6]:
# Main execution loop: for every model in `models`, generate the daily plots
# covering `period` days starting at `start_date`.
for model in models:
    process_model_data(model, start_date, period)


Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted/2024-01-05.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual/2024-01-05.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted_Decile/2024-01-05.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual_Decile/2024-01-05.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted/2024-01-06.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual/2024-01-06.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted_Decile/2024-01-06.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual_Decile/2024-01-06.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted/2024-01-07.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual/2024-01-07.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Predicted_Decile/2024-01-07.png
Plot saved as datasets\UK\aurn_cam\results\lstm\visualise\Actual_De