## Visualise Predictions and actuals on Map
Two kinds of results are supported: a regression prediction of the ground-level ozone value, and a categorical prediction encoded as low = 0, medium = 1, high = 2 and very high = 4.

### Import Libraries

In [5]:
import pandas as pd
import numpy as np
from pathlib import Path
import geopandas as gpd
import geodatasets, os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy.interpolate import griddata
from scipy.spatial import cKDTree
from datetime import datetime, timedelta

import folium
import seaborn as sns

### Declare Constants

In [2]:
# Root folder for the datasets; the results paths are built beneath it.
dataset_folder = "datasets/UK/"
# Sub-folder of `dataset_folder`; per-model results are read from and written to
# "<dataset_folder>/<model_dataset>/results/<model>/".
model_dataset = "aurn_cams_respiratory"
# Path to a countries shapefile. NOTE(review): not referenced by any cell
# visible in this part of the notebook - confirm whether it is still needed.
shapely_file = "datasets/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
# Model names; the main loop processes each one, and each name doubles as the
# results sub-directory and the prefix of "<model>_predictions.csv".
models = ["lstm", "bi_lstm", "gru", "bi_gru", "ensemble"]
# First day to plot, written as YYYY-MM-DD (it is parsed with '%Y-%m-%d').
start_date = '2024-01-05'
period = 7  # number of consecutive days to plot, starting at start_date
# Map extent passed to the static map's `set_extent` call.
lon_min, lon_max, lat_min, lat_max = -8.5, 1.8, 49.5, 60.8  # Longitude and latitude bounds for the UK

### Create Output Directory

In [3]:
def create_output_dir(path):
    """Make sure the directory ``path`` exists.

    Missing parent directories are created as well, and a directory that is
    already present is left untouched (no error is raised for it).

    Args:
        path: Directory to create, as a string or path-like object.

    Returns:
        None.
    """
    os.makedirs(name=path, exist_ok=True)

### The Function that Plots the Prediction

In [None]:
def plot_health_cases(df, out_column=None, output_dir=None, output_file=None, vmin=1, vmax=200, plot_v2=False):
    """Draw per-location case numbers on a static map and save it as an image.

    Each row of ``df`` becomes a coloured point at (Longitude, Latitude) with
    its value written on top. The points, the label backgrounds and the
    colorbar all share one colour scale defined by ``vmin``/``vmax``, so maps
    produced with the same limits are directly comparable.

    Args:
        df: DataFrame with "Longitude" and "Latitude" columns plus ``out_column``.
        out_column: Name of the column holding the value to plot (required).
        output_dir: Directory the image is written to (required); it is
            created if it does not exist.
        output_file: File name of the image inside ``output_dir`` (required).
            It is also printed in the bottom-left corner of the map.
        vmin: Lower limit of the colour scale.
        vmax: Upper limit of the colour scale.
        plot_v2: Currently unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``out_column``, ``output_dir`` or ``output_file`` is missing.
    """
    # Imported locally: the notebook only imports ``matplotlib.pyplot as plt``,
    # which does not bind the bare name ``matplotlib`` in the notebook namespace.
    from matplotlib.colors import Normalize

    if out_column is None or output_dir is None or not output_file:
        raise ValueError("out_column, output_dir and output_file are all required")

    # savefig does not create missing folders, so make sure the target exists.
    create_output_dir(output_dir)
    output_path = Path(output_dir) / Path(output_file)

    # Create figure with PlateCarree projection
    fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()})
    try:
        # Set map extent to focus on the UK
        ax.set_extent([lon_min, lon_max, lat_min, lat_max])

        # Add map features
        ax.add_feature(cfeature.COASTLINE, edgecolor='gray')
        ax.add_feature(cfeature.BORDERS, linestyle=":", edgecolor='gray')

        # One colormap and one normalisation shared by the points, the labels
        # and the colorbar, so that all three agree on what a colour means.
        cmap = plt.get_cmap('rainbow_r')
        norm = Normalize(vmin=vmin, vmax=vmax)

        # Plot the data points with colors representing case numbers.
        # The limits travel inside `norm`; matplotlib rejects passing
        # vmin/vmax together with an explicit norm.
        scatter = ax.scatter(
            df["Longitude"], df["Latitude"],
            c=df[out_column], cmap=cmap, norm=norm,
            linewidth=0.8, s=100, edgecolors='black'
        )

        # Add case numbers as text inside circles with matching background color
        for x, y, case in zip(df["Longitude"], df["Latitude"], df[out_column]):
            case_color = cmap(norm(case))  # Same colour the scatter point received
            ax.text(
                x, y, str(case), fontsize=12, ha="center", va="center",
                color="white", fontweight="bold",
                bbox=dict(facecolor=case_color, edgecolor="black", boxstyle="circle,pad=0.3")
            )

        # Add colorbar
        cbar = fig.colorbar(scatter, ax=ax, orientation="vertical")
        cbar.set_label("Number of Cases", rotation=270, labelpad=15)

        # Add small text with output file name at bottom-left of the map
        ax.text(
            0.02, 0.02, f"File: {output_file}", transform=ax.transAxes,
            fontsize=8, color="black", ha="left", va="bottom",
            bbox=dict(facecolor="white", alpha=0.5, edgecolor="none")
        )

        # Save plot
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure; this function is called in a loop and
        # unclosed figures would otherwise accumulate in memory.
        plt.close(fig)
    print(f"Plot saved as {output_dir}/{output_file}")


In [31]:
def plot_health_cases_folium(df, out_column=None, output_dir=None, output_file=None):
    """Draw per-location case numbers on an interactive Folium map and save it as HTML.

    Every row of ``df`` gets a blue circle whose radius grows with the value
    (never smaller than 5), a popup showing the value, and a text label with
    the value placed at the same position.

    Args:
        df: DataFrame with "Latitude" and "Longitude" columns plus ``out_column``.
        out_column: Name of the column holding the value to plot (required).
        output_dir: Directory the HTML file is written to (required); it is
            created if it does not exist.
        output_file: File name of the HTML file inside ``output_dir`` (required).

    Returns:
        The ``folium.Map`` object, so it can be displayed inline in the notebook.

    Raises:
        ValueError: If ``out_column``, ``output_dir`` or ``output_file`` is missing.
    """
    if out_column is None or output_dir is None or not output_file:
        raise ValueError("out_column, output_dir and output_file are all required")

    # Honour the caller's output_dir instead of rebuilding a path from a
    # notebook-level `model` variable that may not exist yet.
    create_output_dir(output_dir)
    output_path = Path(output_dir) / Path(output_file)

    # Define the center of the map based on the average location. With no rows
    # the mean would be NaN, so fall back to the middle of the configured extent.
    if df.empty:
        map_center = [(lat_min + lat_max) / 2, (lon_min + lon_max) / 2]
    else:
        map_center = [df["Latitude"].mean(), df["Longitude"].mean()]

    # Create a Folium map centered on the chosen location
    m = folium.Map(location=map_center, zoom_start=6, tiles="cartodbpositron")

    for lat, lon, cases in zip(df["Latitude"], df["Longitude"], df[out_column]):
        # Circle marker scaled by the value
        folium.CircleMarker(
            location=[lat, lon],
            radius=max(5, cases / 2),  # Adjust circle size
            color="blue",
            fill=True,
            fill_color="blue",
            fill_opacity=0.6,
            popup=folium.Popup(f"Cases: {cases}", parse_html=True),
        ).add_to(m)

        # Always-visible text label showing the value at the same position
        folium.Marker(
            location=[lat, lon],
            icon=folium.DivIcon(html=f'<div style="font-size: 12px; color: black; font-weight: bold;">{cases}</div>')
        ).add_to(m)

    # Save the map to an HTML file (passed as str for the widest compatibility)
    m.save(str(output_path))
    print(f"Interactive map saved as {output_path}")

    return m  # Return the map object for display in Jupyter Notebook (optional)



In [27]:
# def plot_health_cases(df, out_column=None, output_dir=None, output_file=None, vmin=1, vmax=200, plot_v2=False):
    
#     # Create figure with PlateCarree projection
#     fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()})
    
#     # Set map extent to focus on the UK
#     ax.set_extent([lon_min, lon_max, lat_min, lat_max])
    
#     # Add map features
#     ax.add_feature(cfeature.COASTLINE, edgecolor='gray')
#     ax.add_feature(cfeature.BORDERS, linestyle=":", edgecolor='gray')
    
#     # Plot the data points with colors representing case numbers
#     scatter = ax.scatter(
#         df["Longitude"], df["Latitude"], 
#         c=df[out_column], cmap='rainbow_r',  # Colormap (inverse rainbow for better contrast)
#         linewidth=0.8, s=100, edgecolors='black',  # Circle size and edges
#         vmin=df[out_column].min(), vmax=df[out_column].max()  # Color range
#     )
    
#     # Add case numbers as text inside circles
#     for x, y, case in zip(df["Longitude"], df["Latitude"], df[out_column]):
#         plt.text(
#             x, y, str(case), fontsize=12, ha="center", va="center",
#             color="white", fontweight="bold",
#             bbox=dict(facecolor="blue", edgecolor="black", boxstyle="circle,pad=0.3")  # Circular text background
#         )
    
#     # Add colorbar
#     cbar = fig.colorbar(scatter, ax=ax, orientation="vertical")
#     cbar.set_label("Number of Cases", rotation=270, labelpad=15)

#     # Create output directory if not exists
#     create_output_dir(output_dir)

#     # Save plot
#     plt.savefig(output_dir / Path(output_file), dpi=300, bbox_inches='tight')
#     plt.close()
#     print(f"Plot saved as {output_dir}/{output_file}")
    


### The Function that Processes the Model Data for Plotting

In [32]:
def process_model_data(model, start_date, period):
    """Plot one model's predicted and actual values for a run of consecutive days.

    Reads "<dataset_folder>/<model_dataset>/results/<model>/<model>_predictions.csv"
    and, for each day in the window, writes one map of "Predicted_Value" to
    "visualise/Predicted/<date>.png" and one map of "True_Value" to
    "visualise/Actual/<date>.png" beneath the model's results folder.
    Rows containing NaN values are ignored, and days without any remaining rows
    are skipped with a printed notice.

    Args:
        model: Model name; selects the results sub-directory and CSV file.
        start_date: First day to plot, as a 'YYYY-MM-DD' string or a
            date/datetime object.
        period: Number of consecutive days to plot, starting at ``start_date``.
    """
    # NOTE(review): shared colour limits so every map uses the same scale;
    # 238 is presumably the largest case count in the data - confirm.
    colour_min, colour_max = 1, 238

    results_dir = Path(dataset_folder) / Path(model_dataset) / Path('results') / Path(f"{model}")
    results_csv = results_dir / Path(f"{model}_predictions.csv")
    output_dir = results_dir / Path('visualise')
    predicted_dir = output_dir / 'Predicted'
    actual_dir = output_dir / 'Actual'

    # Create the output directories up front (parents are created as needed),
    # because saving a figure does not create missing folders.
    for directory in (predicted_dir, actual_dir):
        create_output_dir(directory)

    # Load the results CSV into a DataFrame
    df = pd.read_csv(results_csv, parse_dates=['Time'], dayfirst=True)

    # Reduce timestamps to calendar dates once, rather than on every iteration.
    df['Time'] = pd.to_datetime(df['Time'], dayfirst=True).dt.date

    # Convert start_date to a plain date, whatever form it arrived in
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
    first_day = start_date.date() if isinstance(start_date, datetime) else start_date

    for offset in range(period):
        current_day = first_day + timedelta(days=offset)

        # Filter DataFrame for the current date and drop rows with NaN values
        day_df = df[df['Time'] == current_day].dropna()
        if day_df.empty:
            print(f"No data for {current_day}; skipping plots")
            continue

        file_name = f"{current_day}.png"

        # Plot predicted and actual values on separate maps
        plot_health_cases(day_df, out_column="Predicted_Value", output_dir=predicted_dir,
                          output_file=file_name, vmin=colour_min, vmax=colour_max)

        plot_health_cases(day_df, out_column="True_Value", output_dir=actual_dir,
                          output_file=file_name, vmin=colour_min, vmax=colour_max)

        # Optional interactive alternative:
        # plot_health_cases_folium(
        #     day_df,
        #     out_column="True_Value",
        #     output_dir=actual_dir,
        #     output_file=f"{current_day}.html"
        # )

    

### The Main Loop to Generate all the plots for the various models

In [35]:
# Main execution loop: generate the plots for every model listed in `models`,
# over `period` days starting at `start_date`.
# NOTE: the loop variable `model` stays bound at notebook scope after the loop
# finishes, so later cells can see the last model name.
for model in models:
    process_model_data(model, start_date, period)


Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-05.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-06.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-07.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-08.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-09.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-10.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\lstm\visualise\2024-01-11.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\bi_lstm\visualise\2024-01-05.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\bi_lstm\visualise\2024-01-06.html
Interactive map saved as datasets\UK\aurn_cams_respiratory\results\bi_lstm\visualise\2024-01-07.html
Interactive