# Insitu Validation

Select a group of stations from Fluxnet and or ISMN, retrieve their time series, longitude and latitude coordinates.   

Load the whole windsat dataset and select for the lat-lon of the station/s

Load a pre-trained model and predict surface temperature

Produce some statistics + plots, comparing ERA5 Surtep, Insitu Surtep and Predicted Surtep

In [27]:
# 
from ismn.interface import ISMN_Interface
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__name__), '../..')))


from src.processing import windsat_datacube, model_preprocess
from src.model import transform_batch, plot_history


models_folder = "../../models/"
windsat_folder = "../../data/raw/Daily_Windsat/"
ISMN_path =  "../../data/raw/Data_separate_files_header_20170101_20171231_10665_0t0D_20240415.zip"
fluxnet_folder = "../../data/raw/FLUXNET_CH4_has_2017_data/"



In [15]:
#Print available models:
print("Filenames in YYYYmmddTHHMMSS format: ")
for filename in os.listdir(models_folder):
    if filename.endswith(".keras"):
        print(filename)

Filenames in YYYYmmddTHHMMSS format: 
2024_05_28T103102.keras
2024_05_28T112040.keras
2024_05_28T112132.keras
2024_05_28T114946.keras
2024_05_28T140627.keras
2024_05_28T140851.keras
2024_05_28T150756.keras
2024_05_29T105032.keras
2024_05_29T111241.keras
2024_05_29T114725.keras
checkpoint.keras


In [16]:
# Load model
model_datestring = "2024_05_29T114725"
model_path = os.path.join(models_file, model_datestring) + ".keras"
model = load_model(model_path)
model.summary()

In [58]:
# Load the windsat dataset:
ds = windsat_datacube(windsat_folder)

# Process the dataset an select ascending passes
ascds = model_preprocess(ds)
ascds

# Convert lat and lon into coordinates

# # TODO: this is not necessary ... conver the values inside the coordinates
# def lonlat2grid(lon:float, lat:float):
#     """ 
#     using ascds lat and lon dvars, convert the coordinates into pixel location
#     """

ascds = ascds.assign_coords(
    {
        "latitude_grid": ascds.lat.values,
        "longitude_grid": ascds.lon.values
    }
)
ascds

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,0.99 MiB
Shape,"(21, 720, 1440)","(1, 360, 720)"
Dask graph,84 chunks in 133 graph layers,84 chunks in 133 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 83.06 MiB 0.99 MiB Shape (21, 720, 1440) (1, 360, 720) Dask graph 84 chunks in 133 graph layers Data type float32 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,0.99 MiB
Shape,"(21, 720, 1440)","(1, 360, 720)"
Dask graph,84 chunks in 133 graph layers,84 chunks in 133 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(720,)","(720,)"
Dask graph,1 chunks in 100 graph layers,1 chunks in 100 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.81 kiB 2.81 kiB Shape (720,) (720,) Dask graph 1 chunks in 100 graph layers Data type float32 numpy.ndarray",720  1,

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(720,)","(720,)"
Dask graph,1 chunks in 100 graph layers,1 chunks in 100 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(1440,)","(1440,)"
Dask graph,1 chunks in 180 graph layers,1 chunks in 180 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 5.62 kiB 5.62 kiB Shape (1440,) (1440,) Dask graph 1 chunks in 180 graph layers Data type float32 numpy.ndarray",1440  1,

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(1440,)","(1440,)"
Dask graph,1 chunks in 180 graph layers,1 chunks in 180 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 83.06 MiB 450.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 351 graph layers Data type float32 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 83.06 MiB 450.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 351 graph layers Data type float32 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 83.06 MiB 450.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 351 graph layers Data type float32 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 83.06 MiB 450.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 351 graph layers Data type float32 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,83.06 MiB,450.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 351 graph layers,336 chunks in 351 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,166.11 MiB,900.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 309 graph layers,336 chunks in 309 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 166.11 MiB 900.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 309 graph layers Data type float64 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,166.11 MiB,900.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 309 graph layers,336 chunks in 309 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,166.11 MiB,900.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 309 graph layers,336 chunks in 309 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 166.11 MiB 900.00 kiB Shape (21, 720, 1440) (1, 240, 480) Dask graph 336 chunks in 309 graph layers Data type float64 numpy.ndarray",1440  720  21,

Unnamed: 0,Array,Chunk
Bytes,166.11 MiB,900.00 kiB
Shape,"(21, 720, 1440)","(1, 240, 480)"
Dask graph,336 chunks in 309 graph layers,336 chunks in 309 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [21]:
# Load ISMN data
ismn_data = ISMN_Interface(ISMN_path, parallel=True)

Found existing ismn metadata in ..\..\data\raw\python_metadata\Data_separate_files_header_20170101_20171231_10665_0t0D_20240415.csv.


In [23]:
# Select the stations that have surface temperature
# Get the ids for the USCRN network and subset from the whole dataset.
ids = ismn_data.get_dataset_ids(
    variable=["surface_temperature"],
    filter_meta_dict={"network": "USCRN"},
)
uscrn_data = ismn_data.subset_from_ids(ids)
uscrn_data

Found existing ismn metadata in ..\..\data\raw\python_metadata\Data_separate_files_header_20170101_20171231_10665_0t0D_20240415.csv.


ismn.base.IsmnRoot Zip at ..\..\data\raw\Data_separate_files_header_20170101_20171231_10665_0t0D_20240415.zip
with Networks[Stations]:
------------------------
  USCRN: ['Aberdeen_35_WNW', 'Arco_17_SW', 'Asheville_13_S', 'Asheville_8_SSW', 'Austin_33_NW', 'Avondale_2_N', 'Baker_5_W', 'Batesville_8_WNW', 'Bedford_5_WNW', 'Blackville_3_W', 'Bodega_6_WSW', 'Boulder_14_W', 'Bowling_Green_21_NNE', 'Brigham_City_28_WNW', 'Bronte_11_NNE', 'Brunswick_23_S', 'Buffalo_13_ESE', 'Cape_Charles_5_ENE', 'Champaign_9_SW', 'Charlottesville_2_SSE', 'Chatham_1_SE', 'Chillicothe_22_ENE', 'Coos_Bay_8_SW', 'Cortez_8_SE', 'Corvallis_10_SSW', 'Crossville_7_NW', 'Darrington_21_NNE', 'Denio_52_WSW', 'Des_Moines_17_E', 'Dillon_18_WSW', 'Dinosaur_2_E', 'Durham_11_W', 'Durham_2_N', 'Durham_2_SSW', 'Edinburg_17_NNE', 'Elgin_5_S', 'Elkins_21_ENE', 'Everglades_City_5_NE', 'Fairhope_3_NE', 'Fallbrook_5_NE', 'Gadsden_19_N', 'Gaylord_9_SSW', 'Goodridge_12_NNW', 'Goodwell_2_E', 'Goodwell_2_SE', 'Harrison_20_SSE', 'Holly_

In [25]:
# Extract the longitude and latitude of the sensors in the uscrn network
valid_sensors = []
for network, station, sensor in uscrn_data.collection.iter_sensors(depth=[0, 0]):
    if (
        np.min(sensor.data["surface_temperature"]) > -100
    ):  # Some data will have badly recovered LST
        valid_sensors.append((network, station, sensor))

len(valid_sensors)

  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = 

113

In [49]:
# Retrieve the data into a dataframe

data_list = []
for network, station, sensor in valid_sensors[:] : 

    sensor_obj = uscrn_data[network.name][station.name][sensor.name]
    metadata = sensor_obj.metadata.to_pd()

    sensor_lat = metadata["latitude"]["val"]
    sensor_lon = metadata["longitude"]["val"]

    data_list.append(
        (
            sensor_obj.data,
            (sensor_lon,sensor_lat)
        )
    )

data_list[0]

  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = lambda f: pd.read_csv(
  readf = 

(                     surface_temperature  ... surface_temperature_orig_flag
 date_time                                 ...                              
 2017-01-01 00:00:00                 -6.8  ...                             0
 2017-01-01 01:00:00                 -8.5  ...                             0
 2017-01-01 02:00:00                 -8.9  ...                             0
 2017-01-01 03:00:00                 -8.7  ...                             0
 2017-01-01 04:00:00                 -8.9  ...                             0
 ...                                  ...  ...                           ...
 2017-12-30 20:00:00                -22.4  ...                             0
 2017-12-30 21:00:00                -21.7  ...                             0
 2017-12-30 22:00:00                -22.9  ...                             0
 2017-12-30 23:00:00                -23.7  ...                             0
 2017-12-31 00:00:00                -24.6  ...                             0

In [50]:
# sensor_obj = uscrn_data[network.name][station.name][sensor.name]
# sensor_lat = sensor_obj.metadata.to_pd()["latitude"]["val"]
# sensor_lon = sensor_obj.metadata.to_pd()["longitude"]["val"]
# print(sensor_lat,sensor_lon)

In [63]:
# Testing with a single sensor
sensor_data, sensor_coords = data_list[0]
subset = ascds.sel(longitude_grid = sensor_coords[0], latitude_grid =sensor_coords[1], method="nearest" )

ValueError: index must be monotonic increasing or decreasing