# Description

[WORK IN PROGRESS] This notebook defines and tests a custom Python class, `LocationEvaluator`, which retrieves archived data (or runs inference if necessary) for a given initial time and number of timesteps. The returned object will include the predictions, ACC, and RMSE for a given set of variables at a given set of locations.

# Setup

In [None]:
# Install required dependencies (Wall time: 3min 15s) 
!pip install torch
!pip install torchvision
!pip install wandb
!conda install ruamel.yaml -y
!pip install timm
!pip install einops
!pip install apex
!pip install zarr

In [74]:
import s3fs
import h5py
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import zarr

In [75]:
# Set up S3 bucket file system
# S3FileSystem is created with no explicit arguments; the resulting `s3`
# object is what later cells call `.open(...)` on to read remote files.
s3 = s3fs.S3FileSystem()

## ZARR files

In [76]:
# Create a 10000 x 10000 zero-filled array stored in 1000 x 1000 chunks,
# then leave it as the last expression so the cell displays its repr.
z = zarr.zeros(
    shape=(10000, 10000),
    chunks=(1000, 1000),
    dtype='i4',
)
z

<zarr.core.Array (10000, 10000) int32>

## H5 files

In [77]:
# Inspect the HDF5 attributes attached to the 'predicted' entry of the baseline file.
baseline_path = '../override_dir/baselines/baseline_30-timesteps_vis.h5'

# The context manager closes the file even if the key lookup or a print raises,
# which the previous explicit f.close() at the end of the cell did not guarantee.
with h5py.File(baseline_path, 'r') as f:
    pred = f['predicted']
    print(type(pred.attrs.items()))
    print(pred.attrs.keys())

<class 'h5py._hl.base.ItemsViewHDF5'>
<KeysViewHDF5 []>


In [45]:
# Open the HDF5 file at `inf_data_path` through the S3 file system and list
# its top-level keys, then the attribute keys of its 'fields' entry.
inf_data_path = 's3://climate-ai-data-science-sandbox-fourcastnet/data/out_of_sample/2018.h5'
remote_handle = s3.open(inf_data_path, 'rb')
valid_data_full = h5py.File(remote_handle, 'r')
print(valid_data_full.keys())

# NOTE(review): the variable is called "small" but the key read is 'fields' —
# confirm which of the two is intended. The file is deliberately left open here.
valid_data_small = valid_data_full['fields']
print(valid_data_small.attrs.keys())

<KeysViewHDF5 ['fields', 'small']>
<KeysViewHDF5 []>


# Location-based Evaluation Tool

In [54]:
def latlon_to_rowcol(coords, verbose=True):
    """Convert a (lat, lon) pair in degrees to (row, col) grid indices.

    The mapping uses 4 pixels per degree (0.25° per pixel) on a grid that is
    1440 columns wide, with latitude 0 at row 360 and rows increasing as
    latitude decreases (so latitude 90 maps to row 0).

    Parameters
    ----------
    coords : tuple
        ``(lat, lon)`` in degrees. Negative longitudes and longitudes of 360
        or more are wrapped onto ``[0, 1440)``.
    verbose : bool, optional
        When True (the default) the computed mapping is printed.

    Returns
    -------
    tuple of int
        ``(row, col)``. Non-integer results are rounded to the nearest pixel so
        that the values can be used directly as array indices.

    Notes
    -----
    Latitude is not range-checked: a latitude of -90 yields row 720.
    """
    lat, lon = coords
    pixels_per_degree = 4  # each pixel is 0.25°
    n_rows, n_cols = 720, 1440

    row = int(round(n_rows // 2 - lat * pixels_per_degree))
    # Modulo handles negative longitudes (e.g. -10 -> 1400) as well as values
    # at or beyond a full turn (e.g. 360 -> 0) with one expression.
    col = int(round(lon * pixels_per_degree)) % n_cols

    if verbose:
        print(f'Lat of {lat} => row {row}')
        print(f'Lon of {lon} => col {col}')
    return row, col

# Quick check of the conversion on a single point (latitude -30, longitude 40).
coords = (-30, 40)
lat, lon = coords
latlon_to_rowcol(coords)

Lat of -30 => row 480
Lon of 40 => col 160


In [67]:
class LocationEvaluator:
    """Holds the configuration for location-specific evaluation of a FourCastNet run.

    Work in progress: the class currently records the run configuration, can
    check whether an archived HDF5 run exists, and converts each location to
    grid indices. ``run_inference`` is a stub and ``extract_data`` does not yet
    read any predictions.

    Parameters
    ----------
    locations : list of tuples of ints
        The locations being predicted, as ``(lat, lon)`` pairs.
    prediction_length : int
        Length of the prediction (presumably in timesteps — TODO confirm).
    predict_variables : list of str, optional
        Names of the variables of interest. ``None`` or an empty list selects
        every variable in ``all_vars``.

    Attributes
    ----------
    all_vars : list of str
        Every known variable name, in index order.
    idx2var_dict, var2idx_dict : dict
        Index -> name and name -> index lookups derived from ``all_vars``.
    file_path : str
        Path of the archived HDF5 run; updated by ``archive_exists`` on success.
    """

    def __init__(self, locations, prediction_length, predict_variables=None):
        self.locations = locations
        self.prediction_length = prediction_length
        self.all_vars = ['U10', 'V10', 'T2m', 'sp', 'mslp', 'U1000', 'V1000', 'Z1000', 'T850', 'U850', 'V850', 'Z850', 'RH850', 'T500', 'U500', 'V500', 'Z500', 'RH500', 'Z50', 'TCWV']
        if predict_variables is None or predict_variables == []:
            # Copy so that editing the selection never alters the master list.
            self.predict_variables = list(self.all_vars)
        else:
            self.predict_variables = predict_variables
        # Both lookups are derived from all_vars so the three cannot drift apart.
        self.idx2var_dict = dict(enumerate(self.all_vars))
        self.var2idx_dict = {name: idx for idx, name in self.idx2var_dict.items()}
        self.file_path = '../override_dir/baselines/baseline_30-timesteps_vis.h5'

    def __str__(self):
        """Return a three-line summary of locations, prediction length and variables."""
        return f"Locations          : {self.locations}\nPrediction length  : {self.prediction_length}\nVariables          : {self.predict_variables}"

    def archive_exists(self, file_path):
        """Report whether an archived run can be opened at ``file_path``.

        On success ``self.file_path`` is updated to ``file_path``.

        Returns
        -------
        bool
            True if the file opened, False if ``FileNotFoundError`` was raised.
            Any other exception raised while opening the file propagates.
        """
        try:
            # The context manager guarantees the handle is released on every path.
            with h5py.File(file_path, 'r'):
                print("Archived run exists. Closing file....")
                self.file_path = file_path
            return True
        except FileNotFoundError:
            print('Archived run does not exist.')
            return False

    def run_inference(self, h5_output_path, zarr_output_path):
        """Placeholder for running inference; not implemented yet."""
        pass

    def extract_data(self):
        """Convert every configured location to grid indices.

        Work in progress: the indices are computed (and printed by
        ``latlon_to_rowcol``) but nothing is read from the H5 file at
        ``self.file_path`` yet, and the method returns None.
        """
        # TODO: open self.file_path and pull the predictions at each (row, col).
        for lat, lon in self.locations:
            row, col = latlon_to_rowcol((lat, lon))

        return
    

In [68]:
# Example run of LocationEvaluator: inputs for a single-location evaluation.
baseline_path = '../override_dir/baselines/baseline_30-timesteps_vis.h5'
prediction_length = 30
predict_variables = ['U10', 'T2m']
locations = [(37, 40)]  # start with just one prediction location

# TODO: start time as meta data
# start_time = ?


In [69]:
# Build the evaluator from the example configuration.
loc = LocationEvaluator(
    locations=locations,
    prediction_length=prediction_length,
    predict_variables=predict_variables,
)

In [70]:
# Convert each configured location to grid indices; latlon_to_rowcol prints each mapping.
# NOTE(review): the saved output for this cell ends in a TypeError — re-run from a
# fresh kernel to confirm it no longer occurs with the current definitions.
loc.extract_data()

Lat of 37 => row 212
Lon of 40 => col 160


TypeError: cannot unpack non-iterable NoneType object

In [17]:
# Show the evaluator's configuration summary as produced by LocationEvaluator.__str__.
print(loc)

Locations          : [(37, 40)]
Prediction length  : 30
Variables          : ['U10', 'T2m']
