In [1]:
import tensorflow as tf
import numpy as np
from netCDF4 import Dataset
import os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import datetime as dt
from sklearn.model_selection import train_test_split

In [2]:
#set the paths
# All three locations live under one project directory. That directory can be
# overridden with the ATS780_HW2_DIR environment variable so the notebook can
# run on another machine; when the variable is unset the original locations
# are used unchanged.
project_dir = os.environ.get(
    'ATS780_HW2_DIR',
    '/Users/justinhudson/Documents/HW/ATS_780/Homework_2',
).rstrip('/')
# data_path and figure_path keep their trailing slash; root does not,
# exactly as in the original definitions.
data_path = f'{project_dir}/DATA/'
figure_path = f'{project_dir}/FIGURES/'
root = f'{project_dir}/CODE'

In [3]:
# Use one shared seed for every random number generator in the notebook
# (TensorFlow and NumPy here; rand_seed is also passed to the data split).
rand_seed = 144
tf.random.set_seed(rand_seed)
np.random.seed(rand_seed)

### DATA PREPARATION

In [4]:
# Load the ERA5 fields; the working directory is switched to data_path first
# so the file can be opened by its bare name.
os.chdir(data_path)
e5data = Dataset('ERA5_200hpa_U_V_relVort_Colorado_2021_2022.nc')
# NOTE(review): e5data is left open after this cell; the [:] slices below
# read the variables out of the file object.
era5_vars = e5data.variables
u, v, vort = (era5_vars[name][:] for name in ('u', 'v', 'vo'))
lats, lons, time = (era5_vars[name][:] for name in ('latitude', 'longitude', 'time'))

# Turn the time axis into datetime objects.
# Assumes `time` holds hour offsets from 1900-01-01 -- TODO confirm against
# the units attribute of the file's time variable.
ref_date = dt.datetime(1900,1,1)
dates = np.array([ref_date + dt.timedelta(hours=int(hrs)) for hrs in time])

In [15]:
# normalize the data
def normalize_data(dataset:np.ndarray) -> tuple[np.ndarray, float, float]:
    '''
        Min-max normalizes data so that it is on the range 0-1 and returns
        the normalized data together with the minimum and maximum that were
        used, so the scaling can later be undone.

        NaN values are ignored when finding the minimum and maximum and stay
        NaN in the output. If every (non-NaN) value is identical the range is
        zero; in that case the normalized data is all zeros rather than the
        result of a division by zero.

        Inputs:
            dataset (np.ndarray): The dataset to be normalized
        
        Outputs:
            norm_data (np.ndarray): The normalized data
            data_min (float): The minimum of the original dataset
            data_max (float): The maximum of the original dataset
    '''

    data_min = np.nanmin(dataset)
    data_max = np.nanmax(dataset)
    data_range = data_max - data_min

    shifted = dataset - data_min
    if data_range > 0:
        norm_data = shifted / data_range
    else:
        # Constant (or all-NaN) input: avoid dividing by zero. Multiplying by
        # 0.0 yields float zeros while leaving any NaNs as NaN.
        norm_data = shifted * 0.0

    return norm_data, data_min, data_max

def denormalize_data(normed_data:np.ndarray,orig_min:float,orig_max:float) -> np.ndarray:
    '''
        Undoes a min-max normalization by rescaling the data with the
        minimum and maximum of the dataset it was derived from.

        Inputs:
            normed_data (np.ndarray): The normalized dataset, expected to
                lie on the range [0-1]
            orig_min (float): The minimum of the original dataset that was
                used when normalizing
            orig_max (float): The maximum of the original dataset that was
                used when normalizing
        
        Returns:
            denormed_data (np.ndarray): The rescaled dataset; values on
                [0-1] are mapped onto [orig_min,orig_max]
    '''

    # Stretch by the original range, then shift back up to the original minimum
    orig_range = orig_max - orig_min
    return normed_data * orig_range + orig_min

# Normalize each field on its own, keeping every field's min and max so the
# scaling can be reversed later with denormalize_data.
(u_norm, u_min, u_max), (v_norm, v_min, v_max), (vort_norm, vort_min, vort_max) = (
    normalize_data(field) for field in (u, v, vort)
)

In [16]:
# Build the model input: each sample is a 2-channel "image" with normalized u
# in channel 0 and normalized v in channel 1.
# Resulting layout is (time,lat,lon,channel).
uv_data = np.empty((*u.shape[:3], 2))
uv_data[..., 0] = u_norm
uv_data[..., 1] = v_norm

In [17]:
# Partition the samples into three subsets:
#   testing    - 10% of all samples
#   validation - 20% of what is left after the test samples are removed
#   training   - everything that remains
# Both splits shuffle the samples and use the same seed.
split_opts = dict(shuffle=True, random_state=rand_seed)

# first hold out the test samples
x_fit, x_test, y_fit, y_test = train_test_split(
    uv_data, vort_norm, test_size=0.1, **split_opts
)
# then carve the validation samples out of the non-test samples
x_train, x_val, y_train, y_val = train_test_split(
    x_fit, y_fit, test_size=0.2, **split_opts
)

# sanity check: report the shape of every subset
print("TRAINING, VALIDATION, AND TESTING SHAPES:")
print(f'XTRAIN: {x_train.shape}, XVAL: {x_val.shape}, XTEST: {x_test.shape}')
print(f'YTRAIN: {y_train.shape}, YVAL: {y_val.shape}, YTEST: {y_test.shape}')

TRAINING, VALIDATION, AND TESTING SHAPES:
XTRAIN: (12614, 25, 41, 2), XVAL: (3154, 25, 41, 2), XTEST: (1752, 25, 41, 2)
YTRAIN: (12614, 25, 41), YVAL: (3154, 25, 41), YTEST: (1752, 25, 41)


### Custom Loss Functions Are Defined Here

We want the loss to be computed pixel-wise, because the goal is to reproduce the entire relative vorticity field.