# Importing packages

In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os
import re
from sklearn.model_selection import train_test_split
from tqdm import tqdm

np.random.seed(2022)

# Relative humidity conversion functions

In [2]:
def eliq(T):
    """Saturation vapor pressure over liquid water as a polynomial in temperature.

    An eighth-degree polynomial is evaluated at ``T - 273.16``, with that
    argument floored at -80, and the result is multiplied by 100.

    Parameters
    ----------
    T : scalar or array
        Temperature (presumably kelvin, given the 273.16 offset -- confirm).

    Returns
    -------
    Same shape as ``T``: 100 times the polynomial value (presumably hPa
    converted to Pa -- confirm against the source of the coefficients).
    """
    # Coefficients are listed highest power first, as np.polyval expects.
    coeffs = np.array(
        [-0.976195544e-15, -0.952447341e-13, 0.640689451e-10,
         0.206739458e-7, 0.302950461e-5, 0.264847430e-3,
         0.142986287e-1, 0.443987641, 6.11239921],
        dtype=np.float32,
    )
    lowest_offset = np.float32(-80.0)
    ref_temp = np.float32(273.16)
    # Offsets below -80 are clamped, so the curve is flat for colder inputs.
    offset = np.maximum(lowest_offset, T - ref_temp)
    return np.float32(100.0) * np.polyval(coeffs, offset)

def eice(T):
    """Saturation vapor pressure over ice, in three temperature regimes.

    * ``T > 273.15``: defers to ``eliq(T)``.
    * ``T <= 185``: a quadratic in ``max(-100, T - 273.16)``, times 100.
    * otherwise: an eighth-degree polynomial in ``T - 273.16``, times 100.

    Parameters
    ----------
    T : scalar or array
        Temperature (presumably kelvin -- confirm).

    Returns
    -------
    Array with the shape of ``T``. All three branches are evaluated for every
    element and then selected with ``np.where``.
    """
    # Polynomial coefficients, highest power first.
    ice_coeffs = np.array(
        [0.252751365e-14, 0.146898966e-11, 0.385852041e-9,
         0.602588177e-7, 0.615021634e-5, 0.420895665e-3,
         0.188439774e-1, 0.503160820, 6.11147274],
        dtype=np.float32,
    )
    # Regime thresholds, offset floor, and quadratic coefficients (constant, linear, quadratic).
    liquid_above, quadratic_below, offset_floor, k0, k1, k2 = np.float32(
        np.array([273.15, 185, -100, 0.00763685, 0.000151069, 7.48215e-07])
    )
    ref_temp = np.float32(273.16)
    scale = np.float32(100.0)

    offset = T - ref_temp
    floored = np.maximum(offset_floor, offset)

    very_cold = scale * (k0 + floored * (k1 + floored * k2))
    ice_poly = scale * np.polyval(ice_coeffs, offset)

    below_melting = np.where(T <= quadratic_below, very_cold, ice_poly)
    return np.where(T > liquid_above, eliq(T), below_melting)

def esat(T):
    """Saturation vapor pressure blended between the ice and liquid formulas.

    * ``T > 273.16``: ``eliq(T)``.
    * ``T < 253.16``: ``eice(T)``.
    * in between: a linear mix ``w * eliq(T) + (1 - w) * eice(T)`` with
      ``w = (T - 253.16) / (273.16 - 253.16)`` clipped to [0, 1].

    Parameters
    ----------
    T : scalar or array
        Temperature (presumably kelvin -- confirm).

    Returns
    -------
    Array with the shape of ``T``.
    """
    liquid_above = np.float32(273.16)
    ice_below = np.float32(253.16)

    raw_weight = (T - ice_below) / (liquid_above - ice_below)
    weight = np.minimum(np.float32(1.0), np.maximum(np.float32(0.0), raw_weight))

    over_liquid = eliq(T)
    over_ice = eice(T)
    blended = weight * over_liquid + (1 - weight) * over_ice

    return np.where(T > liquid_above, over_liquid,
                    np.where(T < ice_below, over_ice, blended))

def RH(T,qv,P0,PS,hyam,hybm):
    """Convert a humidity profile ``qv`` to relative humidity.

    Pressure is rebuilt as ``P0 * hyam + PS[:, None] * hybm`` and the result is
    ``Rv * p * qv / (R * esat(T))`` with ``R = 287`` and ``Rv = 461``.

    Parameters
    ----------
    T, qv : array
        Temperature and humidity; must broadcast against the pressure array.
    P0 : scalar or array
        Reference pressure multiplying ``hyam``.
    PS : 1-D array
        Surface pressure; a trailing axis is added so it broadcasts over
        ``hybm``.
    hyam, hybm : array
        Hybrid-coordinate coefficients.

    Returns
    -------
    float32 array (presumably a fraction rather than a percentage -- confirm).
    """
    dry_air_const = np.float32(287.0)
    vapor_const = np.float32(461.0)

    # Total pressure (Pa)
    pressure = np.float32(P0 * hyam + PS[:, None] * hybm)
    temperature = np.float32(T)
    humidity = np.float32(qv)

    saturation = esat(temperature)
    return vapor_const * pressure * humidity / (dry_air_const * saturation)

# Data processing functions

In [5]:
def doMonth(month):
    """Open all files for one month in the current directory as one dataset.

    A file is selected when its name contains ``"h1.0000-MM"``, where ``MM``
    is ``month`` zero-padded to two digits.

    Parameters
    ----------
    month : int or str
        Month number (e.g. ``1`` or ``"01"``).

    Returns
    -------
    The object returned by ``xr.open_mfdataset`` for the matching files.

    Raises
    ------
    FileNotFoundError
        If no file in the current directory matches the month tag.
    """
    tag = "h1.0000-" + str(month).zfill(2)
    # Pure-Python replacement for the IPython-only `!ls`: like `ls`, hidden
    # entries are skipped and names come back sorted.
    datasets = sorted(
        name for name in os.listdir(".")
        if not name.startswith(".") and tag in name
    )
    if not datasets:
        raise FileNotFoundError(
            "no files containing '" + tag + "' found in " + os.getcwd()
        )
    return xr.open_mfdataset(datasets)

def make_nninput(month, family = "relative", suffix = None):
    """Build neural-network input and target arrays for one month of data.

    The month is loaded with ``doMonth``. Relative humidity is recomputed
    column by column with ``RH`` from NNTBP, NNQBP, P0, NNPS, hyam and hybm,
    and compared with the dataset's own RELHUM (divided by 100) as a
    diagnostic.

    Every 4-D field is indexed as (time, lev, lat, lon) and rearranged to
    (lev, time, lat, lon). The first time step is dropped from all fields
    except LHFLX and SHFLX, which drop the last one instead (presumably so the
    fluxes come from the preceding step -- confirm). The 3-D fields get a
    leading axis of length 1. After concatenation along axis 0 the last
    remaining time step is dropped as well.

    Parameters
    ----------
    month : int or str
        Month number, forwarded to ``doMonth``.
    family : {"relative", "specific"}
        Humidity profile used in the input: "relative" uses the relative
        humidity recomputed here, "specific" uses NNQBP as-is.
    suffix : str, optional
        Inserted in the diagnostics file name
        (``'diagnostics' + suffix + '.txt'``). When omitted, a notebook-level
        variable named ``suffix`` is used if one exists, otherwise ``""``.

    Returns
    -------
    nnInput : float32 array
        Concatenation along axis 0 of NNTBP, the chosen humidity, NNPS, SOLIN,
        SHFLX and LHFLX.
    nnTarget : float32 array
        Concatenation along axis 0 of TPHYSTND and PHQ.

    Raises
    ------
    ValueError
        If ``family`` is not "specific" or "relative".

    Side effects
    ------------
    Prints progress and shapes, and appends a diagnostics summary to
    ``'diagnostics' + suffix + '.txt'`` in the current directory.
    """
    # Fail fast: without this check an unknown family only surfaces as an
    # unbound `nnInput` after the whole month has been loaded and processed.
    if family not in ("specific", "relative"):
        raise ValueError('family must be "specific" or "relative", got ' + repr(family))
    # The original read `suffix` from hidden notebook state; keep honoring a
    # notebook-level value, but do not crash at the very end if none exists.
    if suffix is None:
        suffix = globals().get("suffix", "")

    spData = doMonth(month)
    print("read in data")
    nntbp = np.float32(spData["NNTBP"].values)
    nnqbp = np.float32(spData["NNQBP"].values)
    p0 = np.float32(spData["P0"].values)
    ps = np.float32(spData["NNPS"].values)
    hyam = np.float32(spData["hyam"].values)
    hybm = np.float32(spData["hybm"].values)
    relhum = np.float32(spData["RELHUM"].values)
    tphystnd = np.float32(spData["TPHYSTND"].values)
    phq = np.float32(spData["PHQ"].values)

    # Collapse P0 to its distinct value(s); presumably it is one constant
    # repeated along the time axis -- confirm.
    p0 = np.float32(np.array(list(set(p0))))
    print("loaded in data")

    numLat = spData["lat"].shape[0]
    numLon = spData["lon"].shape[0]
    newhum = np.zeros((spData["time"].shape[0],
                       spData["lev"].shape[0],
                       numLat,
                       numLon), dtype = np.float32)
    print("starting for loop")
    for latIndex in tqdm(range(numLat)):
        for lonIndex in range(numLon):
            # Same formula as the shared RH helper, applied to one column.
            newhum[:, :, latIndex, lonIndex] = RH(nntbp[:, :, latIndex, lonIndex],
                                                  nnqbp[:, :, latIndex, lonIndex],
                                                  p0,
                                                  ps[:, latIndex, lonIndex],
                                                  hyam,
                                                  hybm)

    # Profiles: drop the first time step, then move lev to the front.
    nntbp = np.moveaxis(nntbp[1:,:,:,:],0,1)
    nnqbp = np.moveaxis(nnqbp[1:,:,:,:],0,1)
    # Surface fluxes drop the LAST time step instead of the first.
    lhflx = np.float32(spData["LHFLX"].values[np.newaxis,:-1,:,:])
    shflx = np.float32(spData["SHFLX"].values[np.newaxis,:-1,:,:])
    # Reuse the NNPS array already in memory rather than reading it again.
    ps = ps[np.newaxis,1:,:,:]
    solin = np.float32(spData["SOLIN"].values[np.newaxis,1:,:,:])
    newhum = np.moveaxis(newhum[1:,:,:,:],0,1)
    oldhum = np.moveaxis(relhum[1:,:,:,:],0,1)
    tphystnd = np.moveaxis(tphystnd[1:,:,:,:],0,1)
    phq = np.moveaxis(phq[1:,:,:,:],0,1)

    components = (("nntbp", nntbp), ("nnqbp", nnqbp), ("lhflx", lhflx),
                  ("shflx", shflx), ("ps", ps), ("solin", solin),
                  ("newhum", newhum), ("oldhum", oldhum),
                  ("tphystnd", tphystnd), ("phq", phq))
    for label, values in components:
        print(label)
        print(values.shape)

    # Only the humidity profile differs between the two families.
    humidity = nnqbp if family == "specific" else newhum
    nnInput = np.concatenate((nntbp, humidity, ps, solin, shflx, lhflx))[:,:-1,:,:]
    nnTarget = np.concatenate((tphystnd, phq))[:,:-1,:,:]

    # Inside a function a bare `x.shape` displays nothing; print explicitly.
    print("nnInput")
    print(nnInput.shape)

    print("nnTarget")
    print(nnTarget.shape)

    # RELHUM is divided by 100 before comparison with the recomputed value.
    errors = (newhum-oldhum/100).flatten()
    lines = ["Mean relative humidity conversion error: " + str(np.mean(errors)),
             "Variance for relative humidity conversion error: " + str(np.var(errors))]
    lines.extend(label + ".shape: " + str(values.shape) for label, values in components)
    lines.append("nnInput.shape: " + str(nnInput.shape))
    result = "\n".join(lines) + "\n"
    print(result)

    diagnostics = 'diagnostics' + suffix + '.txt'
    with open(diagnostics, 'a') as fp:
        fp.write(result)

    return nnInput, nnTarget
                         
def sampleIndices(size, spacing, fixed = True):
    """Choose about ``size / spacing`` positions from ``range(size)``.

    Parameters
    ----------
    size : int
        Length of the axis being sampled.
    spacing : number
        Target spacing; ``round(size / spacing)`` positions are returned.
    fixed : bool
        If True, positions are spread evenly from the first to the last index.
        If False, they are drawn by shuffling with the global ``np.random``
        state.

    Returns
    -------
    A numpy array of indices when ``fixed`` is True, a Python list otherwise.
    """
    target = np.round(size/spacing)

    if not fixed:
        pool = list(range(size))
        np.random.shuffle(pool)
        return pool[:int(target)]

    # Evenly spaced 1-based positions, rounded, then shifted to 0-based.
    positions = np.round(np.linspace(1, size, int(target)))
    return np.array([int(pos) for pos in positions]) - 1

def reshapeInput(nnData, subsample = False, spacing = 5):
    """Flatten a 4-D input array to 2-D, keeping the first axis as rows.

    The three trailing axes are collapsed in Fortran order, so the second
    axis varies fastest along each row.

    The row count is taken from ``nnData.shape[0]``. A hard-coded 64 reshapes
    without error whenever the total size is divisible by 64, and then mixes
    different variables into the same row if the first axis is not 64.

    Parameters
    ----------
    nnData : 4-D array
        Array whose first axis indexes the rows to keep.
    subsample : bool
        If True, keep only the evenly spaced positions along the last axis
        chosen by ``sampleIndices``.
    spacing : number
        Spacing passed to ``sampleIndices`` when subsampling.

    Returns
    -------
    2-D array of shape ``(nnData.shape[0], -1)``.
    """
    if subsample:
        nnData = nnData[:,:,:,sampleIndices(nnData.shape[3], spacing, True)]
    return nnData.reshape(nnData.shape[0], -1, order = 'F')

def reshapeTarget(nnData, subsample = False, spacing = 5):
    """Flatten a 4-D target array to 2-D, keeping the first axis as rows.

    The three trailing axes are collapsed in Fortran order, so the second
    axis varies fastest along each row.

    The row count is taken from ``nnData.shape[0]``. A hard-coded 60 reshapes
    without error whenever the total size is divisible by 60, and then mixes
    different variables into the same row if the first axis is not 60.

    Parameters
    ----------
    nnData : 4-D array
        Array whose first axis indexes the rows to keep.
    subsample : bool
        If True, keep only the evenly spaced positions along the last axis
        chosen by ``sampleIndices``.
    spacing : number
        Spacing passed to ``sampleIndices`` when subsampling.

    Returns
    -------
    2-D array of shape ``(nnData.shape[0], -1)``.
    """
    if subsample:
        nnData = nnData[:,:,:,sampleIndices(nnData.shape[3], spacing, True)]
    return nnData.reshape(nnData.shape[0], -1, order = 'F')



In [54]:
# `make_nninput_relative` is not defined in this notebook; the current entry
# point is `make_nninput`, which returns an (input, target) pair.
# NOTE(review): the recorded output below predates this API -- re-run to refresh it.
hmm, hmmTarget = make_nninput(11, family = "relative")

read in data
loaded in data
starting for loop


100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:04<00:00, 14.07it/s]


nntbp
(30, 95, 64, 128)
nnqbp
(30, 95, 64, 128)
lhflx
(1, 95, 64, 128)
shflx
(1, 95, 64, 128)
ps
(1, 95, 64, 128)
solin
(1, 95, 64, 128)
newhum
(30, 95, 64, 128)
oldhum
(30, 95, 64, 128)
tphystnd
(30, 95, 64, 128)
phq
(30, 95, 64, 128)
nnInput
Mean relative humidity conversion error: 0.0041737375
Variance for relative humidity conversion error: 0.00046795444
nntbp.shape: (30, 95, 64, 128)
nnqbp.shape: (30, 95, 64, 128)
lhflx.shape: (1, 95, 64, 128)
shflx.shape: (1, 95, 64, 128)
ps.shape: (1, 95, 64, 128)
solin.shape: (1, 95, 64, 128)
newhum.shape: (30, 95, 64, 128)
oldhum.shape: (30, 95, 64, 128)
tphystnd.shape: (30, 95, 64, 128)
phq.shape: (30, 95, 64, 128)
nnInput.shape: (184, 94, 64, 128)



In [55]:
# Display the shape of the array assigned to `hmm` above.
hmm.shape

(184, 94, 64, 128)

In [76]:
# NOTE(review): `shrinkArray` is not defined in any cell shown in this notebook --
# presumably it lived in a since-removed cell; confirm before a Restart & Run All.
wot = shrinkArray(hmm, 250)

In [77]:
# Display the shape of `wot`, the result of shrinkArray(hmm, 250).
wot.shape

(184, 6016)

In [59]:
# Shrink each array and join the results along the second axis.
# The loop previously ignored `arr` and shrank an unrelated `nnData` on every
# iteration, appending the same result once per entry in `files`.
# NOTE(review): `files` and `shrinkArray` are not defined in the cells shown
# here -- confirm they exist before a Restart & Run All.
datasets = []
for arr in tqdm(files):
    datasets.append(shrinkArray(arr, 5))
combinedData = np.concatenate(datasets, axis = 1)

(184, 72192)

# Run the code

In [None]:
#saveNNInput(1)

In [None]:
#saveNNInput(2)

In [None]:
#saveNNInput(3)

In [None]:
#saveNNInput(4)

In [None]:
#saveNNInput(5)