# Importing packages

In [1]:
import numpy as np
import xarray as xr
import math
from tqdm import tqdm
import scipy.integrate as integrate

# Loading training data

In [2]:
# Read the training inputs and targets straight from their .npy files.
train_input = np.load('training/training_data/train_input.npy')
train_target = np.load('training/training_data/train_target.npy')

In [3]:
# Show the dimensions of both training arrays, one per line.
print(train_input.shape, train_target.shape, sep="\n")

(17927168, 64)
(17927168, 60)


In [4]:
# Append a constant column of ones so the linear model can learn an intercept.
# NOTE: this overwrites train_input, so running the cell again appends a
# further column of ones.
bias = np.ones((len(train_input), 1))
train_input = np.hstack((train_input, bias))
np.shape(train_input)

(17927168, 65)

# Train MLR model

In [5]:
# Normal-equation ingredients: X^T, X^T X and the inverse of X^T X.
train_input_transpose = train_input.T
X_transpose_X = np.matmul(train_input_transpose, train_input)
X_transpose_X_inverse = np.linalg.inv(X_transpose_X)


In [12]:
# Display the dimensions of the inverted X^T X matrix.
np.shape(X_transpose_X_inverse)

(65, 65)

In [6]:
# Display the dimensions of X^T X.
np.shape(X_transpose_X)

(65, 65)

In [7]:
# Display the dimensions of the transposed training inputs.
np.shape(train_input_transpose)

(65, 17927168)

In [8]:
# Display the dimensions of the training targets.
np.shape(train_target)

(17927168, 60)

In [10]:
# Right-hand side of the normal equations: X^T y.
X_transpose_y = np.matmul(train_input_transpose, train_target)

In [11]:
# Display the dimensions of X^T y.
np.shape(X_transpose_y)

(65, 60)

In [13]:
# Solve the normal equations (X^T X) W = X^T y for the regression weights.
# Solving the linear system directly avoids multiplying by an explicitly
# formed inverse, which is slower and loses more precision when X^T X is
# ill-conditioned. The result has the same shape as inv(X^T X) @ X^T y.
mlr_weights = np.linalg.solve(X_transpose_X, X_transpose_y)

In [14]:
# Display the dimensions of the fitted weight matrix.
np.shape(mlr_weights)

(65, 60)

In [16]:
# Predictions of the fitted linear model on the training inputs.
mlr_pred = np.matmul(train_input, mlr_weights)

In [17]:
# Display the dimensions of the training predictions.
np.shape(mlr_pred)

(17927168, 60)

In [20]:
# Mean squared error over every sample and every output column.
print("training mse")
np.mean(np.square(train_target - mlr_pred))

training mse


0.0020987695984448136

# Loading testing data

In [21]:
subsampling = 3


def sample_indices(size, spacing, fixed = True):
    """Pick roughly ``size / spacing`` zero-based indices out of ``range(size)``.

    Parameters
    ----------
    size : int
        Length of the axis being subsampled.
    spacing : number
        Target spacing; ``round(size / spacing)`` indices are returned.
    fixed : bool, optional
        When True (default) the indices are evenly spread from 0 to
        ``size - 1`` and returned as a NumPy array. When False a random
        subset (drawn with ``np.random.shuffle``) is returned as a list,
        in shuffled order.

    Returns
    -------
    numpy.ndarray or list
        The selected indices.
    """
    count = int(np.round(size/spacing))
    if not fixed:
        # Random draw: shuffle every candidate index and keep the first `count`.
        pool = list(range(size))
        np.random.shuffle(pool)
        return pool[:count]
    # Deterministic draw: evenly spaced 1-based positions, rounded, then
    # shifted down to 0-based indices.
    positions = np.round(np.linspace(1, size, count))
    return np.array(list(map(int, positions))) - 1

data_path = "offlinetesteval/testing_data/"
norm_path = "offlinetesteval/norm_files/"

num_models = 330

# Per-input-variable offsets and divisors used to normalize the test inputs.
inpsub = np.loadtxt(norm_path + "inp_sub.txt")
inpdiv = np.loadtxt(norm_path + "inp_div.txt")

# Output scale factors: the first 30 output rows use heatScale, the next 30
# use moistScale.
# NOTE(review): 1004 and 2.5e6 look like cp of dry air (J/kg/K) and the latent
# heat of vaporization (J/kg) — confirm.
heatScale = 1004
moistScale = 2.5e6
outscale = np.concatenate((np.repeat(heatScale, 30), np.repeat(moistScale, 30)))

# Choose the time steps once and reuse them for inputs and targets, so both
# arrays are guaranteed to hold the same samples even if sample_indices is
# ever called with fixed=False (random sampling).
test_time_indices = sample_indices(336, subsampling)

with open(data_path + 'test_input.npy', 'rb') as f:
    test_input = np.load(f)[:, test_time_indices, :, :]

with open(data_path + 'test_target.npy', 'rb') as f:
    test_target = np.load(f)[:, test_time_indices, :, :]

assert test_input.shape[1] == test_target.shape[1]

timesteps = test_input.shape[1]

# Normalize each input variable (axis 0) with its own offset and divisor.
nn_input = (test_input - inpsub[:, np.newaxis, np.newaxis, np.newaxis]) / inpdiv[:, np.newaxis, np.newaxis, np.newaxis]

# NOTE(review): absolute cluster paths; consider making the directory configurable.
spData = xr.open_mfdataset(["/ocean/projects/atm200007p/jlin96/longSPrun_o3/AndKua_aqua_Base_training.cam2.h1.0001-01-19-00000.nc",
                            "/ocean/projects/atm200007p/jlin96/longSPrun_o3/AndKua_aqua_Base_training.cam2.h1.0001-01-20-00000.nc"],
                            decode_times = False)

#Creating mass weights
def createPressureGrid(h1Data):
    """Build a pressure grid and return its differences along the level axis.

    For every time step and grid column the pressure at each level is computed
    as ``hyam * 1e5 + NNPS * hybm``.
    NOTE(review): this looks like the hybrid sigma-pressure formula with a
    1e5 Pa reference pressure — confirm against the model documentation.

    Parameters
    ----------
    h1Data : mapping-like (e.g. an xarray Dataset)
        Must provide "hyam" and "hybm" with shape (time, level), or (level,)
        when the coefficients do not vary in time, and "NNPS" with shape
        (time, lat, lon).

    Returns
    -------
    numpy.ndarray
        float64 array of shape (time, level - 1, lat, lon): ``np.diff`` of the
        pressure grid along the level axis.
        NOTE(review): the level axis is one shorter than the number of levels
        in "hyam"; confirm that consumers expect that length.
    """
    ps = np.array(h1Data["NNPS"])
    n_times = len(ps)

    def _level_coefficients(name):
        # Return the named coefficients shaped (time, level, 1, 1) for broadcasting.
        coef = np.array(h1Data[name])
        if coef.ndim == 1:
            # Time-independent coefficients: reuse them for every time step.
            coef = np.broadcast_to(coef, (n_times, coef.shape[0]))
        return coef[:n_times, :, np.newaxis, np.newaxis]

    hyam = _level_coefficients("hyam")
    hybm = _level_coefficients("hybm")
    # Broadcasting evaluates the same per-column expression as an explicit
    # time/lat/lon loop, without Python-level iteration and without a
    # hard-coded number of levels.
    pressureGrid = np.asarray(hyam*1e5 + ps[:, np.newaxis, :, :]*hybm, dtype=np.float64)
    return np.diff(pressureGrid, axis = 1)
# Time-averaged pressure differences (axis 0 of the grid is time).
# A "[11:29]" slice was left commented out on this line in the original.
pressures = createPressureGrid(spData).mean(axis = 0)
# Normalize so that the weights add up to one.
mass_weights = pressures / sum(pressures.ravel())

#Creating area weights
r = 6371  # sphere radius used by surfArea (presumably Earth's mean radius in km — confirm)


def integrand(t):
    """Return sin(t), the function integrated over the polar angle ``t`` (radians)."""
    return math.sin(t)


def _polar_angle_bounds(lat1, lat2):
    """Convert two latitudes in degrees to ordered polar angles in radians.

    Each latitude is shifted by +90 degrees, so the angle is measured from
    latitude -90. The two angles are ordered *before* the degree-to-radian
    conversion. Ordering after converting only one of them compares radians
    with degrees and collapses the interval to zero width whenever
    ``lat1 > lat2``.

    Returns
    -------
    tuple of float
        ``(low, high)`` with ``low <= high``, both in radians.
    """
    angle1 = lat1 + 90
    angle2 = lat2 + 90
    low = min(angle1, angle2)*math.pi/180
    high = max(angle1, angle2)*math.pi/180
    return low, high


def surfArea(lat1, lat2, lon1, lon2):
    """Area of the latitude/longitude rectangle on a sphere of radius ``r``.

    Parameters
    ----------
    lat1, lat2 : float
        Bounding latitudes in degrees, in either order.
    lon1, lon2 : float
        Bounding longitudes in degrees, in either order.

    Returns
    -------
    float
        ``r**2 * dlon * integral(sin)`` over the polar-angle interval, in
        units of ``r`` squared.
    """
    low, high = _polar_angle_bounds(lat1, lat2)
    lons = (max(lon1, lon2) - min(lon1, lon2))*math.pi/180
    # quad returns (integral value, absolute error estimate); only the value is used.
    a = integrate.quad(integrand, low, high)
    return lons*r*r*a[0]


# Longitudes are equidistant so we can simplify surfArea
def weight_area(lat1, lat2):
    """Relative area weight of the latitude band between ``lat1`` and ``lat2``.

    Same integral as ``surfArea`` without the longitude width and radius
    factors. Latitudes are in degrees and may be given in either order.

    Returns
    -------
    float
        Integral of sin over the band's polar-angle interval.
    """
    low, high = _polar_angle_bounds(lat1, lat2)
    weight = integrate.quad(integrand, low, high)
    return weight[0]

lats = np.array(spData["lat"])
# The first and last latitude must sit equally far from their poles
# (index 63 is treated as the last latitude).
assert 90 + lats[0] == 90 - lats[63]

# Band edges: -90 first, then each latitude plus half the gap to the next one;
# the final latitude is offset by its distance-to-pole term instead.
last_lat_mdiff = 90 + lats[0]
lat_mdiff = np.diff(lats) / 2
lat_buff = np.append(lat_mdiff, last_lat_mdiff)
lat_edges = np.append(-90, lat_buff + lats)

# One area weight per latitude band, shaped (1, n_lat, 1) for broadcasting.
area_weights = np.array([
    weight_area(edge_a, edge_b)
    for edge_a, edge_b in zip(lat_edges[:-1], lat_edges[1:])
])
area_weights = area_weights.reshape(1, -1, 1)

# Combine area and pressure weights, then normalize them to sum to one.
error_weights = np.multiply(area_weights, pressures)
error_weights = error_weights / sum(error_weights.ravel())

In [27]:
# Display the dimensions of the normalized test inputs.
np.shape(nn_input)

(64, 112, 64, 128)

In [24]:
# Display the dimensions of the test targets.
np.shape(test_target)

(60, 112, 64, 128)

In [29]:
# Flatten every axis after the 64 input variables into one sample axis, put
# samples on rows, and append the same constant column used in training.
mlr_input = nn_input.reshape(64, -1).T
bias = np.ones((len(mlr_input), 1))
mlr_input = np.hstack((mlr_input, bias))
np.shape(mlr_input)

(917504, 65)

In [30]:
# Apply the fitted weights to the flattened test inputs.
mlr_pred_test = np.matmul(mlr_input, mlr_weights)

In [31]:
# Display the dimensions of the flat test predictions.
np.shape(mlr_pred_test)

(917504, 60)

In [33]:
# Column vector of output scales: heatScale for the first 30 rows and
# moistScale for the following 30.
outscale = np.repeat(np.array([heatScale, moistScale], dtype=float), 30).reshape(-1, 1)
np.shape(outscale)

(60, 1)

In [41]:
# Put output variables on rows and divide each row by its scale factor.
# NOTE: this overwrites mlr_pred_test, so the cell is not safe to re-run on
# its own.
mlr_pred_test = np.divide(mlr_pred_test.T, outscale)
np.shape(mlr_pred_test)

(60, 917504)

In [43]:
# Unflatten the sample axis back into the three trailing grid axes.
mlr_pred_test = mlr_pred_test.reshape(60, 112, 64, 128)
np.shape(mlr_pred_test)

(60, 112, 64, 128)

In [44]:
def squared_error(prediction, target):
    """Squared error averaged over axis 1, split into two groups of 30 rows.

    Parameters
    ----------
    prediction, target : numpy.ndarray
        4-D arrays whose first axis has at least 60 entries.

    Returns
    -------
    tuple of numpy.ndarray
        ``(se_T, se_Q)``: the axis-1 mean of the squared difference for rows
        0-29 and for rows 30-59 respectively. The T/Q naming presumably
        refers to heating and moistening outputs — confirm.
    """
    squared = np.square(prediction - target)
    first_block = squared[:30]
    second_block = squared[30:60]
    return first_block.mean(axis = 1), second_block.mean(axis = 1)

def weight_error(se):
    """Multiply squared errors element-wise by the module-level ``error_weights``.

    ``se`` must be broadcastable against ``error_weights``.
    """
    weighted = np.multiply(se, error_weights)
    return weighted

def root_error(wse):
    """Return the square root of the sum of all entries of ``wse``."""
    total = np.sum(wse)
    return total ** 0.5

def get_rmse(prediction, target):
    """Weighted root-mean-square error for each of the two output groups.

    Chains ``squared_error`` -> ``weight_error`` -> ``root_error`` and returns
    the pair ``(rmse_T, rmse_Q)`` in the same order as ``squared_error``.
    """
    return tuple(
        root_error(weight_error(group_se))
        for group_se in squared_error(prediction, target)
    )

In [45]:
# Weighted RMSE of the linear model on the test set, as (rmse_T, rmse_Q).
get_rmse(prediction=mlr_pred_test, target=test_target)

(4.663343097627819e-05, 3.1025109112159054e-08)