In [32]:
import numpy as np # linear algebra
import pandas as pd
import torch
import os
import ml_eke
from tqdm import tqdm

In [33]:
def get_samples(start, end, columns, model_data, predictands=['EKE_sfc']):

    samples = []
    targets = []
    for sample in tqdm(range(start, end), desc="Extracting samples"):
        X, Y, mask = model_data.extract_sample_from_time(predictors=columns, predictands=predictands, sample_idx=sample)
        samples.append(pd.DataFrame(X.data, columns=columns))
        targets.append(pd.DataFrame(Y.data, columns=["EKE"]))

    data = pd.concat(samples).reset_index()
    data.drop("index", inplace=True, axis=1)
    targets = pd.concat(targets).reset_index()
    targets.drop("index", inplace=True, axis=1)
    data["EKE"] = targets["EKE"]
    
    return data


In [46]:
columns_6_features = ['MKE_sfc',
                      'slope_z',
                      'Rd_dx_z',
                      'relative_vorticity_sfc',
                      'divergence_sfc',
                      'deformation_sfc'
                      ]

columns_4_features = columns_6_features[0:4]

columns_5_features = columns_4_features + ['grad_SSH_sfc']

columns = columns_4_features

model_path = './ml_eke/nn/pytorch/trained_models/ResNetSmall_4_customuncapped_cf_all_4_feat.pkl'
model_name = os.path.basename(model_path).split('.')[0]

model_mse = torch.load(model_path, map_location=torch.device('cpu'))

datapaths_2_3 = ('/lus/scratch/ashao/data/2_3/', '/lus/scratch/ashao/data/2_3_SSH/')
first_suffixes_2_3 = ('_01_001.nc', '_16_001.nc')
datapaths_1_4 = ('/lus/scratch/ashao/data/1_4/', '/lus/scratch/ashao/data/1_4_SSH/')
first_suffixes_1_4 = ('_1915_001.nc', '_18_001.nc')

datapaths = datapaths_2_3
first_suffixes = first_suffixes_2_3

model_data = ml_eke.pop_data(datapaths[0], datapaths[0], skip_vars = ['x','y','depth','depth_stdev'], extra_pref=None, first_suffix=first_suffixes[0])
model_data.extend_inventory(datapaths[1],first_suffix=first_suffixes[1])

X_train = np.load('./ml_eke/nn/data/X_train_cf_all_4_feat.npy')
y_train = np.load('./ml_eke/nn/data/y_train_cf_all_4_feat.npy')

num_samples = 365 # one year should be enough.
dataset = get_samples(0, num_samples+1, columns, model_data, predictands=['MEKE_z'])

Extracting samples: 100%|██████████| 366/366 [00:13<00:00, 27.62it/s]


In [58]:
feats = dataset.values.copy()[:, 0:len(columns)]

feats[:,[0,1,3]] = np.log(np.abs(feats[:,[0,1,3]]))*np.sign(feats[:,[0,1,3]])+36.0*np.sign(feats[:,[0,1,3]])

feat_avg = np.mean(feats, axis=0, dtype=np.float64)
feat_sd  = np.std(feats, axis=0, dtype=np.float64)

targ_avg = np.mean(y_train, dtype=np.float64)
targ_sd  = np.std(y_train, dtype=np.float64)

In [None]:
chunk_size = 1000
preds = np.zeros((feats.shape[0],1))

for chunk in tqdm(range(feats.shape[0]//chunk_size)):
    loc_feats = torch.tensor(feats[chunk*chunk_size:(chunk+1)*chunk_size])
    preds[chunk*chunk_size:(chunk+1)*chunk_size] = model_mse(loc_feats).detach().numpy()
    
if feats.shape[0]%chunk_size != 0:
    chunk = feats.shape[0]//chunk_size
    loc_feats = torch.tensor(feats[chunk*chunk_size:])
    preds[chunk*chunk_size:] = model_mse(loc_feats).detach().numpy()
    

 58%|█████▊    | 29962/51847 [13:54<10:08, 35.95it/s]

In [None]:
pred_avg = np.mean(preds, dtype=np.float64)
pred_sd  = np.std(preds, dtype=np.float64)

In [None]:
print(pred_avg, targ_avg, feat_avg)
print(pred_sd,  targ_sd,  feat_sd)

In [64]:
# torchscript
model_mse.eval()

class InferenceCell(torch.nn.Module):
    
    @torch.no_grad()
    def __init__(self):
        super(InferenceCell, self).__init__()
        self.model = model_mse

        self.feat_avg = torch.tensor([feat_avg], dtype=torch.float32)
        self.feat_sd = torch.tensor([feat_sd], dtype=torch.float32)
        self.targ_avg = torch.tensor([targ_avg], dtype=torch.float32)
        self.targ_sd = torch.tensor([targ_sd], dtype=torch.float32)
        self.pred_avg = torch.tensor([pred_avg], dtype=torch.float32)
        self.pred_sd = torch.tensor([pred_sd], dtype=torch.float32)
        
        
    @torch.no_grad()
    def forward(self, x):
        x[:,[0,1,3]] = torch.log(torch.abs(x[:,[0,1,3]]))*torch.sign(x[:,[0,1,3]])+36.0*torch.sign(x[:,[0,1,3]])
        
        x = (x - self.feat_avg) / self.feat_sd
        print(x, 'in')
        x = self.model(x)
        print(x, 'post-model')
        x = (x-self.pred_avg)/self.pred_sd*self.targ_sd+self.targ_avg
        return x

inference_cell = InferenceCell()
x = torch.tensor(dataset.values.copy()[0:2,0:len(columns)])
print(f'{x} out')
traced_cell = torch.jit.trace(inference_cell, (x))
#print(traced_cell.graph)
x = torch.tensor(dataset.values.copy()[0:4,0:len(columns)])
print(f'{x} out')
print("return: ", traced_cell(x))
traced_cell.save(f'./ml_eke/nn/pytorch/trained_models/{model_name}_for_2_3.pt')

tensor([[9.0608e-03, 8.4608e-04, 1.0457e-01, 1.8002e-06],
        [1.1456e-02, 7.3167e-04, 1.0674e-01, 3.2575e-07]]) out
tensor([[ 0.2169, -0.3312, -0.7533,  1.0755],
        [ 0.3542, -0.3975, -0.7505,  0.9942]]) in
tensor([[-15.0814],
        [-13.3178]]) post-model
tensor([[ 4.9864,  4.4309, -0.7533,  1.8533],
        [ 4.9908,  4.4286, -0.7505,  1.8496]]) in
tensor([[-17.2108],
        [-17.2354]]) post-model
tensor([[ 4.9864,  4.4309, -0.7533,  1.8533],
        [ 4.9908,  4.4286, -0.7505,  1.8496]]) in
tensor([[-17.2108],
        [-17.2354]]) post-model
tensor([[9.0608e-03, 8.4608e-04, 1.0457e-01, 1.8002e-06],
        [1.1456e-02, 7.3167e-04, 1.0674e-01, 3.2575e-07],
        [1.2707e-02, 6.2016e-04, 1.0481e-01, 2.1509e-07],
        [1.3541e-02, 4.3923e-04, 1.0443e-01, 2.0890e-07]]) out
return:  tensor([[-16.1620],
        [-14.1003],
        [-13.2681],
        [-11.7515]], grad_fn=<AddBackward0>)
