# Ulmo log-likelihood (LL) on DINEOF

In [28]:
# imports
import os
import glob
import numpy as np

import torch
import h5py

import pickle

from tqdm.auto import tqdm

from ulmo import io as ulmo_io
from ulmo.preproc import utils as pp_utils
from ulmo.plotting import plotting
from ulmo.models import autoencoders, ConditionalFlow
from ulmo import ood

# Load data

## Table

In [12]:
# Read the DINEOF main table stored under $OS_OGCM/LLC/Enki/Tables.
# NOTE: the file name is spelled 'DINOEF' (sic); keep it as-is so it matches the file on disk.
ogcm_root = os.getenv('OS_OGCM')
tbl_path = os.path.join(ogcm_root, 'LLC', 'Enki', 'Tables')
tbl_file = os.path.join(tbl_path, 'Enki_LLC_DINOEF.parquet')
dineof_tbl = ulmo_io.load_main_table(tbl_file)

Read main table: /media/xavier/crucial/Oceanography/OGCM/LLC/Enki/Tables/Enki_LLC_DINOEF.parquet


In [14]:
# Preview the first five rows of the table
dineof_tbl.head(n=5)

Unnamed: 0,lat,lon,row,col,datetime,filename,pp_file,pp_root,field_size,pp_idx,pp_type,mean_temperature,Tmin,Tmax,T90,T10,DT
0,21.003527,117.989586,9005,7455,2011-09-27,/data/Projects/Oceanography/data/LLC/ThetaUVSa...,s3://llc/mae/PreProc/Enki_LLC_DINEOF_preproc.h5,llc_144,64,125,0,27.872849,27.015001,28.167002,28.053001,27.636002,0.417
1,21.003527,117.989586,9005,7455,2011-09-28,/data/Projects/Oceanography/data/LLC/ThetaUVSa...,s3://llc/mae/PreProc/Enki_LLC_DINEOF_preproc.h5,llc_144,64,144,0,27.727337,27.038,28.095001,27.900002,27.522001,0.378
2,21.003527,117.989586,9005,7455,2011-09-29,/data/Projects/Oceanography/data/LLC/ThetaUVSa...,s3://llc/mae/PreProc/Enki_LLC_DINEOF_preproc.h5,llc_144,64,2,0,27.608845,26.865002,27.933001,27.85,27.302,0.548
3,21.003527,117.989586,9005,7455,2011-09-30,/data/Projects/Oceanography/data/LLC/ThetaUVSa...,s3://llc/mae/PreProc/Enki_LLC_DINEOF_preproc.h5,llc_144,64,8,0,27.713804,27.044001,28.058001,27.963001,27.415001,0.548
4,21.003527,117.989586,9005,7455,2011-10-01,/data/Projects/Oceanography/data/LLC/ThetaUVSa...,s3://llc/mae/PreProc/Enki_LLC_DINEOF_preproc.h5,llc_144,64,134,0,27.738844,27.0,28.245001,27.998001,27.438002,0.559999


## Now the data file

In [36]:
# Open the preprocessed DINEOF file read-only.
# The handle `f` is intentionally left open: later cells read datasets from it.
preproc_path = os.path.join(os.getenv('OS_OGCM'), 'LLC', 'Enki', 'PreProc')
preproc_file = os.path.join(preproc_path, 'Enki_LLC_DINEOF_preproc.h5')
f = h5py.File(preproc_file, mode='r')

In [19]:
# Dimensions of the 'valid' dataset in the preproc file
valid_dset = f['valid']
valid_dset.shape

(180, 1, 64, 64)

# Load model

In [6]:
# Locate the Ulmo model directory under $OS_SST and choose the compute device
model_path = os.path.join(os.getenv('OS_SST'), 'MODIS_L2', 'Ulmo')
print("Loading model in {}".format(model_path))
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Autoencoder restored from file, with the image shape and latent size given below
autoencoder_file = os.path.join(model_path, 'autoencoder.pt')
dcae = autoencoders.DCAE.from_file(
    autoencoder_file,
    image_shape=(1, 64, 64),
    latent_dim=512)

Loading model in /media/xavier/crucial/Oceanography/SST/MODIS_L2/Ulmo


## Flow

In [7]:
# Flow hyperparameters, gathered in one place. `dim` equals the autoencoder's
# latent_dim (512). These presumably have to match the configuration used when
# flow.pt was trained, otherwise load_state_dict will reject the weights.
flow_kwargs = dict(
    dim=512,
    context_dim=None,
    transform_type='autoregressive',
    n_layers=10,
    hidden_units=256,
    n_blocks=2,
    dropout=0.2,
    use_batch_norm=False,
    tails='linear',
    tail_bound=10,
    n_bins=5,
    min_bin_height=1e-3,
    min_bin_width=1e-3,
    min_derivative=1e-3,
    unconditional_transform=False,
    encoder=None,
)
flow = ConditionalFlow(**flow_kwargs)

# Restore the saved flow weights, mapped onto the selected device
flow_weights = torch.load(os.path.join(model_path, 'flow.pt'), map_location=device)
flow.load_state_dict(flow_weights)

# Combine autoencoder + flow; skip_mkdir=True is passed alongside the 'tmp/' path argument
pae = ood.ProbabilisticAutoencoder(dcae, flow, 'tmp/', device=device, skip_mkdir=True)
print("Model loaded!")

Using device=cpu
Wrote model parameters to ./model.json
Model loaded!


# Prep

In [9]:
# Switch both networks to evaluation mode (e.g. turns off the flow's dropout) before inference
autoencoder_net = pae.autoencoder
flow_net = pae.flow
autoencoder_net.eval()
flow_net.eval()

ConditionalFlow(
  (flow): Flow(
    (_transform): CompositeTransform(
      (_transforms): ModuleList(
        (0-9): 10 x CompositeTransform(
          (_transforms): ModuleList(
            (0): RandomPermutation()
            (1): MaskedPiecewiseRationalQuadraticAutoregressiveTransform(
              (autoregressive_net): MADE(
                (initial_layer): MaskedLinear(in_features=512, out_features=256, bias=True)
                (blocks): ModuleList(
                  (0-1): 2 x MaskedResidualBlock(
                    (linear_layers): ModuleList(
                      (0-1): 2 x MaskedLinear(in_features=256, out_features=256, bias=True)
                    )
                    (dropout): Dropout(p=0.2, inplace=False)
                  )
                )
                (final_layer): MaskedLinear(in_features=256, out_features=7168, bias=True)
              )
            )
          )
        )
      )
    )
    (_distribution): StandardNormal()
    (_embedding_net): Identit

# Latents

In [37]:
# Pick one preprocessed field from the 'valid' dataset
field_idx = 100
pp_field = f['valid'][field_idx, ...]

In [None]:
# Latent
# Build the (batch, channel, 64, 64) input on a NEW array instead of resizing
# pp_field in place: the previous cell's variable stays untouched, and reshape
# raises if the field does not hold exactly 64*64 values.
pp_batch = pp_field.reshape(1, 1, 64, 64)
dset = torch.utils.data.TensorDataset(torch.from_numpy(pp_batch).float())
# The dataset is a single in-memory tensor, so load it in the main process
# (num_workers=0); spawning 16 worker processes only adds start-up overhead.
loader = torch.utils.data.DataLoader(
    dset, batch_size=1, shuffle=False,
    drop_last=False, num_workers=0)
# Encode on the same device the ProbabilisticAutoencoder was built with
# (pae.device, as also used for the log-prob computation).
with torch.no_grad():
    latents = [pae.autoencoder.encode(data[0].to(pae.device)).detach().cpu().numpy()
               for data in tqdm(loader, total=len(loader), unit='batch', desc='Computing latents')]
print("Latents generated!")

Computing latents:   0%|          | 0/1 [00:00<?, ?batch/s]

In [None]:
# Scaler
# Find the pickled scaler shipped alongside the model; fail with a clear message if absent.
scaler_matches = glob.glob(os.path.join(model_path, '*scaler.pkl'))
if not scaler_matches:
    raise FileNotFoundError("No '*scaler.pkl' file found in {}".format(model_path))
scaler_path = scaler_matches[0]
# Use a dedicated handle name here: `f` is the open HDF5 preproc file from an
# earlier cell, and rebinding it would break re-running the cells that read from it.
# NOTE(review): pickle.load can execute arbitrary code; only load scalers from a trusted model directory.
with open(scaler_path, 'rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)
# `latents` enters as the list of per-batch arrays and is rebound to the scaled
# array; re-run the latent cell before re-running this one.
latents = scaler.transform(np.concatenate(latents))

# LL

In [None]:
# LL
dset = torch.utils.data.TensorDataset(torch.from_numpy(latents).float())
# In-memory tensor data: load in the main process rather than spawning 16 workers.
loader = torch.utils.data.DataLoader(
    dset, batch_size=1024, shuffle=False,
    drop_last=False, num_workers=0)

# One log-probability array per batch, moved back to the CPU as NumPy
with torch.no_grad():
    log_prob = [pae.flow.log_prob(data[0].to(pae.device)).detach().cpu().numpy()
                for data in tqdm(loader, total=len(loader), unit='batch', desc='Computing log probs')]
print("Log probabilities generated!")

# Extract the first value explicitly: calling float() directly on an array with
# ndim > 0 is deprecated since NumPy 1.25. np.ravel makes this work whatever the
# dimensionality of the per-batch array.
field_ll = float(np.ravel(log_prob[0])[0])
print("The LL for the field is: {}".format(field_ll))