# Flow latents

In [1]:
# imports
from importlib import reload
import os
os.environ['ENDPOINT_URL'] = 'https://s3.nautilus.optiputer.net'

import numpy as np

import h5py 
import torch
from tqdm.auto import tqdm

from ulmo import io as ulmo_io
from ulmo.models import io as models_io
from ulmo.utils import HDF5Dataset

Cartopy not installed.  Some plots will not work!


# Load model

In [2]:
# Load the pre-trained MODIS L2 model through ulmo's model I/O helper.
# Per the log output below, this pulls an autoencoder, a flow and a scaler
# from s3://modis-l2 and selects the compute device.
pae = models_io.load_modis_l2()

Using device=cuda
Loading autoencoder model from: s3://modis-l2/Models/R2019_2010_128x128_std/autoencoder.pt
Loading flow model from: s3://modis-l2/Models/R2019_2010_128x128_std/flow.pt
scaler loaded from: s3://modis-l2/Models/R2019_2010_128x128_std/MODIS_R2019_2010_95clear_128x128_preproc_std_scaler.pkl




# Load latents

In [3]:
# HDF5 file holding the previously computed latent vectors.
latent_file = '/home/jovyan/Oceanography/SST-OOD/Test/Evaluations/MODIS_R2019_2012_95clear_128x128_latents_std.h5'
# Open the file in a context manager so the handle is closed as soon as the
# data has been read. The [:] slice copies the whole 'latents' dataset into an
# in-memory NumPy array, so later cells do not need the file to stay open.
# (latent_h5.keys() lists the datasets available in the file.)
with h5py.File(latent_file, 'r') as latent_h5:
    latents = latent_h5['latents'][:]

In [4]:
# Sanity-check the dimensions of the loaded latent array
# (presumably one row per cutout and one column per latent feature -- confirm).
latents.shape

(730634, 512)

## Load log-likelihoods (LL)

In [5]:
# HDF5 file holding the previously computed log-probabilities.
LL_file = '/home/jovyan/Oceanography/SST-OOD/Test/Evaluations/MODIS_R2019_2012_95clear_128x128_log_prob_std.h5'
# Open the file in a context manager so the handle is closed once the values
# are read. The [:] slice copies the 'valid' dataset into an in-memory NumPy
# array, so nothing below depends on the file remaining open.
# (LL_h5.keys() lists the datasets available in the file.)
with h5py.File(LL_file, 'r') as LL_h5:
    LL = LL_h5['valid'][:]

In [6]:
# Stored log-probability of the first sample; serves as the reference value
# when sample 0 is re-evaluated with the flow further down.
LL[0]

44.542786

# Apply ulmo

## Data Loader

In [7]:
# Single-sample check: wrap only the first latent vector (kept 2-D via the
# [:1] slice) so the flow output can be compared with the stored value for
# that same sample.
# NOTE(review): 16 worker processes for a one-sample in-memory dataset is
# probably unnecessary overhead -- consider num_workers=0 here.
dset_l = torch.utils.data.TensorDataset(
    torch.from_numpy(latents[:1]).to(torch.float32)
)
loader = torch.utils.data.DataLoader(
    dataset=dset_l,
    batch_size=1024,
    num_workers=16,
    shuffle=False,
    drop_last=False,
)

## Log prob

In [8]:
# Evaluate the flow's log-probability batch by batch with gradient tracking
# disabled. Each batch result is detached, moved to the CPU and stored as a
# NumPy array, so `log_prob` ends up as a list with one array per batch.
with torch.no_grad():
    log_prob = [
        pae.flow.log_prob(batch[0].to(pae.device)).detach().cpu().numpy()
        for batch in tqdm(loader, desc='Computing log probs', unit='batch', total=len(loader))
    ]

Computing log probs:   0%|          | 0/1 [00:02<?, ?batch/s]

In [9]:
# Log-probability just computed for the first (and only) batch, i.e. sample 0.
# NOTE(review): the saved output here (43.5448) does not match the stored
# LL[0] (44.542786) -- possibly the flow is not in eval mode for this direct
# call; confirm before relying on either number.
log_prob[0]

array([43.5448], dtype=float32)

In [10]:
# Materialise every batch the loader yields into a list, so individual
# batches can be indexed directly in the cells below.
data = list(loader)

In [11]:
# Shape of the tensor held in the first batch (index 0 selects the batch,
# the second index 0 selects the single tensor wrapped by the TensorDataset).
data[0][0].shape

torch.Size([1, 512])

In [12]:
# Call the flow's embedding network with no context (None) and keep the result.
# NOTE: 'contet' is a typo for 'context'; the name is left as-is because a
# later cell displays this variable under the same spelling.
embedded_contet = pae.flow.flow._embedding_net(None)

In [13]:
# Run the flow's transform on the first batch; it returns two values, unpacked
# here as the transformed sample (`noise`) and the log|det| term (`logabsdet`).
# This call is not wrapped in torch.no_grad(), so the results carry a grad_fn
# (visible in the outputs below).
noise, logabsdet = pae.flow.flow._transform(data[0][0].to(pae.device))

In [14]:
# Shape of the transformed sample returned by the flow's transform.
noise.shape

torch.Size([1, 512])

In [15]:
# Second value returned by the transform for sample 0 (one entry per sample).
logabsdet

tensor([744.8929], device='cuda:0', grad_fn=<AddBackward0>)

In [16]:
# Display the embedding-net result; the saved notebook shows no output for
# this cell, which presumably means the value is None -- confirm.
embedded_contet

In [17]:
# Log-probability of the transformed sample under the flow's base distribution.
# NOTE(review): presumably the flow's log-prob is this value plus `logabsdet`
# (744.8929 + -699.0179 ~= 45.875 from the saved outputs) -- confirm against
# the flow implementation; that sum matches neither the stored LL[0]
# (44.542786) nor the directly computed log_prob[0] (43.5448).
pae.flow.flow._distribution.log_prob(noise)

tensor([-699.0179], device='cuda:0', grad_fn=<SubBackward0>)

In [23]:
# Peek at the first five components of the transformed vector for sample 0.
noise[0][0:5]

tensor([ 0.4312, -0.9127,  1.0792,  0.8318,  0.5057], device='cuda:0',
       grad_fn=<SliceBackward>)

## Batch

In [25]:
# Full run: wrap the entire latent array as float32 tensors and iterate over
# it in fixed order, 1024 samples per batch, keeping the final partial batch.
# This rebinds `dset_l` and `loader` from the single-sample check.
dset_l = torch.utils.data.TensorDataset(
    torch.from_numpy(latents).to(torch.float32)
)
loader = torch.utils.data.DataLoader(
    dataset=dset_l,
    batch_size=1024,
    num_workers=16,
    shuffle=False,
    drop_last=False,
)

In [26]:
# Push every batch of latents through the flow's transform with gradient
# tracking disabled. Each entry of `items` is whatever the transform returns
# for one batch (unpacked elsewhere in this notebook as (noise, logabsdet));
# nothing is moved off pae.device, so the results stay on that device.
with torch.no_grad():
    items = [
        pae.flow.flow._transform(batch[0].to(pae.device))
        for batch in tqdm(loader, desc='Computing noise', unit='batch', total=len(loader))
    ]

Computing noise:   0%|          | 0/714 [00:01<?, ?batch/s]

In [27]:
# First value returned by the transform for the first batch
# (the transformed samples, in the same position as `noise` above).
items[0][0]

tensor([[ 0.3828, -0.8272,  1.0355,  ...,  0.6660, -0.0166,  1.1914],
        [ 0.4339, -0.6442,  0.2424,  ...,  1.2570, -1.2401,  0.6977],
        [-0.5705, -1.2546, -0.8328,  ...,  0.0827, -0.3387,  0.4422],
        ...,
        [ 0.6506, -1.6423,  3.5291,  ..., -1.9398, -0.1663, -0.7613],
        [-0.5718, -0.1999,  0.6853,  ...,  0.4397, -0.5380,  1.9431],
        [-1.2321, -0.1346,  0.6722,  ...,  0.6570, -1.4293,  0.7946]],
       device='cuda:0')

----

In [28]:
# Second value returned by the transform for the first batch
# (in the same position as `logabsdet` above; one entry per sample).
items[0][1]

tensor([ 746.6124, 1063.5111, 1119.2800,  ...,  295.2014,  603.0554,
         756.0487], device='cuda:0')

# Testing

In [7]:
# Pre-processed input file that is passed to pae.eval_data_file below.
pp_file = '/home/jovyan/Oceanography/SST-OOD/Test/PreProc/MODIS_R2019_2012_95clear_128x128_preproc_std.h5'

In [11]:
# Re-run the full evaluation on the 'valid' dataset of the pre-processed file.
# Per the log output below, this writes latents to tmp_latents.h5 and log
# probabilities to tmp_log_prob.h5 in the Evaluations directory, and the call
# returns the log-probability array (displayed as the cell result).
pae.eval_data_file(pp_file, 'valid', '/home/jovyan/Oceanography/SST-OOD/Test/Evaluations/tmp_log_prob.h5', csv=False)

Calculating latents..
Scaling..
Probabilities now


Computing log probs:   0%|          | 0/714 [00:02<?, ?batch/s]

Wrote latents to /home/jovyan/Oceanography/SST-OOD/Test/Evaluations/tmp_latents.h5
Log probabilities saved to /home/jovyan/Oceanography/SST-OOD/Test/Evaluations/tmp_log_prob.h5.


array([ 44.542786, 340.43127 , 390.7146  , ..., 108.63623 , 295.42798 ,
       334.82202 ], dtype=float32)

In [12]:
# Latent file written by the eval_data_file run (see its log output).
latent_file2 = '/home/jovyan/Oceanography/SST-OOD/Test/Evaluations/tmp_latents.h5'
# Open the file in a context manager so the handle is closed right after the
# read. The [:] slice copies the 'latents' dataset into an in-memory NumPy
# array, so the comparison below does not need the file to stay open.
with h5py.File(latent_file2, 'r') as latent_h5_2:
    latents2 = latent_h5_2['latents'][:]

In [18]:
# Check that the freshly written latent vector for sample 0 agrees, within
# np.isclose's default tolerances, with the one loaded at the top of the
# notebook. Only the first row is compared.
np.isclose(latents2[0], latents[0]).all()

True