In [12]:
import cbh_torch_lstm # defined in directory (model definition)
import cbh_data_definitions
import zarr
import pathlib
import os
import numpy as np
import torch

## Define the evaluation data

In [4]:
root_data_directory = pathlib.Path(os.environ['SCRATCH']) / 'cbh_data'
test_data_path = root_data_directory / 'analysis_ready' / 'dev.zarr' # must change with available test data

In [5]:
test_input, test_labels, test_cloud_volume = cbh_data_definitions.load_data_from_zarr(test_data_path)

Loaded zarr, file information:
 Name        : /
Type        : zarr.hierarchy.Group
Read-only   : False
Store type  : zarr.storage.DirectoryStore
No. members : 3
No. arrays  : 3
No. groups  : 0
Arrays      : cloud_volume_fraction_y.zarr, humidity_temp_pressure_x.zarr,
            : onehot_cloud_base_height_y.zarr
 



In [6]:
collate_fn = cbh_data_definitions.dataloader_collate_with_dask
batch_size = 1000
workers_on_system = 0
test_dataloader = cbh_data_definitions.define_data_get_loader(test_input,
                                                              test_cloud_volume,
                                                              test_labels, 
                                                              batch_size=batch_size, 
                                                              shuffle=False, 
                                                              num_workers = workers_on_system, 
                                                              collate_fn=collate_fn)

## Define the network

In [14]:
checkpoint = torch.load('final_out_lab.ckpt')




dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers'])


In [18]:
layers = 3
input_size = test_input.shape[2] # input size is the cell input (feat dim)
output_size = 1 # for each height layer, predict one value for cloud base prob
hidden_size = 32
embed_size = 3
BILSTM = True
batch_first = True
learn_rate = 0.001
height_dim = test_input.shape[1]

model = cbh_torch_lstm.CloudBaseLSTM(input_size, layers, hidden_size, output_size, height_dim, embed_size)
model.state_dict = checkpoint['state_dict']

# model = cbh_torch_lstm.CloudBaseLSTM.load_from_checkpoint('final_out_lab.ckpt', )

## Get model predictions for Eval

In [20]:
all_preds = np.empty((0,70))
all_targs = np.empty((0,70))
for i, sample_batch in enumerate(test_dataloader):
    if i > 0: break # temp get only the first batch
    all_targs = np.concatenate((all_targs, sample_batch['cloud_base_target']), axis=0)
    height = sample_batch['height_vector']
    x = sample_batch['x']
    batch_preds = model(x, height) # self call = forward
    all_preds = np.concatenate((all_preds,batch_preds[0].detach().numpy()), axis=0) # get second value only (since model doesn't support the other return per how it is currently defined)

print(all_targs.shape)
print(all_preds.shape)

(1000, 70)
(1000, 70)


In [21]:
# view my model's gross abuse of generalized data statistics
print(all_preds[7])
print(all_targs[7])

[ 0.02812776 -0.07781146  0.0626948  -0.08563723 -0.11669876 -0.10598429
  0.03966087 -0.09225918 -0.04651425  0.1156247   0.11421951  0.07058091
 -0.0844821   0.08742284 -0.10865832 -0.09843732  0.06928282  0.0083699
  0.01322776 -0.02690658 -0.0898084  -0.08008031  0.04124816  0.06330191
 -0.09607642  0.00219133  0.08416506  0.02324097  0.06305565  0.05053411
  0.11220004  0.10415665  0.03736757  0.10131148 -0.06242672  0.10829218
 -0.10935006  0.04064533  0.0276012   0.06404807 -0.05122092  0.06662233
 -0.05878032  0.0359249  -0.05011643 -0.08382751 -0.10241412  0.07698797
 -0.08534922 -0.07265964  0.03688104 -0.08927614  0.02857562 -0.03174012
 -0.00939858 -0.03466484  0.09406193 -0.09890064  0.01117128  0.09528607
 -0.0998457  -0.06829891  0.03459731  0.01586575 -0.05767401 -0.03828694
 -0.06396797 -0.05973816 -0.12091554  0.10869594]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0

### convert model outs to the task (labels)

In [23]:
def calc_cloudbase_return_both_class_lab_and_onehot(cloud_vol_array):
    cloud_threshold = 2./8.
    cloud_over_threshold = np.where(cloud_vol_array>cloud_threshold)
    sample_with_cloud = cloud_over_threshold[0]
    index_on_sample = cloud_over_threshold[1]
    _, first_duplicate_indicies = np.unique(sample_with_cloud, return_index=True)
    # encode the cloud in onehot vector
    one_hot_encoded_bases = np.zeros(cloud_vol_array.shape)
    one_hot_encoded_bases[sample_with_cloud[first_duplicate_indicies],index_on_sample[first_duplicate_indicies]] = 1
    # mark the end (final layer) if no cloud base detected
    flip = lambda booleanVal: not booleanVal
    vflip = np.vectorize(flip)
    one_hot_encoded_bases[np.where(vflip(np.any(one_hot_encoded_bases, axis=1)))[0], -1] = 1
    # Now reduce vectors as if each height layer is treated as a class where the model will predict, onehot -> class label e.g. 0,0,1,0, -> 2
    class_label_encoded_bases = np.argmax(one_hot_encoded_bases, axis=1)
    
    return one_hot_encoded_bases, class_label_encoded_bases

def calc_lab(tar_lab, pred_lab):
    return np.argmax(tar_lab, axis=1), np.argmax(pred_lab, axis=1)
    
# tar_onehot, tar_lab = calc_cloudbase_return_both_class_lab_and_onehot(all_targs)
# pred_onehot, pred_lab = calc_cloudbase_return_both_class_lab_and_onehot(all_preds)

tar_lab, pred_lab = calc_lab(all_targs, all_preds)

# print(tar_onehot[0])
print(all_targs[0])
print(tar_lab[0])
print(pred_lab
print(tar_lab.shape)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
16
(1000,)


## Evaluate model returns

In [13]:
def old_cbh_eval(preds, targs):
    # for all tested height layers
    abcd_list = []
    for k in np.arange(0,53,1):
            a=0.0
            b=0.0
            c=0.0
            d=0.0
            for i in np.arange(0,targs.shape[1]):
                # Is the cloud-base at this level or below.
                if   targs[0,i]<=np.float64(k) and preds[0,i] <=np.float64(k):
                    # Hit
                    a=a+1.0
                elif targs[0,i]> np.float64(k) and preds[0,i] <=np.float64(k):
                    # False alarm
                    b=b+1.0
                elif targs[0,i]<=np.float64(k) and preds[0,i] > np.float64(k):
                    # Miss
                    c=c+1.0
                else:
                    # Correct negative
                    d=d+1.0
            # if a == 0.0 or b == 0.0:
            #     a += 1 
            #     b += 1
            abcd_list.append((a,b,c,d))
    return abcd_list

import typing
# new function
def new_cbh_eval(preds: typing.Iterable, targs: typing.Iterable, test_up_to_layer=53) -> (list[int], list[int]):
    abcd_list = []
    preds = preds[:test_up_to_layer]
    targs = targs[:test_up_to_layer]
    print(np.where(preds<=targs))
    # a =
    
    return
    return abcd_list

In [14]:
new_cbh_eval(pred_lab, tar_lab)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52]),)


In [11]:
rearange_for_old_eval_target = np.expand_dims(tar_lab, 0)
rearange_for_old_eval_pred = np.expand_dims(pred_lab, 0)
abcd_old = old_cbh_eval(rearange_for_old_eval_pred, rearange_for_old_eval_target)
assert all([x in [1000,1002] for x in list(map(sum, abcd_old))])

(1, 1000)
(1, 1000)


In [12]:
print(abcd_old)

[(54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (54.0, 946.0, 0.0, 0.0), (77.0, 923.0, 0.0, 0.0), (852.0, 148.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (860.0, 140.0, 0.0, 0.0), (966.0, 34.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.0, 0.0, 0.0), (1000.0, 0.

In [None]:
print(np.unique(tar_lab, return_counts=True))
print(np.unique(pred_lab, return_counts=True))

In [81]:
# check accuracy for sanity
correct_preds = tar_lab==pred_lab
correct = np.sum(correct_preds)
print(correct, 'correct')
acc = 100 * (correct / len(tar_lab))
print(str(acc) + '%')

54 correct
5.4%


## Present Evaluation