# AutoICE - test model and prepare upload package
This notebook runs the 'best_model', created in the quickstart notebook, on the test scenes, which come without reference data. The model outputs are stored per scene and chart as individual DataArrays in a single xarray Dataset. The Dataset is then compressed and saved as an .nc file, ready to be uploaded to the AI4EO.eu platform. Optionally (when `plotting` is enabled), the inferred charts of each scene are saved as PNG images in the `inference` folder.

The first cell imports necessary packages:

In [1]:
# -- Built-in modules -- #
import os
import gc
import sys

# -- Third-party modules -- #
import json
import matplotlib.pyplot as plt
import numpy as np
import torch
import xarray as xr
from tqdm.notebook import tqdm
import psutil
import tracemalloc

# -- Proprietary modules -- #
from functions import chart_cbar, r2_metric, f1_metric, compute_metrics
from loaders import AI4ArcticChallengeTestDataset
from unet import UNet
from utils import CHARTS, SIC_LOOKUP, SOD_LOOKUP, FLOE_LOOKUP, SCENE_VARIABLES, colour_str
from unet import FeatureMap
%store -r train_options

### Setup of the GPU resources

In [2]:
# Select the compute device: the GPU configured in train_options when CUDA is
# usable, otherwise fall back to the CPU.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    print(colour_str('GPU not available.', 'red'))
    device = torch.device('cpu')
else:
    print(colour_str('GPU available!', 'green'))
    n_devices = torch.cuda.device_count()
    print('Total number of available devices: ', colour_str(n_devices, 'orange'))
    device = torch.device("cuda:{}".format(train_options['gpu_id']))

[0;31mGPU not available.[0m


### Load the model and stored parameters

In [3]:
print('Loading model.')
# Build the U-Net described by train_options and place it on the selected device.
net = UNet(options=train_options).to(device)

model_path = 'models/PizzaQuattroFormaggi_34_best_model'

if train_options['loss_sic'] == 'regression':
    # A regression SIC head has a single output channel, so the head is replaced
    # before the stored weights are loaded. `FeatureMap` is imported by name at the
    # top of the notebook; the module `unet` itself is not, so `unet.FeatureMap`
    # would raise a NameError.
    net.sic_feature_map = FeatureMap(input_n=train_options['unet_conv_filters'][0],
                                     output_n=1)
    # The new head is created after the first .to(device) call; move the network
    # again so the replacement head ends up on the same device as the rest.
    net.to(device)
elif train_options['loss_sic'] != 'classification':
    # Previously an unknown value silently skipped loading the weights while still
    # reporting success, leaving the network with its initial parameters.
    raise ValueError(f"Unsupported train_options['loss_sic']: {train_options['loss_sic']!r}. "
                     "Expected 'classification' or 'regression'.")

# map_location lets a checkpoint saved on one device be loaded on another
# (e.g. trained on GPU, tested on CPU).
checkpoint = torch.load(model_path, map_location=device)
net.load_state_dict(checkpoint['model_state_dict'])

print('Model successfully loaded.')


Loading model.
Model successfully loaded.


### Prepare the scene list, dataset and dataloaders

In [4]:
# Read the list of test scenes from the datalist shipped with the environment.
with open(train_options['path_to_env'] + 'datalists/testset.json') as datalist_file:
    raw_scene_names = json.load(datalist_file)

# Build the names of the pre-processed files: characters 17:32 (15 characters) and
# 77:80 (3 characters) of every raw name are joined as '<15 chars>_<3 chars>_prep.nc'.
train_options['test_list'] = [name[17:32] + '_' + name[77:80] + '_prep.nc' for name in raw_scene_names]

# The test data is stored in a separate folder inside the training data.
# Guarded so that re-running this cell does not append 'test_data' a second time.
if not train_options['path_to_processed_data'].endswith('test_data'):
    train_options['path_to_processed_data'] += 'test_data'

upload_package = xr.Dataset()  # To store model outputs.
dataset = AI4ArcticChallengeTestDataset(options=train_options, files=train_options['test_list'], test=True)
# batch_size=None disables automatic batching: the loader yields one scene at a time.
asid_loader = torch.utils.data.DataLoader(dataset, batch_size=None, num_workers=train_options['num_workers_val'], shuffle=False)
print('Setup ready')

Setup ready


In [5]:
def format_bytes(size):
    """Scale a byte count to the largest fitting binary unit.

    Parameters
    ----------
    size : int or float
        Number of bytes.

    Returns
    -------
    tuple
        ``(scaled_size, unit)`` where ``unit`` is one of 'bytes', 'kilobytes',
        'megabytes', 'gigabytes' or 'terabytes'. Sizes below 1024 are returned
        unchanged; larger sizes are divided by 1024 once per unit step.
    """
    power = 2**10  # 1024 bytes per unit step.
    power_labels = ('', 'kilo', 'mega', 'giga', 'tera')
    n = 0
    # `>=` so that exactly 1024 bytes is reported as 1 kilobyte; the bound on n keeps
    # very large sizes in terabytes instead of running past the last label.
    while size >= power and n < len(power_labels) - 1:
        size /= power
        n += 1
    return size, power_labels[n] + 'bytes'

In [6]:
print('Testing.')
os.makedirs('inference', exist_ok=True)
net.eval()

# -- Run options -- #
plotting = False        # True: save a PNG of every scene's predicted charts to inference/.
profile_memory = False  # True: print process memory use and tracemalloc growth after every scene.

process = psutil.Process(os.getpid())
tracemalloc_snapshots = []
if profile_memory:
    # tracemalloc adds overhead to every allocation, so it is only started on request.
    tracemalloc.start()

use_cuda = device.type == 'cuda'

for inf_x, _, masks, scene_name in tqdm(iterable=asid_loader, total=len(train_options['test_list']), colour='green', position=0):
    scene_name = scene_name[:19]  # Removes the _prep.nc from the name.
    print(f"scene name: {scene_name}")

    if use_cuda:
        inf_x = inf_x.to(device, non_blocking=True)

    # CUDA autocast is disabled explicitly on CPU; requesting it without a GPU only
    # produces a warning before torch disables it anyway.
    with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_cuda):
        output = net(inf_x)

    # - Convert the model output of every chart to a 2D array and add it to the upload package.
    for chart in train_options['charts']:
        if chart == 'SIC' and train_options['loss_sic'] == 'regression':
            # NOTE(review): the regression output is cast to uint8 below, which truncates
            # the fractional part instead of rounding - confirm this is intended.
            output[chart] = output[chart].squeeze().cpu().numpy()
        else:
            # Classification: the predicted class is the channel with the highest score.
            output[chart] = torch.argmax(output[chart], dim=1).squeeze().cpu().numpy()
        # Dimension names are unique per scene and chart, so arrays of different sizes
        # can live in the same Dataset.
        upload_package[f"{scene_name}_{chart}"] = xr.DataArray(name=f"{scene_name}_{chart}", data=output[chart].astype('uint8'),
                                                               dims=(f"{scene_name}_{chart}_dim0", f"{scene_name}_{chart}_dim1"))

    if plotting:
        # - Save a figure with the scene inference, one panel per chart.
        # squeeze=False keeps axs two-dimensional even when there is a single chart.
        fig, axs = plt.subplots(nrows=1, ncols=len(train_options['charts']), figsize=(10, 10), squeeze=False)
        for idx, chart in enumerate(train_options['charts']):
            ax = axs[0, idx]
            chart_img = output[chart].astype(float)
            chart_img[masks] = np.nan  # Masked pixels are left blank in the plot.
            ax.imshow(chart_img, vmin=0, vmax=train_options['n_classes'][chart] - 2, cmap='jet', interpolation='nearest')
            ax.set_xticks([])
            ax.set_yticks([])
            chart_cbar(ax=ax, n_classes=train_options['n_classes'][chart], chart=chart, cmap='jet')

        fig.suptitle(f"Scene: {scene_name}", y=0.65)
        fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.5, hspace=0)
        fig.savefig(f"inference/{scene_name}.png", format='png', dpi=128, bbox_inches="tight")
        # Close only this figure. Calling plt.cla()/plt.clf() after closing would
        # implicitly create a new, empty figure on every iteration.
        plt.close(fig)
        del fig, axs, ax, chart_img

    del inf_x, masks, output  # Free memory.
    gc.collect()
    if use_cuda:
        torch.cuda.empty_cache()

    if profile_memory:
        n_bytes, form = format_bytes(process.memory_info().rss)
        n_bytes_pack, form_pack = format_bytes(upload_package.nbytes)
        print(f"End {n_bytes:.2f} {form} with {n_bytes_pack:.2f} {form_pack} being the upload package\n")

        # Memory leak analysis: compare allocations against the previous scene.
        tracemalloc_snapshots.append(tracemalloc.take_snapshot())
        if len(tracemalloc_snapshots) >= 2:
            top_stats = tracemalloc_snapshots[-1].compare_to(tracemalloc_snapshots[-2], 'traceback')
            print("[ Top 10 differences ]")
            for stat in top_stats[:10]:
                print(stat)
            print("\n\n")

        if use_cuda:
            print(torch.cuda.memory_summary(device=device, abbreviated=False))

if profile_memory:
    tracemalloc.stop()

# - Save upload_package with zlib compression.
print('Saving upload_package. Compressing data with zlib.')
compression = dict(zlib=True, complevel=1)
encoding = {var: compression for var in upload_package.data_vars}
upload_package.to_netcdf('upload_package.nc', mode='w', format='netcdf4', engine='netcdf4', encoding=encoding)
print('Testing completed.')

Testing.


  0%|          | 0/20 [00:00<?, ?it/s]

scene name: 20180124T194759_dmi




scene name: 20210430T205436_dmi
scene name: 20210506T075557_dmi
scene name: 20201013T080448_dmi
scene name: 20210328T202742_dmi
scene name: 20210410T201933_dmi
scene name: 20190810T110422_dmi
scene name: 20211212T211242_dmi
scene name: 20180623T114935_cis
scene name: 20180707T113313_cis
scene name: 20200719T123046_cis
scene name: 20190406T102029_cis
scene name: 20180903T123331_cis
scene name: 20180716T110418_cis
scene name: 20191011T131651_cis
scene name: 20200217T102731_cis
scene name: 20210512T214149_cis
scene name: 20200701T114012_cis
scene name: 20200319T101935_cis
scene name: 20180917T121813_cis
Saving upload_package. Compressing data with zlib.
Testing completed.


In [7]:
# Scenes flagged as problematic (plain string: there is nothing to interpolate).
print("problematic scene: 20200701T114012_cis, 20200319T101935_cis")

problematic scene: 20200701T114012_cis, 20200319T101935_cis
