In [20]:
import os
# -- Environmental variables -- #
os.environ['AI4ARCTIC_DATA'] = 'data'  # Fill in directory for data location.
os.environ['AI4ARCTIC_ENV'] = ''  # Fill in
!echo ${AI4ARCTIC_DATA}

866.26s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


data


In [21]:
# -- Built-in modules -- #
import gc
import os
import os.path
import sys
import mlflow

In [22]:
from pathlib import Path
sys.path.append(str(Path.home().joinpath("AI4ArcticSeaIceChallenge/")))

In [23]:
# -- Third-part modules -- #
import json
import matplotlib.pyplot as plt
import numpy as np
import torch
import xarray as xr
from tqdm.notebook import tqdm  # Progress bar

# --Proprietary modules -- #
from functions import chart_cbar, r2_metric, f1_metric, compute_metrics  # Functions to calculate metrics and show the relevant chart colorbar.
from loaders import AI4ArcticChallengeDataset, AI4ArcticChallengeTestDataset, get_variable_options  # Custom dataloaders for regular training and validation.
from unet import UNet  # Convolutional Neural Network model
from unet_transfomers import TransformerUNet
from utils import CHARTS, SIC_LOOKUP, SOD_LOOKUP, FLOE_LOOKUP, SCENE_VARIABLES, colour_str

train_options = {
    # -- Training options -- #
    'path_to_processed_data': os.environ['AI4ARCTIC_DATA'],
    'lr': 0.0001,  # Optimizer learning rate.
    'epochs': 3,  # Number of epochs before training stop.
    'epoch_len': 15,  # Number of batches for each epoch.
    'patch_size': 256,  # Size of patches sampled. Used for both Width and Height.
    'batch_size': 8,  # Number of patches for each batch.
    'loader_upsampling': 'nearest',  # How to upscale low resolution variables to high resolution.

    # -- Data prepraration lookups and metrics.
    'train_variables': SCENE_VARIABLES,  # Contains the relevant variables in the scenes.
    'charts': CHARTS,  # Charts to train on.
    'n_classes': {  # number of total classes in the reference charts, including the mask.
        'SIC': SIC_LOOKUP['n_classes'],
        'SOD': SOD_LOOKUP['n_classes'],
        'FLOE': FLOE_LOOKUP['n_classes']
    },
    'pixel_spacing': 80,  # SAR pixel spacing. 80 for the ready-to-train AI4Arctic Challenge dataset.
    'train_fill_value': 0,  # Mask value for SAR training data.
    'class_fill_values': {  # Mask value for class/reference data.
        'SIC': SIC_LOOKUP['mask'],
        'SOD': SOD_LOOKUP['mask'],
        'FLOE': FLOE_LOOKUP['mask'],
    },

    # -- Validation options -- #
    'chart_metric': {  # Metric functions for each ice parameter and the associated weight.
        'SIC': {
            'func': r2_metric,
            'weight': 2,
        },
        'SOD': {
            'func': f1_metric,
            'weight': 2,
        },
        'FLOE': {
            'func': f1_metric,
            'weight': 1,
        },
    },
    'num_val_scenes': 2,  # Number of scenes randomly sampled from train_list to use in validation.

    # -- GPU/cuda options -- #
    'gpu_id': 0,  # Index of GPU. In case of multiple GPUs.
    'num_workers': 10,  # Number of parallel processes to fetch data.
    'num_workers_val': 1,  # Number of parallel processes during validation.

    # -- U-Net Options -- #
    'unet_conv_filters': [24, 16, 32, 64],  # Number of filters in the U-Net.
    'conv_kernel_size': (3, 3),  # Size of convolutional kernels.
    'conv_stride_rate': (1, 1),  # Stride rate of convolutional kernels.
    'conv_dilation_rate': (1, 1),  # Dilation rate of convolutional kernels.
    'conv_padding': (1, 1),  # Number of padded pixels in convolutional layers.
    'conv_padding_style': 'zeros',  # Style of padding.

    # --Transformer Options -- #
    'is_residual': True,
    'num_heads': 2,
    'bias': False,
    'dtype': torch.float32
}
# Get options for variables, amsrenv grid, cropping and upsampling.
get_variable_options = get_variable_options(train_options)
# To be used in test_upload.
%store train_options

# Load training list.
with open('datalists/dataset_prep.json') as file:
    train_options['train_list'] = json.loads(file.read())
# Convert the original scene names to the preprocessed names.
train_options['train_list'] = [file[17:32] + '_' + file[77:80] + '_prep.nc' for file in train_options['train_list']]
# Select a random number of validation scenes with the same seed. Feel free to change the seed.et
np.random.seed(0)
train_options['validate_list'] = np.random.choice(np.array(train_options['train_list']), size=train_options['num_val_scenes'], replace=False)
# Remove the validation scenes from the train list.
train_options['train_list'] = [scene for scene in train_options['train_list'] if scene not in train_options['validate_list']]
print('Options initialised')


Stored 'train_options' (dict)
Options initialised


In [24]:
# Get GPU resources.
if torch.cuda.is_available():
    print(colour_str('GPU available!', 'green'))
    print('Total number of available devices: ', colour_str(torch.cuda.device_count(), 'orange'))
    device = torch.device(f"cuda:{train_options['gpu_id']}")

else:
    print(colour_str('GPU not available.', 'red'))
    device = torch.device('cpu')

train_options['device'] = device


[0;32mGPU available![0m
Total number of available devices:  [0;33m1[0m


In [25]:

# Custom dataset and dataloader.
dataset = AI4ArcticChallengeDataset(files=train_options['train_list'], options=train_options)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=None, shuffle=True, num_workers=train_options['num_workers'], pin_memory=True)


In [42]:

len(train_options['full_variables'])+len(train_options['amsrenv_variables'])
# scene['SIC'].values.shape[0]
# scene['SIC'].values.shape[1]
x_dim = scene['SIC'].values.shape[0]
y_dim = scene['SIC'].values.shape[1]
# Split the scene into patches
row_col_indices = np.array(np.meshgrid(np.arange(0, x_dim - train_options['patch_size'], train_options['patch_size']),
                                            np.arange(0, y_dim - train_options['patch_size'], train_options['patch_size']))).T.reshape(-1, 2)

In [50]:
from loaders import AI4ArcticChallengeDatasetNoRandomCrop
dataset_crop = AI4ArcticChallengeDatasetNoRandomCrop(files=train_options['train_list'], options=train_options)


ImportError: cannot import name 'AI4ArcticChallengeDatasetNoRandomCrop' from 'loaders' (/home/leonie/anani/AI4ArcticSeaIceChallenge/loaders.py)

In [32]:
files = train_options['train_list']
scene_id = np.random.randint(low=0, high=len(files), size=1).item()
# print(scene_id) # hard to 2
scene = xr.open_dataset(os.path.join(train_options['path_to_processed_data'], files[2]))

def scene_to_patches(scene, train_options):
    patch = np.zeros((len(train_options['full_variables']) + len(train_options['amsrenv_variables']),
                          train_options['patch_size'], train_options['patch_size']))
    

# Convert scene in patches
patches = scene_to_patches(scene, train_options['patch_size'], train_options['patch_size'], train_options['train_variables'], train_options['amsrenv_variables']

4999

In [34]:


# - Setup of the validation dataset/dataloader. The same is used for model testing in 'test_upload.ipynb'.
dataset_val = AI4ArcticChallengeTestDataset(options=train_options, files=train_options['validate_list'])
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=None, num_workers=train_options['num_workers_val'], shuffle=False)


In [39]:
dataset_val.__getitem__(0).shape()

AttributeError: 'tuple' object has no attribute 'shape'

In [None]:
print(time)