# Nowcasting

In [None]:
# Import
import h5py
import numpy as np
import os

import tensorflow as tf
import keras
from keras.callbacks import ModelCheckpoint
from keras.utils import Sequence

In [None]:
# Global Variables
# All paths are resolved relative to the notebook's working directory.
DATA_DIR = os.path.abspath('../Data')
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
RESULT_DIR = os.path.abspath('../Results')
# os.listdir() returns entries in arbitrary order; sort them so the list (and
# anything indexed from it, e.g. TRAIN_FILES_LIST[0]) is identical on every run.
TRAIN_FILES_LIST = [os.path.join(TRAIN_DIR, name) for name in sorted(os.listdir(TRAIN_DIR))]
TEST_FILE = os.path.join(DATA_DIR, '2022-test-public.hdf5')

## Data Generator

In [None]:
class RadarDataset(Sequence):
    """Keras ``Sequence`` that yields batches of radar frame sequences from HDF5 files.

    Every HDF5 file is read as a mapping ``timestamp -> group`` where each
    group holds an ``"intensity"`` and a ``"radial_velocity"`` dataset.  Frames
    are padded by 2 pixels on each spatial side into 256x256 arrays (radial
    velocity is first reshaped to ``(252, 252, 10)``).

    A sequence is ``in_seq_len + out_seq_len`` consecutive timestamps.  Only
    sequences whose first and last timestamps differ by exactly
    ``(seq_len - 1) * 600`` are kept (presumably 600 s = 10 min between
    frames -- TODO confirm the unit).

    Args:
        list_of_files: paths of the HDF5 files to index.
        in_seq_len: number of leading frames returned as model input.
        out_seq_len: number of trailing frames returned as target.
        batch_size: number of sequences per batch (must be >= 1).
        mode: ``'overlap'`` builds a sliding window with stride 1,
            ``'sequentially'`` builds non-overlapping windows.
        with_time: if true, ``__getitem__`` returns ``(inputs, timestamp)``
            as the first element instead of ``inputs`` alone.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 or ``mode`` is unknown.
    """

    # Fill value for the spatial padding; it is left untouched in the targets.
    _PAD_VALUE = -2e6
    # Second sentinel found in the raw data (presumably "no measurement" --
    # TODO confirm against the dataset description).
    _MISSING_VALUE = -1e6
    # Required timestamp difference between two consecutive frames.
    _FRAME_STEP = 600
    # Padding that grows a (252, 252, C) frame to (256, 256, C).
    _SPATIAL_PAD = ((2, 2), (2, 2), (0, 0))

    def __init__(self, list_of_files, in_seq_len=4, out_seq_len=12, batch_size=4, mode='overlap', with_time=False):
        # Newer Keras versions expect Sequence/PyDataset subclasses to call the
        # parent constructor; on older versions this is a harmless no-op.
        super().__init__()
        if batch_size < 1:
            raise ValueError(f'batch_size must be >= 1, got {batch_size}')
        self.in_seq_len = in_seq_len
        self.out_seq_len = out_seq_len
        self.seq_len = in_seq_len + out_seq_len
        self.with_time = with_time
        self.batch_size = batch_size
        self.__prepare_timestamps_mapping(list_of_files)
        self.__prepare_sequences(mode)

    def __len__(self):
        """Return the number of batches; the last one may be partial."""
        return int(np.ceil(len(self.sequences) / self.batch_size))

    def __getitem__(self, index):
        """Load and preprocess batch number ``index``.

        Returns:
            ``(inputs, targets)`` or, when ``with_time`` is set,
            ``((inputs, timestamp), targets)`` where

            * ``inputs`` has shape ``(n, in_seq_len, 256, 256, 11)``: channel 0
              is intensity (padding -> 0, divided by 30, negatives -> -1),
              channels 1..10 are radial velocity (both sentinels -> 0, then
              ``(v + 35) / 35 - 1``);
            * ``targets`` has shape ``(n, out_seq_len, 256, 256, 1)``: raw
              intensity with -1e6 replaced by -1 and the padding left at -2e6
              so that a loss can mask it out.

            ``n`` is the number of sequences actually in the batch
            (``batch_size`` except possibly for the last batch).

        Raises:
            IndexError: if ``index`` does not address any sequence.
        """
        start = index * self.batch_size
        batch = self.sequences[start: start + self.batch_size]
        if not batch:
            raise IndexError(f'batch index {index} is out of range')

        # Allocate for the real batch length: with a fixed `batch_size` the
        # unused rows of a partial last batch would hold uninitialised memory.
        n_sequences = len(batch)
        intensity = np.empty((n_sequences, self.seq_len, 256, 256, 1))
        # Radial velocity is only ever used for the input frames, so it is not
        # read for the frames that become targets.
        radial_velocity = np.empty((n_sequences, self.in_seq_len, 256, 256, 10))

        for seq_id, seq in enumerate(batch):
            for timestamp_id, timestamp in enumerate(seq):
                # Open read-only: the generator must never modify the dataset.
                with h5py.File(self.timestamp_to_file[timestamp], 'r') as frames:
                    frame = frames[timestamp]
                    intensity[seq_id, timestamp_id] = np.pad(
                        np.expand_dims(frame["intensity"][:], axis=-1),
                        self._SPATIAL_PAD, 'constant', constant_values=self._PAD_VALUE)
                    if timestamp_id < self.in_seq_len:
                        # NOTE(review): reshape assumes the stored layout is
                        # already (252, 252, 10)-compatible; if the file stores
                        # (10, 252, 252) a transpose is needed instead -- confirm.
                        radial_velocity[seq_id, timestamp_id] = np.pad(
                            frame["radial_velocity"][:].reshape((252, 252, 10)),
                            self._SPATIAL_PAD, 'constant', constant_values=self._PAD_VALUE)

        intensity[intensity == self._MISSING_VALUE] = -1

        # Targets are taken before the padding sentinel is zeroed so that the
        # padded border can still be identified (and masked) downstream.
        targets = np.copy(intensity[:, self.in_seq_len:])

        intensity[intensity == self._PAD_VALUE] = 0
        intensity = intensity / 30
        intensity[intensity < 0] = -1

        radial_velocity[radial_velocity == self._MISSING_VALUE] = 0
        radial_velocity[radial_velocity == self._PAD_VALUE] = 0
        radial_velocity = (radial_velocity + 35) / 35 - 1

        inputs = np.concatenate((intensity[:, :self.in_seq_len], radial_velocity), axis=-1)

        if self.with_time:
            # NOTE(review): this indexes `sequences` with the *batch* index, so
            # for batch_size > 1 the timestamp does not belong to this batch.
            # Left unchanged because the intended shape (scalar vs. one value
            # per sequence) is not clear from this file -- confirm and fix.
            return (inputs, self.sequences[index][-1]), targets
        else:
            return inputs, targets

    def __prepare_timestamps_mapping(self, list_of_files):
        """Build ``self.timestamp_to_file``: top-level HDF5 key -> file path.

        If the same key appears in several files, the file listed last wins.
        """
        self.timestamp_to_file = {}
        for filename in list_of_files:
            with h5py.File(filename, 'r') as frames:
                # In-place update instead of rebuilding the whole dict per file.
                self.timestamp_to_file.update(
                    (timestamp, filename) for timestamp in frames.keys()
                )

    def __prepare_sequences(self, mode):
        """Build ``self.sequences``, the list of timestamp windows to serve.

        Raises:
            ValueError: if ``mode`` is neither ``'sequentially'`` nor ``'overlap'``.
        """
        # np.unique already returns the keys sorted (lexicographically, since
        # they are strings).
        timestamps = np.unique(list(self.timestamp_to_file.keys()))
        if mode == 'sequentially':
            candidates = [
                timestamps[index * self.seq_len: (index + 1) * self.seq_len]
                for index in range(len(timestamps) // self.seq_len)
            ]
        elif mode == 'overlap':
            candidates = [
                timestamps[index: index + self.seq_len]
                for index in range(len(timestamps) - self.seq_len + 1)
            ]
        else:
            raise ValueError(f'Unknown mode {mode}')

        # Drop windows that span a gap in the recording: first and last frame
        # must be exactly (seq_len - 1) steps apart.
        expected_span = (self.seq_len - 1) * self._FRAME_STEP
        self.sequences = [
            window for window in candidates
            if int(window[-1]) - int(window[0]) == expected_span
        ]

## Training

In [None]:
# Making Metric Function
def make_RMSE_metric():
    """Return a root-mean-square-error function that skips masked target pixels.

    The returned callable takes ``(y_true, y_pred)`` and computes the RMSE over
    the positions where ``y_true`` differs from -2e6; every position where
    ``y_true`` equals -2e6 is excluded from the mean.
    """
    # The inner name is kept as `RMSE_metric`: the checkpoint callback in this
    # notebook monitors a quantity with exactly that name.
    def RMSE_metric(y_true, y_pred):
        valid = tf.not_equal(y_true, -2e6)
        residual = tf.boolean_mask(y_true, valid) - tf.boolean_mask(y_pred, valid)
        mean_squared_error = tf.reduce_mean(tf.square(residual))
        return tf.sqrt(mean_squared_error)

    return RMSE_metric

RMSE = make_RMSE_metric()

# Sequence layout shared by both generators.
IN_SEQ_LEN = 4
OUT_SEQ_LEN = 12
BATCH_SIZE = 1

# Data Generator
train_gen = RadarDataset([TRAIN_FILES_LIST[0]], in_seq_len=IN_SEQ_LEN, out_seq_len=OUT_SEQ_LEN, batch_size=BATCH_SIZE)
val_gen = RadarDataset([TEST_FILE], in_seq_len=IN_SEQ_LEN, out_seq_len=OUT_SEQ_LEN, batch_size=BATCH_SIZE)

# Model Callbacks
# Create the output directory up front so the first checkpoint write cannot
# fail on a missing folder.
WEIGHTS_DIR = os.path.join(RESULT_DIR, 'saved_weights')
os.makedirs(WEIGHTS_DIR, exist_ok=True)

# NOTE(review): 'RMSE_metric' is the *training* metric; with save_best_only the
# "best" epoch is usually chosen on 'val_RMSE_metric' -- confirm the intent.
# NOTE(review): Keras 3 requires a '.weights.h5' suffix when
# save_weights_only=True; the name is kept as-is for the Keras version in use.
model_checkpoint = ModelCheckpoint(
    save_best_only=True,
    save_weights_only=True,
    monitor='RMSE_metric',
    mode='min',
    filepath=os.path.join(WEIGHTS_DIR, '{epoch}-weights.hdf5')
)

# Model
# NOTE(review): project-local import kept next to its only use; consider moving
# it to the import cell at the top of the notebook.
from model import make_unet_conv_LSTM_v2
# First argument matches the generator's per-sample input shape
# (in_seq_len, 256, 256, 11); the meaning of the remaining arguments is defined
# in model.py (presumably filters, kernel size, output length -- verify there).
conv_LSTM = make_unet_conv_LSTM_v2((4, 256, 256, 11), 32, (3, 3), 12)

# The same masked RMSE is used as the training loss and as the reported metric.
conv_LSTM.compile(loss=RMSE, metrics=[RMSE], optimizer='adam')

# Training Loop
# batch_size / validation_batch_size are intentionally not passed: the Sequence
# objects already produce batches, and Keras documents that these arguments
# must not be specified for generator / Sequence inputs.
conv_LSTM.fit(train_gen, validation_data=val_gen, epochs=1, callbacks=[model_checkpoint])