In [171]:
import tensorflow as tf
import numpy as np
import os
from wradlib.io import read_opera_hdf5
import xarray as xr
import pandas as pd


## pyproj is imported optionally; PYPROJ_IMPORTED records whether it is available (the importer below needs it to compute lon/lat).
try:
    import pyproj

    PYPROJ_IMPORTED = True
except ImportError:
    PYPROJ_IMPORTED = False



"""
Generates and preprocesses the input that will be used for prediction with the model.


When run, this file creates two sets of inputs. One set of .nf files 
which can be used in plotting.ipynb for visualization with cartopy, 
and a set of .npy files which can be used in 
pred_on_colab.ipynb to generate the predictions.
"""





def r_to_dbz(r):
    '''
    Convert a rain rate in mm/h to dBZ.

    Computes 10 * log10(200 * r**(8/5) + 1); the "+ 1" keeps the logarithm
    finite for r == 0. Inverse of dbz_to_r.
    '''
    reflectivity = 200 * r ** (8 / 5) + 1
    return 10 * tf_log10(reflectivity)

def dbz_to_r(dbz):
    '''
    Convert dBZ back to a rain rate in mm/h.

    Inverse of r_to_dbz: ((10**(dbz/10) - 1) / 200) ** (5/8).
    '''
    linear = 10 ** (dbz / 10)
    return ((linear - 1) / 200) ** (5 / 8)

def tf_log10(x):
    '''
    Base-10 logarithm built from TensorFlow's natural logarithm.

    Uses the change-of-base identity log10(x) = ln(x) / ln(10); the constant
    10 is created with the same dtype as ln(x) so the division is well-typed.
    '''
    ln_x = tf.math.log(x)
    ln_10 = tf.math.log(tf.constant(10, dtype=ln_x.dtype))
    return ln_x / ln_10

def minmax(x, norm_method='minmax', convert_to_dbz = True, undo = False):
    '''
    Min-max scale precipitation values, or invert that scaling.

    x: values to scale (or, with undo=True, scaled values to map back).
    norm_method: 'minmax' scales to [0, 1]; 'minmax_tanh' scales to [-1, 1]
                to match a tanh output activation.
    convert_to_dbz: if True the upper bound is 55 and, when scaling forward,
                x is first converted with r_to_dbz; otherwise the upper bound
                is 128.
    undo: if True apply the inverse affine map only (no clipping and no
                conversion back from dBZ).
    '''
    assert norm_method in ('minmax', 'minmax_tanh')

    # Scaling bounds: lower is always 0, upper depends on the unit in use.
    lower = 0
    upper = 55 if convert_to_dbz else 128
    use_tanh = norm_method == 'minmax_tanh'

    if undo:
        if use_tanh:
            return x*(upper/2 - lower) + lower + upper/2
        return x*(upper - lower) + lower

    if convert_to_dbz:
        x = r_to_dbz(x)
    # Clamp to [lower, upper] before scaling so the result stays in range.
    x = tf.clip_by_value(x, lower, upper)
    if use_tanh:
        return (x - lower - upper/2)/(upper/2 - lower)
    return (x - lower)/(upper - lower)

def pad_along_axis(x, pad_size=3, axis=2):
    '''
    Append `pad_size` zeros at the end of one axis of `x`.

    Used to grow a dimension to a more convenient size (e.g. a height of
    765 to 768 with the default pad_size of 3). A non-positive pad_size
    returns `x` unchanged.
    '''
    if pad_size <= 0:
        return x

    # No padding anywhere except (0 before, pad_size after) on `axis`.
    paddings = [(0, 0) for _ in range(x.ndim)]
    paddings[axis] = (0, pad_size)
    return tf.pad(x, paddings=paddings, constant_values=0)






def _frame_timestamp(file_name):
    '''Return the part of a file's base name before its first dot (the frame timestamp).'''
    return os.path.splitext(os.path.basename(file_name))[0].split('.', 1)[0]


def _select_hdf_files(data_folder, start_time_frame=None):
    '''Return the sorted .hdf file paths in data_folder, starting at start_time_frame if given.'''
    hdf_names = sorted(name for name in os.listdir(data_folder) if name.endswith('.hdf'))
    if not hdf_names:
        raise FileNotFoundError(f"No .hdf files found in {data_folder}")

    if start_time_frame is not None:
        wanted = str(start_time_frame)
        stamps = [_frame_timestamp(name) for name in hdf_names]
        if wanted not in stamps:
            raise ValueError(
                f"No .hdf file with timestamp {wanted} found in {data_folder}")
        # File names start with the timestamp, so the sorted order is
        # chronological and slicing keeps the requested frame and all later ones.
        hdf_names = hdf_names[stamps.index(wanted):]

    return [os.path.join(data_folder, name) for name in hdf_names]


def get_data_as_xarray(data_folder,start_time_frame=None):
    '''Code by Simon De Kock <simon.de.kock@vub.be>
    Read the OPERA/ODIM HDF5 radar files of a folder into one xarray dataset.

    Parameters
    ----------
    data_folder : string
        Path to the folder containing the .hdf files. Only files ending in
        '.hdf' are read; each file name must start with the frame time.
    start_time_frame : string, optional
        Timestamp of the first frame to include, formatted as
        '%Y%m%d%H%M%S' (e.g. '20231106200500'). Frames before it are
        skipped. If None, every .hdf file in the folder is used.

    Returns
    -------
    xr.Dataset
        Dataset with the variable 'precip_intensity' on dimensions
        (time, y, x), projected 'x'/'y' coordinates, 2-D 'lon'/'lat'
        coordinates, sorted by time.

    Raises
    ------
    FileNotFoundError
        If the folder holds no .hdf file.
    ValueError
        If start_time_frame matches no file.
    ImportError
        If pyproj is not installed (needed for the lon/lat coordinates).
    Exception
        If a file holds a quantity other than 'RATE'.
    '''
    file_paths = _select_hdf_files(data_folder, start_time_frame)

    if not PYPROJ_IMPORTED:
        raise ImportError("pyproj is required to compute the lon/lat coordinates.")

    frames = []
    for file_path in file_paths:
        # Read the content
        file_content = read_opera_hdf5(file_path)

        # Extract time information from the file name
        time = pd.to_datetime(_frame_timestamp(file_path), format='%Y%m%d%H%M%S')

        # Extract quantity information (stored as bytes or str depending on the reader)
        quantity = file_content['dataset1/data1/what']['quantity']
        if isinstance(quantity, bytes):
            quantity = quantity.decode()

        # Set variable properties based on quantity
        if quantity == 'RATE':
            short_name = 'precip_intensity'
            long_name = 'instantaneous precipitation rate'
            units = 'mm h-1'
        else:
            raise Exception(f"Quantity {quantity} not yet implemented.")

        # Create the grid
        projection = file_content.get("where", {}).get("projdef", "")
        if not isinstance(projection, str):
            projection = projection.decode("UTF-8")

        gridspec = file_content.get("dataset1/where", {})
        ul_x, ul_y = gridspec.get('UL_x', 0), gridspec.get('UL_y', 0)
        nx, ny = gridspec.get('xsize', 0), gridspec.get('ysize', 0)
        dx, dy = gridspec.get('xscale', 0), gridspec.get('yscale', 0)

        # Coordinates run from the upper-left corner; shift by half a pixel in
        # both directions so x and y both refer to pixel centres (x was
        # previously shifted by a full pixel, inconsistent with y).
        x = np.linspace(ul_x, ul_x + nx * dx, num=nx, endpoint=False)
        x += dx / 2
        y = np.linspace(ul_y, ul_y - ny * dy, num=ny, endpoint=False)
        y -= dy / 2

        x_2d, y_2d = np.meshgrid(x, y)

        # Inverse projection: projected metres -> geographic lon/lat
        pr = pyproj.Proj(projection)
        lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
        lon = lon.reshape(ny, nx)
        lat = lat.reshape(ny, nx)

        # Build the xarray dataset. The data is labelled (y, x) to match the
        # (ysize, xsize) shape used for the lon/lat grids above.
        ds = xr.Dataset(
            data_vars={
                short_name: (['y', 'x'], file_content.get("dataset1/data1/data", np.nan),
                            {'long_name': long_name, 'units': units})
            },
            coords={
                'x': (['x'], x, {'axis': 'X', 'standard_name': 'projection_x_coordinate',
                                'long_name': 'x-coordinate in Cartesian system', 'units': 'm'}),
                'y': (['y'], y, {'axis': 'Y', 'standard_name': 'projection_y_coordinate',
                                'long_name': 'y-coordinate in Cartesian system', 'units': 'm'}),
                'lon': (['y', 'x'], lon, {'standard_name': 'longitude', 'long_name': 'longitude coordinate',
                                        'units': 'degrees_east'}),
                'lat': (['y', 'x'], lat, {'standard_name': 'latitude', 'long_name': 'latitude coordinate',
                                        'units': 'degrees_north'})
            }
        )
        ds['time'] = time

        frames.append(ds)

    # Concatenate datasets along the time dimension and order them chronologically
    dataset = xr.concat(frames, dim='time')
    return dataset.sortby(dataset.time)




# def get_input_array(field,downscale256=True) -> np.ndarray:
#     '''
#     Parameters
#     ----------
#     field : xr.DataArray

    

    
#     Returns
#     -------
#     tensor : np.ndarray
    
#     - Crop xarray data to required dimensions (700x700 to 256x256)
#     - Reshape it to:
#         [B, T, C, H, W] - Batch, Time, Channel, Heigh, Width
#     args:
#         - field: xarray.DataArray
#             The precipitation data variable from the xarray
#     '''
#     arrays= [np.array(path) for path in field['precip_intensity']]

   
#     preprocessed_data=[]

#     for array in arrays:
#       array[np.isnan(array)]=0
#       mask = np.where(arrays[0] == 65535, 1, 0)
#       array[array == mask] = 0
#       array = (array / 100) * 12
#       x = minmax(array, norm_method='minmax', convert_to_dbz = False, undo = False)
#       x = np.expand_dims(x, axis=-1)
#       if downscale256:
#         # First make the images square size
#         x = pad_along_axis(x, axis=0, pad_size=3)
#         x = pad_along_axis(x, axis=1, pad_size=68)
#         x =  tf.image.resize(x, (256, 256))
#         preprocessed_data.append(x)
#     array=np.stack(preprocessed_data)
    
#     return array

def prep(field, crop_size=256):
    '''
    Centre-crop the precipitation frames and add a trailing channel axis.

    - Crops a crop_size x crop_size window around the centre of each frame
      (with the default, a 700x700 field becomes 256x256).
    - Reshapes the result to [T, H, W, C] - Time, Height, Width, Channel
      (C = 1) - and returns it as a TensorFlow tensor.

    args:
        - field: xarray.Dataset
            Dataset holding the 'precip_intensity' variable with time as
            its first dimension.
        - crop_size: int
            Side length of the square crop (default 256).

    raises:
        ValueError if a frame is smaller than crop_size in either direction.
    '''
    frames = field['precip_intensity'].to_numpy()
    height, width = frames.shape[1:3]
    if height < crop_size or width < crop_size:
        raise ValueError(
            f"Cannot crop {crop_size}x{crop_size} from frames of size {height}x{width}.")

    # Window of +/- crop_size/2 around the centre of each spatial axis.
    top = height // 2 - crop_size // 2
    left = width // 2 - crop_size // 2
    cropped = frames[:, top:top + crop_size, left:left + crop_size]

    return tf.reshape(cropped, [cropped.shape[0], crop_size, crop_size, 1])

















In [172]:
# -*- coding: utf-8 -*-
"""
This is a deep learning model for performing nowcasting on radar images.
"""

# Import the needed libraries
import os

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub


### Uncomment the next lines if pyproj is needed for the importer.
# try:
#     import pyproj
#
#     PYPROJ_IMPORTED = True
# except ImportError:
#     PYPROJ_IMPORTED = False





def load_model(input_height,input_width, base_path=None):
    """
    Load a DGMR pre-trained model snapshot with tensorflow_hub.

    Parameters
    ----------
    input_height, input_width : int
        Spatial size the snapshot was exported for; the snapshot is read from
        the sub-folder "<input_height>x<input_width>" of base_path.
    base_path : string, optional
        Folder containing the TF-Hub snapshots. Defaults to the local path
        this notebook was developed with.

    Returns
    -------
    The 'default' signature of the loaded module.
    """
    if base_path is None:
        # NOTE(review): machine-specific default kept for backward
        # compatibility; pass base_path explicitly on any other machine.
        base_path = r"C:\Users\user\Desktop\Intersnhip documents\tfhub_snapshots"

    print("--> Loading model...")
    hub_module = hub.load(
        os.path.join(base_path, f"{input_height}x{input_width}"))
    # Note this has loaded a legacy TF1 model for running under TF2 eager mode.
    # This means we need to access the module via the "signatures" attribute. See
    # https://github.com/tensorflow/hub/blob/master/docs/migration_tf2.md#using-lower-level-apis
    # for more information.
    return hub_module.signatures['default']



  
 

   


def predict(module, input_frames, num_samples=1,
            include_input_frames_in_result=False,**kwargs):
    """
    Make predictions from a TF-Hub snapshot of the 'Generative Method' model.

    Args:
      module: The signature returned by load_model.
      input_frames: Shape (T_in,H,W,C), where T_in = 4. Input frames to
        condition the predictions on.
      num_samples: The number of different samples to draw.
      include_input_frames_in_result: If True, will return a total of 22
        frames along the time axis, the 4 input frames followed by 18
        predicted frames. Otherwise will only return the 18 predicted frames.
      **kwargs: Accepted but not used.

    Returns:
      A tensor of shape (num_samples,T_out,H,W,C), where T_out is either 18
      or 22 as described above.
    """
    num_input_frames = 4

    # Negative inputs are clamped to zero, then a batch axis is added and the
    # frames are repeated once per requested sample.
    conditioning = tf.math.maximum(input_frames, 0.)
    conditioning = tf.tile(tf.expand_dims(conditioning, 0),
                           multiples=[num_samples, 1, 1, 1, 1])

    # Draw one latent vector z per sample; its size comes from the module's
    # declared input signature.
    _, signature = module.structured_input_signature
    latent_size = signature['z'].shape[1]
    latents = tf.random.normal(shape=(num_samples, latent_size))

    outputs = module(**{
        "z": latents,
        "labels$onehot" : tf.ones(shape=(num_samples, 1)),
        "labels$cond_frames" : conditioning,
    })
    samples = outputs['default']

    if not include_input_frames_in_result:
        # The module returns the input frames alongside its sampled
        # predictions; keep only the predictions.
        samples = samples[:, num_input_frames:, ...]

    # Take positive values of rainfall only.
    return tf.math.maximum(samples, 0.)


In [173]:
import matplotlib
from matplotlib import animation
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np


def plot_animation(field,figsize=None,
                   cmap="jet", **imshow_args):
    """
    Build a matplotlib animation that steps through the frames of `field`.

    field: array-like indexed as field[frame]; its first axis is the frame axis.
    figsize, cmap, **imshow_args: forwarded to the figure / imshow call.

    The colour scale is a fixed logarithmic norm from 0.1 to 200. Returns the
    FuncAnimation object; the animation rc setting is switched to 'jshtml'.
    """
    matplotlib.rc('animation', html='jshtml')

    figure = plt.figure(figsize=figsize)
    axis = plt.axes()
    axis.set_axis_off()
    plt.close() # Prevents extra axes being plotted below animation

    image = axis.imshow(field[0, :, :], norm=LogNorm(vmin=0.1, vmax=200),
                        cmap=cmap, **imshow_args)
    figure.colorbar(image, ax=axis)
    title = axis.set_title('Frame 0')

    def _show_frame(frame):
        # Swap in the pixel data of the requested frame and update the title.
        image.set_data(field[frame])
        title.set_text(f'Frame {frame}')
        return (image,)

    return animation.FuncAnimation(
        figure, _show_frame, frames=field.shape[0], interval=4, blit=False)
  
def plot_subplot(input, output, figsize=None,
                  vmin=0, vmax=10, cmap="jet", **imshow_args):
    """
    Show the first four frames of `input` (top row) and `output` (bottom row).

    Both arguments are indexed as arr[0, i] for i in 0..3. A torch tensor
    passed as `output` is detached and converted to numpy first. Every panel
    gets its own colourbar and shares vmin/vmax/cmap. Returns None.
    """
    fig, axes = plt.subplots(2, 4, figsize=figsize)

    # Type is compared by name so torch does not have to be imported here.
    if str(type(output)) == "<class 'torch.Tensor'>":
        output = output.detach().numpy()

    for column in range(4):
        for row, frames in enumerate((input, output)):
            panel = axes[row, column]
            image = panel.imshow(frames[0, column], cmap=cmap,
                                 vmin=vmin, vmax=vmax, **imshow_args)
            plt.colorbar(image, ax=panel)

    plt.show()
    return None

In [174]:
# Folder holding the radar .hdf files.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
DATAFILE = r"C:\Users\user\Desktop\meteo_france_data"

# Read every frame in the folder, then centre-crop to the model's input size.
data=get_data_as_xarray(DATAFILE)
whole_data=prep(data)

# The first 4 frames condition the model; the remaining frames are kept as
# the observations to compare the forecast against.
dgmr_input=whole_data[:4]
observation=whole_data[4:]

# data['precip_intensity'].to_numpy()

# Display both shapes as the cell output.
dgmr_input.shape,observation.shape



(TensorShape([4, 256, 256, 1]), TensorShape([18, 256, 256, 1]))

In [175]:
# Load the 256x256 snapshot and draw one forecast sample conditioned on the input frames.
model=load_model(256,256)
forecast=predict(model,dgmr_input)

--> Loading model...


In [176]:
# Convert the forecast to a numpy array and zero out values below 0.1.
forecast=np.array(forecast)
forecast[forecast < 0.1] = 0


# Display the largest forecast value as the cell output.
np.max(forecast)


28.321686

In [177]:
# Animate the 4 conditioning frames.
plot_animation(dgmr_input)

In [178]:
# Animate the observed frames that follow the conditioning frames.
plot_animation(observation)

In [179]:
# Animate the first (index 0) forecast sample.
plot_animation(forecast[0])