# Setup

Import necessary modules and do some basic setup.

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Version checks compare numeric (major, minor) tuples. Comparing the raw
# version strings is lexicographic, so "0.9" >= "0.20" would wrongly pass
# and "10.0" >= "2.0" would wrongly fail.
import re

# Scikit-Learn ≥0.20 is required
import sklearn
assert tuple(map(int, re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups())) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
assert tuple(map(int, re.match(r"(\d+)\.(\d+)", tf.__version__).groups())) >= (2, 0)
from tensorflow import keras

# Common imports
import os
import numpy as np
import pandas as pd
import xarray as xr

# To make this notebook's output stable across runs
np.random.seed(42)

# Config matplotlib
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Default font sizes for axis labels and tick labels
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Custom utils
from utils_data import *
from utils_ml import *

Define some paths and constants.

In [None]:
# Paths
# Data folder: "data" directory one level above the current working directory
DATADIR = f"{os.getcwd()}/../data"

# Some constants
# Standard gravity; used below as the divisor that turns geopotential
# into geopotential height
G = 9.80665
# Centre point of Switzerland -- presumably [lat, lon]; TODO confirm
CH_CENTER = [46.818, 8.228]
# Box around Switzerland -- presumably [lat_min, lat_max, lon_min, lon_max];
# TODO confirm against the helpers that consume it
CH_BOUNDING_BOX = [45.66, 47.87, 5.84, 10.98]
# Study period used to filter the precipitation table and the ERA5 data
DATE_START = '1979-01-01'
DATE_END = '2020-12-31'

# Artificial Neural Networks

## Getting started with the data

**Dataset**: RhiresD, a gridded daily precipitation dataset over Switzerland provided by MeteoSwiss. It is based on a spatial interpolation of rain-gauge data. The grid resolution is 1 km, but the effective resolution is on the order of 15-20 km.


**Aggregation levels**: The gridded dataset has been averaged over different regions:
* 12 climatic regions
* 5 aggregated regions
* the whole country

In [None]:
# Read precipitation file and get events over threshold
precip = pd.read_csv(DATADIR + '/MeteoSwiss/precip_regions.csv')

# Assemble a datetime column from the year/month/day columns and put it first
df_time = pd.to_datetime({
    'year': precip.year,
    'month': precip.month,
    'day': precip.day})
precip.insert(0, "date", df_time, True)

# Keep only the study period (both bounds inclusive)
precip = precip[(precip.date >= DATE_START) & (precip.date <= DATE_END)]

# Boolean copies: True where a column's value exceeds its own 95th / 99th
# percentile, computed over the selected period
precip_p95 = precip.copy()
precip_p99 = precip.copy()

# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column name, Series) pairs and also exists in older versions.
for key, ts in precip.items():
    # Date columns are not precipitation series: leave them untouched
    if key in ['date', 'year', 'month', 'day']:
        continue
    precip_p95[key] = ts > ts.quantile(0.95)
    precip_p99[key] = ts > ts.quantile(0.99)

In [None]:
# Load the three ERA5 fields for the study period
# (get_era5_data / get_data_mean_over_CH_box are presumably provided by the
# custom utils star-imports -- their exact behaviour is not visible here)
z = get_era5_data(f'{DATADIR}/ERA5/geopotential/*.nc', DATE_START, DATE_END)
mslp = get_era5_data(f'{DATADIR}/ERA5/mslp/*.nc', DATE_START, DATE_END)
t2m = get_era5_data(f'{DATADIR}/ERA5/Daymean_era5_T2M_EU_19790101-20210905.nc', DATE_START, DATE_END)

# Reduce each field to its mean value over Switzerland
# (the extra argument 500 is presumably the pressure level in hPa -- TODO confirm)
z500_mean = get_data_mean_over_CH_box(z, 500)
mslp_mean = get_data_mean_over_CH_box(mslp)
t2m_mean = get_data_mean_over_CH_box(t2m)

# Unit conversions: geopotential -> geopotential height (divide by G),
# pressure -> hPa (divide by 100)
z500_mean['z'] = z500_mean['z'] / G
mslp_mean['MSL'] = mslp_mean['MSL'] / 100

In [None]:
# Plot the three spatially averaged predictor series side by side
fig, axs = plt.subplots(ncols=3, figsize=(20, 5))

# Titles are set after each plot call so they are the ones displayed
z500_mean.z.plot(ax=axs[0])
# 'z' has been divided by G before plotting, so it is a geopotential height
axs[0].set_title('Geopotential height 500hPa')
mslp_mean.MSL.plot(ax=axs[1])
axs[1].set_title('Sea level pressure')
t2m_mean.T2MMEAN.plot(ax=axs[2])
axs[2].set_title('Temperature 2m')

In [None]:
from sklearn.model_selection import train_test_split

# Predictor series bundled with the precipitation table
# NOTE(review): full_set is not used by the split below -- confirm intent
full_set = [z500_mean, mslp_mean, t2m_mean, precip]

# Seeded 80/20 split; only the precipitation table is split here
train_set, test_set = train_test_split(
    precip,
    random_state=42,
    test_size=0.2,
)



## Prediction of precipitation values

### Using time series

In [31]:
# Reset the Keras session and fix the TensorFlow seed
keras.backend.clear_session()
tf.random.set_seed(42)

# ANN using timeseries to predict precipitation:
# 5 inputs -> Dense(300) -> Dense(100) -> Dense(1), ReLU on every layer
ann_ts_precip = keras.models.Sequential()
ann_ts_precip.add(keras.layers.Dense(300, activation="relu", input_dim=5))
ann_ts_precip.add(keras.layers.Dense(100, activation="relu"))  # try with elu
ann_ts_precip.add(keras.layers.Dense(1, activation="relu"))

# Print the layer-by-layer parameter summary
ann_ts_precip.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 300)               1800      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 32,001
Trainable params: 32,001
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Regression setup: mean squared error loss, plain SGD optimizer.
# Classification accuracy is meaningless for a continuous target, so the
# mean absolute error is tracked instead.
ann_ts_precip.compile(loss="mse",
                      optimizer="sgd",
                      metrics=["mae"])

# NOTE(review): X_train / y_train / X_valid / y_valid are not defined in the
# visible part of the notebook -- they must be built before running this cell.
history = ann_ts_precip.fit(X_train, y_train, epochs=30,
                    validation_data=(X_valid, y_valid))

In [None]:
# Draw every quantity recorded during training on one set of axes
curves = pd.DataFrame(history.history)
ax = curves.plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)
# save_fig is presumably provided by the custom utils star-imports -- TODO confirm
save_fig("keras_learning_curves_plot")
plt.show()

In [None]:
# Evaluate the trained time-series model on the held-out data.
# The original call used `model`, a name never defined in the visible
# notebook; the model built and fitted above is `ann_ts_precip`.
ann_ts_precip.evaluate(X_test, y_test)

In [None]:
# ANN using timeseries to predict extreme events (binary classification).
# Stored under its own name so the precipitation regression model
# (ann_ts_precip) is not overwritten.
# NOTE(review): input_dim=5 mirrors the time-series regression model; the
# previous Flatten(input_shape=[28, 28]) was an image-sized placeholder --
# confirm the actual number of input features.
ann_ts_extreme = keras.models.Sequential([
    keras.layers.Dense(300, activation="relu", input_dim=5),
    keras.layers.Dense(100, activation="relu"),
    # A single sigmoid unit outputs the event probability in [0, 1], which is
    # what binary cross-entropy expects (tanh outputs lie in [-1, 1]).
    keras.layers.Dense(1, activation="sigmoid")
])

# Compile the model defined above (the original call targeted an undefined
# `model` name)
ann_ts_extreme.compile(loss="binary_crossentropy",
                       optimizer="adam",
                       metrics=["accuracy"])

### Using gridded data

In [None]:
# NOTE(review): input_shape=[28, 28] is kept as a placeholder in both models
# below -- replace it with the actual (lat, lon) shape of the gridded fields.
# Each model gets its own name; previously both were assigned to
# `ann_ts_precip`, so the first one was immediately overwritten.

# ANN using gridded data to predict precipitation (regression)
ann_grid_precip = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    # Single non-negative output, as in the time-series regression model
    # (the previous 10-way softmax was a classification output)
    keras.layers.Dense(1, activation="relu")
])

# ANN using gridded data to predict extreme events (binary classification)
ann_grid_extreme = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    # Single sigmoid unit: probability that the day is an extreme event
    keras.layers.Dense(1, activation="sigmoid")
])

## Prediction of extreme events