# Wind Downscaling

## Prerequisites

* Conda environment
* Get a Copernicus API key from: https://cds.climate.copernicus.eu/api-how-to
  * create a file at \$HOME/.cdsapirc with the required UID and key
* Create a .env file in the same folder as this notebook, and add the COSMO_USERNAME and COSMO_PASSWORD to connect to the UNI-BE server

## Install the required packages

In [1]:
from datetime import date
from pathlib import Path
import os

In [2]:
if Path('./setup.py').exists():
    !pip install -e .
else:
    !pip install -U git+https://github.com/OpheliaMiralles/WindDownscaling_EPFL_UNIBE.git

Obtaining file:///Users/Boubou/Documents/GitHub/WindDownscaling_EPFL_UNIBE
Installing collected packages: downscaling
  Attempting uninstall: downscaling
    Found existing installation: downscaling 1.0
    Uninstalling downscaling-1.0:
      Successfully uninstalled downscaling-1.0
  Running setup.py develop for downscaling
Successfully installed downscaling-1.0


In [3]:
# Install the third-party dependencies from conda-forge without prompting (-y).
# NOTE(review): numpy is pinned to 1.19.5, presumably for TensorFlow compatibility - confirm before changing.
!conda install -y -c conda-forge gdal tensorflow xarray numpy=1.19.5 pandas pysftp cdsapi elevation rasterio dask python-dotenv

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.10.1
  latest version: 4.10.3

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



In [4]:
!pip install topo-descriptors



In [5]:
from dotenv import load_dotenv
# Load the variables of the .env file into the process environment; the
# COSMO_USERNAME / COSMO_PASSWORD values are read later with os.getenv().
load_dotenv()
import numpy as np
import tensorflow as tf
import tensorflow.keras.callbacks as cb
# Report how many GPUs TensorFlow can see.
print(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}")

Num GPUs Available: 0


## Set configuration

In [6]:
# Root folder under which every dataset used by the notebook is stored.
DATA_ROOT = Path('./data')

# Raw data locations and the folder receiving the processed files.
ERA5_DATA_FOLDER = DATA_ROOT.joinpath('ERA5')
COSMO1_DATA_FOLDER = DATA_ROOT.joinpath('COSMO1')
DEM_DATA_FILE = DATA_ROOT.joinpath('dem/Switzerland-90m-DEM.tif')
PROCESSED_DATA_FOLDER = DATA_ROOT.joinpath('img_prediction_files')

# Create the root first (with any missing parents), then each direct sub-folder.
DATA_ROOT.mkdir(parents=True, exist_ok=True)
for _folder in (ERA5_DATA_FOLDER, COSMO1_DATA_FOLDER, DEM_DATA_FILE.parent, PROCESSED_DATA_FOLDER):
    _folder.mkdir(exist_ok=True)

In [7]:
# Names of the ERA5 surface predictors.
ERA5_PREDICTORS_SURFACE = (
    'u10', 'v10', 'blh', 'fsr', 'sp', 'sshf', 'u100', 'v100',
)
# Names of the ERA5 "Z500" predictors.
ERA5_PREDICTORS_Z500 = (
    'd', 'z', 'u', 'v', 'w', 'vo',
)
# Names of the predictors derived from the topography.
TOPO_PREDICTORS = (
    'tpi_500',
    'ridge_index_norm', 'ridge_index_dir',
    'we_derivative', 'sn_derivative',
    'slope', 'aspect',
)
# Full predictor set: surface, then Z500, then topographic variables.
ALL_INPUTS = (*ERA5_PREDICTORS_SURFACE, *ERA5_PREDICTORS_Z500, *TOPO_PREDICTORS)
# NOTE(review): the full set above is immediately replaced by the two 10 m wind
# components, so every later cell only sees 2 input channels - confirm this
# override is intentional and not a leftover from debugging.
ALL_INPUTS = ('u10', 'v10')

In [10]:
# First and last day of the data to use - should be in the 2016-2020 range
START_DATE = date(year=2016, month=4, day=1)
END_DATE = date(year=2018, month=4, day=1)
# Length of the period in days, both end points included (hence the +1)
NUM_DAYS = (END_DATE - START_DATE).days + 1

# Number of consecutive images to form a sequence
SEQUENCE_LENGTH = 6
# Size of the high resolution image to be produced
IMG_SIZE = 64
# Number of noise channels to add to the image
NOISE_CHANNELS = 4

# Number of sequences per batch
BATCH_SIZE = 16
# Number of workers to run to process the data to create the batches
BATCH_WORKERS = 8

# Whether the inputs are passed through the autoencoder before training
USE_AUTOENCODER = True
# Latent dimension for the autoencoder
AUTOENCODER_OUTPUT_FEATURES = 8

## Data Loading

In [None]:
# Run the project's ERA5 download helper for the configured folder and date range.
# NOTE(review): presumably relies on the Copernicus credentials in ~/.cdsapirc (see Prerequisites) - confirm.
from data.download_ERA5 import download_ERA5
download_ERA5(ERA5_DATA_FOLDER, START_DATE, END_DATE)
print('Done')

In [None]:
# Only fetch the DEM when the GeoTIFF is not on disk yet, so re-running the cell is cheap.
if not DEM_DATA_FILE.exists():
    # IPython interpolates {dest} into the shell command below.
    dest = str(DEM_DATA_FILE)
    # `eio` is the command-line tool of the `elevation` package installed above.
    # NOTE(review): the bounds presumably enclose Switzerland - confirm the coordinate order expected by eio.
    !eio --product SRTM3 clip -o {dest} --bounds 5.27 45.46 11.02 48.15

In [None]:
# Hand the DEM file to the project's topographic processing helper.
# NOTE(review): presumably this derives the TOPO_PREDICTORS variables - confirm in data.data_processing.
from data.data_processing import process_topographic_variables_file
process_topographic_variables_file(DEM_DATA_FILE)

In [None]:
# Credentials for the UNI-BE server, expected in the environment (filled from .env by load_dotenv()).
username = os.getenv('COSMO_USERNAME')
password = os.getenv('COSMO_PASSWORD')
# Fail early with an actionable message instead of handing None to the downloader.
if not username or not password:
    raise RuntimeError(
        'COSMO_USERNAME and COSMO_PASSWORD must be set, e.g. in a .env file next to this notebook'
    )
# NOTE(review): ERA5 is imported as `from data.download_ERA5 import download_ERA5`;
# confirm that `data` exposes a callable named `download_COSMO1` and not a module of that name.
from data import download_COSMO1
download_COSMO1(username, password, COSMO1_DATA_FOLDER, START_DATE, END_DATE)

In [None]:
# Run the project's image processing helper over the configured date range.
# It receives the output folder, the ERA5 and COSMO1 folders, the folder that
# contains the DEM file, and the variable names to include from each source.
from data.data_processing import process_imgs
process_imgs(PROCESSED_DATA_FOLDER, ERA5_DATA_FOLDER, COSMO1_DATA_FOLDER, DEM_DATA_FILE.parent,
             surface_variables_included=ERA5_PREDICTORS_SURFACE,
             z500_variables_included=ERA5_PREDICTORS_Z500,
             topo_variables_included=TOPO_PREDICTORS,
             cosmo_variables_included=('U_10M', 'V_10M'),
             start_date=START_DATE, end_date=END_DATE)
print('Done')

In [None]:
from data.data_generator import BatchGenerator, NaiveDecoder

# Batch source reading the processed files with the settings chosen above.
batch_gen = BatchGenerator(
    path_to_data=PROCESSED_DATA_FOLDER,
    decoder=NaiveDecoder(normalize=True),
    sequence_length=SEQUENCE_LENGTH,
    patch_length_pixel=IMG_SIZE,
    batch_size=BATCH_SIZE,
    input_variables=ALL_INPUTS,
    start_date=START_DATE,
    end_date=END_DATE,
    num_workers=BATCH_WORKERS,
)

# Draw NUM_DAYS batches, keeping the two halves of every batch in separate lists.
inputs, outputs = [], []
with batch_gen as batch:
    for b in range(NUM_DAYS):
        print(f'Creating batch {b+1}/{NUM_DAYS}')
        x, y = next(batch)
        inputs.append(x)
        outputs.append(y)

# Merge the per-batch arrays along their first axis.
inputs, outputs = np.concatenate(inputs, axis=0), np.concatenate(outputs, axis=0)
for _label, _array in (("Inputs", inputs), ("Outputs", outputs)):
    print(f"{_label}: {_array.shape}")

In [None]:
# Number of channels fed to the GAN: the raw predictor count unless the
# autoencoder below replaces the inputs with their encoded version.
INPUT_CHANNELS = len(ALL_INPUTS)
if USE_AUTOENCODER:
    checkpoint_path_weights = Path('./checkpoints/autoencoder/weights.ckpt')
    # NOTE(review): TensorFlow-format weights are usually stored as `<prefix>.index` /
    # `<prefix>.data-*` files, so this exact path may never exist - confirm how the weights were saved.
    if not checkpoint_path_weights.exists():
        # Without weights the raw inputs are used unchanged.
        print("No autoencoder weights found!")
    else:
        # NOTE(review): `AutoEncoder` is not imported in any visible cell; import it
        # from its defining module before running this cell.
        # The latent size is AUTOENCODER_OUTPUT_FEATURES (the previous name,
        # OUTPUT_FEATURES, is not defined anywhere and raised a NameError).
        autoencoder = AutoEncoder(nb_channels_in=len(ALL_INPUTS), nb_channels_out=AUTOENCODER_OUTPUT_FEATURES,
                                  time_steps=SEQUENCE_LENGTH, img_size=IMG_SIZE)
        autoencoder.load_weights(checkpoint_path_weights)

        print("Reducing data dimension")
        # NOTE(review): `inputs` is overwritten, so running this cell twice would
        # feed already-encoded data to the encoder again.
        inputs = autoencoder.encoder.predict(inputs)
        INPUT_CHANNELS = AUTOENCODER_OUTPUT_FEATURES

In [None]:
from data.data_generator import NoiseGenerator
from gan import train
from gan.models import make_generator, make_discriminator

# Generator: takes INPUT_CHANNELS input channels plus NOISE_CHANNELS noise
# channels and produces 2 output channels per time step.
generator = make_generator(image_size=IMG_SIZE, in_channels=INPUT_CHANNELS,
                           noise_channels=NOISE_CHANNELS, out_channels=2,
                           n_timesteps=SEQUENCE_LENGTH)
generator.compile(train.generator_optimizer())
print(f"Generator: {generator.count_params():,} weights")

# Discriminator: low-res and high-res sizes are both IMG_SIZE; the high-res
# side has the same 2 channels the generator outputs.
discriminator = make_discriminator(low_res_size=IMG_SIZE, high_res_size=IMG_SIZE, low_res_channels=INPUT_CHANNELS,
                                   high_res_channels=2, n_timesteps=SEQUENCE_LENGTH)
discriminator.compile(train.discriminator_optimizer(), train.discriminator_loss)
print(f"Discriminator: {discriminator.count_params():,} weights")

In [None]:
from gan.ganbase import  GAN

# Noise is generated per batch: one NOISE_CHANNELS-channel image per time step and sequence.
noise_shape = (BATCH_SIZE, SEQUENCE_LENGTH, IMG_SIZE, IMG_SIZE, NOISE_CHANNELS)
# Combine the two compiled networks with the noise source.
gan = GAN(generator, discriminator, noise_generator=NoiseGenerator(noise_shape))

print(f"Total: {gan.generator.count_params() + gan.discriminator.count_params():,} weights")

In [None]:
import shutil

# File the ModelCheckpoint callback writes the GAN weights to; make sure its folder exists.
checkpoint_path_weights = Path('./checkpoints/gan/weights.ckpt')
checkpoint_path_weights.parent.mkdir(exist_ok=True, parents=True)

# Start from an empty TensorBoard log location. Pure Python is used instead of
# `!rm -rf` so the cell also works where no POSIX shell is available.
log_path = Path('./logs/gan')
if log_path.is_dir() and not log_path.is_symlink():
    shutil.rmtree(log_path)
elif log_path.exists() or log_path.is_symlink():
    # A plain file or a symlink sits at the log location: remove just that entry.
    log_path.unlink()

In [None]:
# Load the TensorBoard notebook extension and open it on the GAN log directory.
%load_ext tensorboard
%tensorboard --logdir=logs/gan

In [None]:
# Callbacks for the GAN training run.
# NOTE(review): EarlyStopping gets no `monitor` argument and therefore uses the
# Keras default, while ModelCheckpoint explicitly monitors 'loss' - confirm that
# watching two different quantities is intended.
callbacks = [
    cb.TensorBoard(log_path, write_images=True, histogram_freq=1, profile_batch=(2, 4)),
    cb.ProgbarLogger('steps'),
    cb.EarlyStopping(min_delta=2e-3, patience=10),
    cb.TerminateOnNaN(),
    cb.ModelCheckpoint(checkpoint_path_weights, monitor='loss', save_best_only=True, save_weights_only=True),
]

In [None]:
#gan.fit(inputs, outputs, callbacks=callbacks, epochs=30, batch_size=BATCH_SIZE, validation_split=0.15, steps_per_epoch=16)

In [None]:
# A second discriminator, built with the same arguments as `discriminator`,
# used below to check that it can be trained on its own.
disc = make_discriminator(low_res_size=IMG_SIZE, high_res_size=IMG_SIZE, low_res_channels=INPUT_CHANNELS,
                          high_res_channels=2, n_timesteps=SEQUENCE_LENGTH)


def loss(y_true, y_pred):
    """Return the mean of the element-wise product of labels and predictions.

    Args:
        y_true: labels of the samples.
        y_pred: discriminator outputs for the same samples.

    Returns:
        A scalar tensor, ``tf.reduce_mean(y_true * y_pred)``.

    An unreachable second ``return train.discriminator_loss(y_true, y_pred)``
    used to follow the first return; it never ran and has been removed, so the
    value returned is unchanged.
    """
    return tf.reduce_mean(y_true * y_pred)


disc.compile(train.discriminator_optimizer(), loss)

In [None]:
# The generator was built with noise_channels=NOISE_CHANNELS, so the noise must
# share every dimension of `inputs` except the last one, which has to be
# NOISE_CHANNELS. Using the full shape of `inputs` produced INPUT_CHANNELS
# noise channels instead.
disc_noise_shape = (*np.shape(inputs)[:-1], NOISE_CHANNELS)
noise = NoiseGenerator(disc_noise_shape)()
# Generated high-resolution sequences for every input sequence.
fake_high_res = generator([inputs, noise])

In [None]:
# Discriminator training set: every low-res sequence appears twice, paired first
# with its real high-res target and then with the generated one. (Two earlier
# assignments that built `inp` as lists of pairs were overwritten right away and
# have been removed.)
inp = [np.concatenate((inputs, inputs)), np.concatenate((outputs, fake_high_res))]
# Shape of one low-res sample, shape of one high-res sample, then the sample
# count on each side. The second shape previously came from another low-res sample.
print(inp[0][0].shape, inp[1][0].shape, len(inp[0]), len(inp[1]))
# Labels in the same order as the pairs: -1 for real targets, +1 for generated ones.
n_pairs = len(inputs)
labels = np.concatenate((-np.ones(n_pairs), np.ones(n_pairs)))
print(labels.shape)

In [None]:
import matplotlib.pyplot as plt
def show(image, dims=2, legends=None):
    """Plot the first ``dims`` channels of the first frame of ``image`` side by side.

    Args:
        image: array-like indexed as ``image[0, :, :, channel]``; only index 0
            of the first axis is displayed.
        dims: number of channels to draw, one panel per channel.
        legends: optional sequence with one title per panel; panels get an
            empty title when it is None.
    """
    # squeeze=False keeps `axes` two-dimensional even when dims == 1.
    _, axes = plt.subplots(1, dims, figsize=(10, 10), squeeze=False)

    for i, ax in enumerate(axes[0]):
        label = legends[i] if legends is not None else ''
        ax.imshow(image[0, :, :, i], cmap='jet')
        ax.set_title(label)
        ax.axis('off')
    plt.show()
# Display the first two channels of the first target sequence (show() draws index 0 of its first axis).
show(outputs[0])

In [None]:
# Same view for the first input sequence; only its first two channels are drawn.
show(inputs[0])

In [None]:
# Same view for the first sequence produced by the generator.
show(fake_high_res[0])

In [None]:
# Mean discriminator output on the real (input, target) pairs, before disc.fit() is run.
tf.reduce_mean(disc.predict([inputs, outputs])).numpy()

In [None]:
# Mean discriminator output on the (input, generated) pairs, before disc.fit() is run.
tf.reduce_mean(disc.predict([inputs, fake_high_res])).numpy()

In [None]:
# Train the standalone discriminator on the real and generated pairs with their -1 / +1 labels.
disc.fit(inp, labels, epochs=100)

In [None]:
# Raw discriminator outputs for the first two real pairs.
disc.predict([inputs[:2], outputs[:2]])

In [None]:
# Print the layer-by-layer summary of the standalone discriminator.
disc.summary()

In [None]:
# Mean discriminator output on the (input, generated) pairs again, this time after disc.fit().
tf.reduce_mean(disc.predict([inputs, fake_high_res])).numpy()

In [None]:
# Evaluate on the same two-array input list and labels that disc.fit() used.
# `disc` takes [low_res, high_res] as input (see the predict calls), so passing
# `inputs` as x and `outputs` as y did not match the model's signature.
evals = disc.evaluate(inp, labels)
print(evals)