## Commence training

In [None]:
import sys
import time
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# import importlib
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Reloading custom file to incorporate changes dynamically
# importlib.reload(my_classes)

# Start the wall-clock timer; t0 is later passed to the TimeOut callback
t0 = time.time()
# Base directory under which the data and workspace folders below are located
path = '/pf/b/b309170'
# Directory holding the .npy input/output arrays for the train/valid/test splits
path_data = path + '/my_work/icon-ml_data/cloud_cover_parameterization/grid_cell_based_v3/based_on_var_interpolated_data'
# Directory that receives the final loss summary written at the end of the notebook
path_model = path + '/workspace_icon-ml/cloud_cover_parameterization/grid_cell_based_v3/saved_models'
# Add path with my_classes to sys.path
sys.path.insert(0, path + '/workspace_icon-ml/cloud_cover_parameterization/')

# Imported only after the sys.path entry above has been added
from my_classes import TimeOut

# Run identifier: selects the data files, names the output files and seeds TensorFlow
NUM = 1 
# Time budget handed to TimeOut together with t0
timeout = 2120 #Stop after how many minutes
# Base name (without extension) shared by the saved model, the plot and the loss summary
filename = "model_grid_cell_based_v3_final_%d"%NUM

# Seed TensorFlow's global random generator with the run identifier
tf.random.set_seed(NUM)
# Query the GPUs TensorFlow can see; the bare `gpus` expression below shows them
# as the cell output when run in a notebook
gpus = tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_visible_devices(gpus[3], 'GPU')
gpus

In [None]:
def _load_split(name_pattern, **load_kwargs):
    """Load one .npy array of run NUM from path_data.

    name_pattern is the file name (with leading slash) containing a %d
    placeholder for the run identifier; load_kwargs are forwarded to np.load.
    """
    return np.load(path_data + name_pattern % NUM, **load_kwargs)


# Only the training inputs are opened as a read-only memory map;
# every other array is read into memory.
input_train = _load_split('/cloud_cover_all_days_input_train_%d.npy', mmap_mode='r')
input_valid = _load_split('/cloud_cover_all_days_input_valid_%d.npy')
input_test = _load_split('/cloud_cover_all_days_input_test_%d.npy')
output_train = _load_split('/cloud_cover_all_days_output_train_%d.npy')
output_valid = _load_split('/cloud_cover_all_days_output_valid_%d.npy')
output_test = _load_split('/cloud_cover_all_days_output_test_%d.npy')

In [None]:
# Fully connected network: 6 inputs -> 256 (ReLU) -> 256 (ReLU) -> 1 linear output
model = Sequential([
    Dense(256, activation='relu', input_dim=6),
    Dense(256, activation='relu'),
    Dense(1, activation='linear'),
])

In [None]:
# Mean-squared-error objective, optimized with Nadam at its default settings
model.compile(optimizer=Nadam(), loss='mse')

# Callback constructed from the notebook start time and the time budget
time_callback = TimeOut(t0, timeout)

# Fit on the training split and evaluate on the validation split during training
history = model.fit(
    x=input_train,
    y=output_train,
    validation_data=(input_valid, output_valid),
    epochs=70,
    batch_size=32,
    callbacks=[time_callback],
    verbose=2,
)

In [None]:
# Serialize model and weights to a single HDF5 file.
# This runs first because the trained weights are the expensive artifact and
# must not be lost if the architecture export below fails.
model.save(filename+'.h5')

# Serialize the architecture alone to YAML where the installed Keras allows it.
# Model.to_yaml() was removed in TensorFlow 2.6 (it raises RuntimeError there,
# and is absent in later Keras versions); older versions raise ImportError
# when PyYAML is missing. In those cases write the JSON export instead.
try:
    model_yaml = model.to_yaml()
except (RuntimeError, ImportError, AttributeError):
    with open(filename+".json", "w") as json_file:
        json_file.write(model.to_json())
else:
    with open(filename+".yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)

print('Saved model to disk')

In [None]:
# Plot the learning curves recorded during training.
# If the training-loss list is longer than the validation-loss list, drop its
# last entry so both columns have the same length in the DataFrame.
train_curve = history.history['loss']
if len(train_curve) > len(history.history['val_loss']):
    train_curve.pop()

curves = pd.DataFrame(history.history)
curves.plot(figsize=(8,5))
plt.xlabel('Number of epochs')
plt.ylabel('Mean Squared Error')
plt.grid(True)

# Write the figure next to the saved model files, then display it
plt.savefig(filename+'.pdf')
plt.show()

In [None]:
# Loss of the trained model on each of the three splits
train_loss = model.evaluate(x=input_train, y=output_train, batch_size=2000, verbose=2)
valid_loss = model.evaluate(x=input_valid, y=output_valid, batch_size=1000, verbose=2)
test_loss = model.evaluate(x=input_test, y=output_test, batch_size=1000, verbose=2)

# Summary text; the leading newline separates it from anything already in the file
summary_lines = [
    '\nTraining loss: %.4f\n'%(train_loss),
    'Validation loss: %.4f\n'%(valid_loss),
    'Test loss: %.4f\n'%(test_loss),
    'Training epochs: %d'%(len(history.history['val_loss'])),
]

# Append the summary to the run's file in the model directory
with open(os.path.join(path_model, filename), 'a') as results_file:
    results_file.writelines(summary_lines)