# Imports

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from keras.layers.attention.multi_head_attention import activation
import src.dataclass as dataclass
import src.correlation_coefficient as correlation_coefficient
import src.plot_ts as plot_ts
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")


2023-06-14 17:15:35.331558: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-14 17:15:35.353133: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-14 17:15:35.354019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Functions

In [2]:
def build_model(lead_time, seq_length):
    """Build and compile the Conv1D -> LSTM -> Dense regression network.

    Parameters
    ----------
    lead_time : int
        Forecast lead time. It is only used to choose the number of input
        channels: 28 when ``lead_time == 0`` and 29 for any other value.
    seq_length : int
        Number of time steps in every input window.

    Returns
    -------
    tf.keras.models.Sequential
        The compiled model. It ends in a single linear unit and is trained
        with Adam (learning rate 0.001) on a mean-squared-error loss, while
        tracking ``'mse'`` and the project's correlation-coefficient metric.
    """
    layers = tf.keras.layers

    # NOTE(review): presumably non-zero lead times carry one extra input
    # feature -- confirm against src.dataclass.
    n_channels = 28 if lead_time == 0 else 29

    network = tf.keras.models.Sequential([
        layers.Conv1D(10, 5, input_shape=(seq_length, n_channels)),
        layers.BatchNormalization(),
        layers.MaxPooling1D(),
        layers.Dropout(0.3),
        # Only the last LSTM output is kept (return_sequences=False).
        layers.LSTM(64, activation='relu', return_sequences=False),
        layers.Dropout(0.3),
        layers.Dense(32, activation='relu'),
        layers.Dense(1, activation='linear'),
    ])

    tracked_metrics = ['mse', correlation_coefficient.correlation_coefficient]
    network.compile(
        optimizer=Adam(learning_rate=0.001),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=tracked_metrics,
    )
    return network

# Training and evaluation loop (per climate model and lead time)

In [5]:
# --- Configuration ---------------------------------------------------------
lead_times = [0, 1, 3, 6]
climate_models = ["CESM", "FOCI"]
n_epochs = 100
seq_length = 12
seed = 42
show_plots = False
model_name = "CNN+LSTM"

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# batch shuffling and dropout repeat when the notebook is re-run.
tf.keras.utils.set_random_seed(seed)

# Stop once val_loss has not improved by at least 0.001 for 5 epochs and roll
# the model back to the best epoch seen.
early_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=5,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
    start_from_epoch=0,
)

# One row per lead time; the per-climate columns are filled in below.
results = {f"Lead_time: {lead_time}": dict() for lead_time in lead_times}
# Keras History objects keyed by "<climate>_<lead_time>".
histories = dict()

for climate in climate_models:
    for lead_time in lead_times:
        data = dataclass.Data(lead_time=lead_time, seq_length=seq_length)

        # Pick the splits of the current climate model by attribute name
        # (e.g. data.train_input_CESM). An unknown climate name raises
        # AttributeError instead of silently reusing the previous arrays.
        x_train = np.array(getattr(data, f"train_input_{climate}"))
        y_train = np.array(getattr(data, f"train_target_{climate}"))
        x_test = np.array(getattr(data, f"test_input_{climate}"))
        test_target = getattr(data, f"test_target_{climate}")
        y_test = np.array(test_target)

        model = build_model(lead_time, seq_length)
        # NOTE(review): the test split doubles as validation data here, so
        # early stopping / best-weight restoration is tuned on the same data
        # the metrics below are reported on. The scores are therefore
        # optimistic; consider a separate validation split.
        history = model.fit(
            x_train,
            y_train,
            epochs=n_epochs,
            batch_size=16,
            shuffle=True,
            verbose=0,
            validation_data=(x_test, y_test),
            callbacks=[early_callback],
        )

        # Flatten the (n_samples, 1) network output to a 1-D vector.
        predictions = model.predict(x_test, verbose=0)
        predictions = predictions.reshape((predictions.shape[0],))
        mse = mean_squared_error(y_test, predictions)
        correlation = np.corrcoef(predictions, y_test)[1, 0]
        if show_plots:
            print(f"The MSE is: {mse}, the correlation coefficient is: {correlation}")
            plot_ts.plot_ts(
                # Ground truth of the climate model being evaluated (was
                # hard-coded to the CESM targets for every climate model).
                true=test_target,
                predicted=predictions,
                ts_start=10794,
                ts_end=10994,
                title=f"CNN + LSTM {climate}, lead time = {lead_time}",
            )
        results[f"Lead_time: {lead_time}"]["Modelname"] = model_name
        results[f"Lead_time: {lead_time}"][f"{climate}_MSE"] = mse
        results[f"Lead_time: {lead_time}"][f"{climate}_Correlation"] = correlation
        histories[f"{climate}_{lead_time}"] = history

# Rows: lead times; columns: model name plus MSE / correlation per climate model.
df = pd.DataFrame.from_dict(results, orient="index")
df

Unnamed: 0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
Lead_time: 0,CNN+LSTM,0.772522,0.555355,0.554634,0.55118
Lead_time: 1,CNN+LSTM,0.943968,0.394598,0.718872,0.304757
Lead_time: 3,CNN+LSTM,0.961051,0.377606,0.761129,0.197585
Lead_time: 6,CNN+LSTM,1.013309,0.319963,0.785047,0.130604


In [6]:
# Load the result tables of the other models. NOTE(review): presumably these
# files were written by sibling notebooks with the same ';'-separated layout
# that this notebook uses for its own results -- confirm.
df_GRU = pd.read_csv("Results_GRU.csv", sep=';', index_col=0)
df_XGB = pd.read_csv("Results_XGBoost.csv", sep=';', index_col=0)
df_LSTM = pd.read_csv("Results_LSTM.csv", sep=';', index_col=0)

# Stack all models and group the rows by lead-time label. Every label occurs
# once per model, so a stable sort (mergesort) is used to keep the models in
# concatenation order within each lead time; the default quicksort gives no
# such guarantee for duplicate index values.
df_total = pd.concat([df, df_GRU, df_XGB, df_LSTM]).sort_index(axis=0, kind="mergesort")
df_total

Unnamed: 0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
Lead_time: 0,CNN+LSTM,0.772522,0.555355,0.554634,0.55118
Lead_time: 0,GRU,0.690352,0.623561,0.519549,0.585833
Lead_time: 0,XGBoost,0.86292,0.483471,0.69454,0.415032
Lead_time: 0,LSTM,0.700556,0.612967,0.526495,0.5802
Lead_time: 1,CNN+LSTM,0.943968,0.394598,0.718872,0.304757
Lead_time: 1,GRU,0.927135,0.412321,0.736909,0.267883
Lead_time: 1,XGBoost,1.032896,0.340832,0.834805,0.218906
Lead_time: 1,LSTM,0.963997,0.37226,0.730262,0.290066
Lead_time: 3,CNN+LSTM,0.961051,0.377606,0.761129,0.197585
Lead_time: 3,GRU,0.967889,0.367299,0.763325,0.190444


In [7]:
# Persist this model's result table as a ';'-separated CSV named after the model.
df.to_csv(path_or_buf=f"Results_{model_name}.csv", sep=";")