# Imports

In [1]:
import pandas as pd
import numpy as np
import src.dataclass as dataclass
import src.correlation_coefficient as correlation_coefficient
import src.plot_ts as plot_ts
import matplotlib.pyplot as plt
import xgboost as xgb
import lightgbm as ltb

from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")

2023-06-14 17:37:18.184455: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-14 17:37:18.209515: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-14 17:37:18.210064: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Experiment configuration: every (climate model, lead time) pair is trained
# and evaluated once with a default LightGBM regressor.
lead_times = [0, 1, 3, 6]
climate_models = ["CESM", "FOCI"]
seq_length = 12

model_name = "LightGBM"
show_plots = False

# One result row per lead time; each row collects the metrics of all climates.
results = {f"Lead_time: {lead_time}": dict() for lead_time in lead_times}

for climate in climate_models:
    for lead_time in lead_times:
        data = dataclass.Data(lead_time=lead_time, seq_length=seq_length)

        # The data object exposes one attribute per split and climate model
        # (e.g. train_input_CESM). Looking them up by name removes the
        # duplicated if/elif and raises AttributeError for an unknown climate
        # instead of silently reusing arrays from the previous iteration.
        test_target = getattr(data, f"test_target_{climate}")
        x_train = np.array(getattr(data, f"train_input_{climate}"))
        y_train = np.array(getattr(data, f"train_target_{climate}"))
        x_test = np.array(getattr(data, f"test_input_{climate}"))
        y_test = np.array(test_target)

        if seq_length is not None:
            # Collapse the last two axes so every sample becomes one flat
            # feature vector, which is what the tree model expects.
            x_train = x_train.reshape((x_train.shape[0], x_train.shape[1] * x_train.shape[2]))
            x_test = x_test.reshape((x_test.shape[0], x_test.shape[1] * x_test.shape[2]))

        ltb_model = ltb.LGBMRegressor()
        ltb_model.fit(x_train, y_train)

        predictions = ltb_model.predict(x_test)
        # scikit-learn's signature is (y_true, y_pred); MSE is symmetric, so
        # the value is unchanged, but the conventional order avoids surprises
        # if the metric is ever swapped for an asymmetric one.
        mse = mean_squared_error(y_test, predictions)
        correlation = np.corrcoef(predictions, y_test)[1, 0]
        print(f"The MSE is: {mse}, the correlation coefficient is: {correlation}")

        if show_plots:
            # Plot against the target of the climate model being evaluated
            # (previously the CESM target was always used, even for FOCI) and
            # label the figure with the model that was actually trained.
            # NOTE(review): ts_start/ts_end are a hard-coded plot window;
            # confirm they are valid for both climate models.
            plot_ts.plot_ts(
                true=test_target,
                predicted=predictions,
                ts_start=10794,
                ts_end=10994,
                title=f"{model_name} {climate}, lead time = {lead_time}, window_size = {seq_length}"
            )

        row = results[f"Lead_time: {lead_time}"]
        row["Modelname"] = model_name
        row[f"{climate}_MSE"] = mse
        row[f"{climate}_Correlation"] = correlation

# Rows are lead times, columns are the per-climate metrics.
df = pd.DataFrame.from_dict(results, orient='index')
df

The MSE is: 0.7952167720998717, the correlation coefficient is: 0.5412154116245839
The MSE is: 0.9698735041916572, the correlation coefficient is: 0.3659096625955792
The MSE is: 0.9894306510414622, the correlation coefficient is: 0.3405322099272156
The MSE is: 1.0461741933352204, the correlation coefficient is: 0.2580136490005475
The MSE is: 0.5910660613309222, the correlation coefficient is: 0.5055903763915569
The MSE is: 0.7506447050836396, the correlation coefficient is: 0.2636373417551652
The MSE is: 0.8007134069440717, the correlation coefficient is: 0.13849913213527523
The MSE is: 0.7929843405622439, the correlation coefficient is: 0.14341085430574388


Unnamed: 0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
Lead_time: 0,LightGBM,0.795217,0.541215,0.591066,0.50559
Lead_time: 1,LightGBM,0.969874,0.36591,0.750645,0.263637
Lead_time: 3,LightGBM,0.989431,0.340532,0.800713,0.138499
Lead_time: 6,LightGBM,1.046174,0.258014,0.792984,0.143411


In [4]:
# Persist this model's metric table, using the same ';' separator as the other
# Results_*.csv files that this notebook reads.
df.to_csv(path_or_buf=f"Results_{model_name}.csv", sep=";")

In [31]:
# Load the metric tables written by the other model notebooks and stack them
# below the table computed above.
df_GRU = pd.read_csv("Results_GRU.csv", sep=';', index_col=0)
df_XGB = pd.read_csv("Results_XGBoost.csv", sep=';', index_col=0)
df_LSTM = pd.read_csv("Results_LSTM.csv", sep=';', index_col=0)
df_CNN_LSTM = pd.read_csv("Results_CNN+LSTM.csv", sep=';', index_col=0)
df_total = pd.concat([df, df_GRU, df_XGB, df_LSTM, df_CNN_LSTM])

# pd.concat keeps each frame's rows together (lead times 0, 1, 3, 6 per model),
# so a hard-coded [0, 0, 0, 0, 0, 1, ...] list would attach the wrong lead time
# to most rows. Derive the lead time from the "Lead_time: <n>" row label
# instead; a label in any other format fails loudly with ValueError.
df_total['lead time'] = [
    int(str(label).rsplit(":", 1)[-1]) for label in df_total.index
]

# Index by the numeric lead time and group rows by it. A stable sort keeps the
# models in concat order within each lead time.
df_total = df_total.set_index("lead time").sort_index(axis=0, kind="mergesort")
df_total

Unnamed: 0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
Lead_time: 0,LightGBM,0.795217,0.541215,0.591066,0.50559
Lead_time: 0,CNN+LSTM,0.772522,0.555355,0.554634,0.55118
Lead_time: 0,GRU,0.690352,0.623561,0.519549,0.585833
Lead_time: 0,LSTM,0.700556,0.612967,0.526495,0.5802
Lead_time: 0,XGBoost,0.818344,0.518608,0.690793,0.405406
Lead_time: 1,CNN+LSTM,0.943968,0.394598,0.718872,0.304757
Lead_time: 1,LSTM,0.963997,0.37226,0.730262,0.290066
Lead_time: 1,XGBoost,1.043164,0.322661,0.85065,0.198264
Lead_time: 1,GRU,0.927135,0.412321,0.736909,0.267883
Lead_time: 1,LightGBM,0.969874,0.36591,0.750645,0.263637


In [6]:
# Write the combined per-lead-time table of this group's models to disk.
df_total.to_csv(path_or_buf="Total_table.csv", sep=";")

In [25]:
# Read the second batch of result tables (comma-separated, first column as the
# index) in one pass, keeping one named frame per model.
df_lstm, df_rf, df_rnn_at, df_rnn, df_ada = (
    pd.read_csv(csv_name, sep=',', index_col=0)
    for csv_name in ("lstm.csv", "rf.csv", "rnn_attention.csv", "rnn.csv", "adaboost.csv")
)

# Stack the frames and order the rows by their index.
df_jannik = pd.concat([df_lstm, df_rf, df_rnn_at, df_rnn, df_ada]).sort_index(axis=0)
df_jannik



Unnamed: 0_level_0,Name,MSE FOCI,MSE CESM,Corr FOCI,Corr CESM
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,LSTM,0.545746,0.725865,0.572786,0.603083
0,adaboost,0.899312,1.031981,0.338823,0.394556
0,RandomForest,0.646675,0.829775,0.450056,0.525338
0,RNN,0.524209,0.725488,0.582959,0.616352
0,Attention RNN,0.58293,0.76726,0.531627,0.570795
1,adaboost,1.370148,1.18906,0.137728,0.252487
1,RNN,0.759406,1.013598,0.25894,0.341081
1,Attention RNN,0.794181,1.121237,0.169935,0.247004
1,RandomForest,0.795104,1.010052,0.21369,0.32964
1,LSTM,0.735424,0.99277,0.273762,0.357914


In [37]:
# Map this table's column names onto the naming scheme used by df_total so the
# frames can be concatenated column-for-column later on.
jannik_column_map = dict(
    zip(
        ["Name", "MSE FOCI", "MSE CESM", "Corr FOCI", "Corr CESM"],
        ["Modelname", "FOCI_MSE", "CESM_MSE", "FOCI_Correlation", "CESM_Correlation"],
    )
)
df_jannik.rename(columns=jannik_column_map, inplace=True)

In [58]:
# Read the third batch of result tables (default separator, first column as
# the index), keeping one named frame per model.
df_linreg, df_esn, df_cnn = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("linreg.csv", "esn.csv", "cnn.csv")
)

# Stack, order rows by index, and rename the columns to the naming scheme used
# by df_total.
df_marco = (
    pd.concat([df_linreg, df_cnn, df_esn])
    .sort_index(axis=0)
    .rename(
        columns={
            "Name": "Modelname",
            "MSE FOCI": "FOCI_MSE",
            "MSE CESM": "CESM_MSE",
            "Corr FOCI": "FOCI_Correlation",
            "Corr CESM": "CESM_Correlation",
        }
    )
)
df_marco

Unnamed: 0_level_0,Modelname,FOCI_MSE,CESM_MSE,FOCI_Correlation,CESM_Correlation
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,linReg,0.775,0.999,0.24,0.301
1,CNN/fc,0.814,1.022,0.116,0.263
1,ESN,0.842,1.087,0.159,0.202
3,linReg,0.801,1.054,0.145,0.199
3,CNN/fc,0.821,1.05,0.07,0.214
3,ESN,0.873,1.077,0.083,0.209
6,linReg,0.817,1.057,0.071,0.189
6,CNN/fc,0.836,1.055,0.04,0.206
6,ESN,0.883,1.095,0.053,0.162


In [69]:
# Merge all three result collections into one table ordered by its index.
df_complete = pd.concat([df_total, df_jannik, df_marco]).sort_index(axis=0)

# Round every metric column to two decimals for presentation.
for metric_column in ("CESM_MSE", "CESM_Correlation", "FOCI_MSE", "FOCI_Correlation"):
    df_complete[metric_column] = np.round(df_complete[metric_column], 2)

df_complete.to_excel("All_models.xlsx")

In [70]:
# Ranking for index label 0 (lead time 0), lowest CESM MSE first.
# - `.loc[[0]]` always yields a DataFrame, even if only one row matches; a
#   scalar `.loc[0]` would collapse to a Series and break `sort_values(by=...)`.
# - The sort returns a new frame instead of mutating the slice in place, which
#   avoids the SettingWithCopy pattern hidden by the global warnings filter.
table_0 = df_complete.loc[[0]].sort_values(by="CESM_MSE", ascending=True)
table_0.to_excel("table_lead_time_0.xlsx")
table_0

Unnamed: 0_level_0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,GRU,0.69,0.62,0.52,0.59
0,LSTM,0.7,0.61,0.53,0.58
0,RNN,0.73,0.62,0.52,0.58
0,LSTM,0.73,0.6,0.55,0.57
0,Attention RNN,0.77,0.57,0.58,0.53
0,CNN+LSTM,0.77,0.56,0.55,0.55
0,LightGBM,0.8,0.54,0.59,0.51
0,XGBoost,0.82,0.52,0.69,0.41
0,RandomForest,0.83,0.53,0.65,0.45
0,adaboost,1.03,0.39,0.9,0.34


In [71]:
# Ranking for index label 1 (lead time 1), lowest CESM MSE first.
# - `.loc[[1]]` always yields a DataFrame, even if only one row matches; a
#   scalar `.loc[1]` would collapse to a Series and break `sort_values(by=...)`.
# - The sort returns a new frame instead of mutating the slice in place, which
#   avoids the SettingWithCopy pattern hidden by the global warnings filter.
table_1 = df_complete.loc[[1]].sort_values(by="CESM_MSE", ascending=True)
table_1.to_excel("table_lead_time_1.xlsx")
table_1

Unnamed: 0_level_0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,GRU,0.93,0.41,0.74,0.27
1,CNN+LSTM,0.94,0.39,0.72,0.3
1,LSTM,0.96,0.37,0.73,0.29
1,LightGBM,0.97,0.37,0.75,0.26
1,LSTM,0.99,0.36,0.74,0.27
1,linReg,1.0,0.3,0.78,0.24
1,RandomForest,1.01,0.33,0.8,0.21
1,RNN,1.01,0.34,0.76,0.26
1,CNN/fc,1.02,0.26,0.81,0.12
1,XGBoost,1.04,0.32,0.85,0.2


In [72]:
# Ranking for index label 3 (lead time 3), lowest CESM MSE first.
# - `.loc[[3]]` always yields a DataFrame, even if only one row matches; a
#   scalar `.loc[3]` would collapse to a Series and break `sort_values(by=...)`.
# - The sort returns a new frame instead of mutating the slice in place, which
#   avoids the SettingWithCopy pattern hidden by the global warnings filter.
table_3 = df_complete.loc[[3]].sort_values(by="CESM_MSE", ascending=True)
table_3.to_excel("table_lead_time_3.xlsx")
table_3

Unnamed: 0_level_0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,CNN+LSTM,0.96,0.38,0.76,0.2
3,GRU,0.97,0.37,0.76,0.19
3,LSTM,0.97,0.36,0.76,0.22
3,LSTM,0.99,0.37,0.77,0.21
3,LightGBM,0.99,0.34,0.8,0.14
3,RandomForest,1.01,0.33,0.8,0.17
3,RNN,1.05,0.28,0.77,0.18
3,linReg,1.05,0.2,0.8,0.14
3,CNN/fc,1.05,0.21,0.82,0.07
3,ESN,1.08,0.21,0.87,0.08


In [73]:
# Ranking for index label 6 (lead time 6), lowest CESM MSE first.
# - `.loc[[6]]` always yields a DataFrame, even if only one row matches; a
#   scalar `.loc[6]` would collapse to a Series and break `sort_values(by=...)`.
# - The sort returns a new frame instead of mutating the slice in place, which
#   avoids the SettingWithCopy pattern hidden by the global warnings filter.
table_6 = df_complete.loc[[6]].sort_values(by="CESM_MSE", ascending=True)
table_6.to_excel("table_lead_time_6.xlsx")
table_6

Unnamed: 0_level_0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
lead time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,GRU,1.0,0.32,0.79,0.11
6,CNN+LSTM,1.01,0.32,0.79,0.13
6,LSTM,1.03,0.28,0.77,0.17
6,RandomForest,1.04,0.29,0.82,0.11
6,LightGBM,1.05,0.26,0.79,0.14
6,linReg,1.06,0.19,0.82,0.07
6,CNN/fc,1.06,0.21,0.84,0.04
6,ESN,1.1,0.16,0.88,0.05
6,XGBoost,1.11,0.24,0.92,0.09
6,adaboost,1.16,0.21,1.42,0.1
