# Imports

In [1]:
import pandas as pd
import numpy as np
import src.dataclass as dataclass
import src.correlation_coefficient as correlation_coefficient
import src.plot_ts as plot_ts
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")


2023-06-19 20:33:12.124706: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-19 20:33:12.149268: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-19 20:33:12.149837: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data

In [56]:
# Configuration of the single-run experiment: build the project's Data object
# for lead_time=1 with 24-step inputs.
# NOTE(review): seq_length presumably is the input window length and lead_time
# the forecast horizon — confirm against src.dataclass.Data.
seq_length = 24
data = dataclass.Data(seq_length=seq_length, lead_time=1)

In [57]:
# Copy the CESM train/test inputs and targets exposed by `data` into NumPy
# arrays so they can be reshaped and handed to the regressor.
x_train, y_train = np.array(data.train_input_CESM), np.array(data.train_target_CESM)
x_test, y_test = np.array(data.test_input_CESM), np.array(data.test_target_CESM)

In [58]:
# Flatten every sample to a single feature vector: all axes after the first
# (the sample axis) are collapsed into one, giving a 2-D (n_samples, n_features)
# matrix for the regressor.
# `-1` lets NumPy infer the flattened width, which also makes this cell safe to
# re-run: an already-flattened 2-D array is left unchanged instead of raising
# IndexError on `shape[2]`.
if seq_length is not None:
    x_train = x_train.reshape((x_train.shape[0], -1))
    x_test = x_test.reshape((x_test.shape[0], -1))

# XGBoost model

In [59]:
# Fit an XGBoost regressor with library-default hyperparameters on the training
# split, then score its predictions on the test split.
xgb_model = xgb.XGBRegressor()
xgb_model.fit(x_train, y_train)

predictions = xgb_model.predict(x_test)

# scikit-learn metrics are declared as (y_true, y_pred). MSE is symmetric, so
# the value is unchanged, but the conventional order stays correct if this is
# later swapped for an asymmetric metric such as r2_score.
mse = mean_squared_error(y_test, predictions)
# Off-diagonal entry of the correlation matrix, i.e. the Pearson correlation
# between predictions and targets (assumes both are 1-D — TODO confirm).
correlation = np.corrcoef(predictions, y_test)[1, 0]
print(f"The MSE is: {mse}, the correlation coefficient is: {correlation}")

The MSE is: 1.0328961330899233, the correlation coefficient is: 0.34083183658733535


In [60]:
# Plot the CESM test target against the model predictions.
# NOTE(review): ts_start/ts_end are presumably the bounds of the displayed
# time-step range — confirm against src.plot_ts.
plot_ts.plot_ts(
    true=data.test_target_CESM,
    predicted=predictions,
    ts_start=10794,
    ts_end=10994,
)

# Model loop

In [5]:
# Benchmark sweep: for every (climate model, lead time) pair, build the data
# set, fit a fresh default XGBoost regressor, and record test MSE and Pearson
# correlation. The results are collected into one table with a row per lead
# time and a pair of metric columns per climate model.
lead_times = [0, 1, 3, 6]
climate_models = ["CESM", "FOCI"]

model_name = "XGBoost"
show_plots = False
# Loop-invariant, so it is set once here rather than on every iteration.
seq_length = 1

# One row per lead time; the per-climate metric columns are filled in below.
results = {
    f"Lead_time: {lead_time}": {"Modelname": model_name}
    for lead_time in lead_times
}

for climate in climate_models:
    for lead_time in lead_times:
        data = dataclass.Data(lead_time=lead_time, seq_length=seq_length)

        # Look the splits up by climate name. An unknown name now fails loudly
        # with AttributeError instead of silently reusing the arrays left over
        # from the previous iteration.
        x_train = np.array(getattr(data, f"train_input_{climate}"))
        y_train = np.array(getattr(data, f"train_target_{climate}"))

        x_test = np.array(getattr(data, f"test_input_{climate}"))
        test_target = getattr(data, f"test_target_{climate}")
        y_test = np.array(test_target)

        if seq_length is not None:
            # Collapse all axes after the sample axis into one feature axis.
            x_train = x_train.reshape((x_train.shape[0], -1))
            x_test = x_test.reshape((x_test.shape[0], -1))

        xgb_model = xgb.XGBRegressor()
        xgb_model.fit(x_train, y_train)

        predictions = xgb_model.predict(x_test)
        # scikit-learn metrics take (y_true, y_pred); the MSE value is the same
        # either way, but the conventional order is used for clarity.
        mse = mean_squared_error(y_test, predictions)
        correlation = np.corrcoef(predictions, y_test)[1, 0]
        print(f"The MSE is: {mse}, the correlation coefficient is: {correlation}")

        if show_plots:
            # Plot against the target of the climate model being evaluated.
            # This used to be hard-coded to the CESM target, so FOCI
            # predictions were drawn against the wrong series.
            # NOTE(review): the ts_start/ts_end range is fixed for both
            # climates — confirm it is valid for the FOCI test series too.
            plot_ts.plot_ts(
                true=test_target,
                predicted=predictions,
                ts_start=10794,
                ts_end=10994,
                title=f"XGBoost {climate}, lead time = {lead_time}, window_size = {seq_length}"
            )

        row = results[f"Lead_time: {lead_time}"]
        row[f"{climate}_MSE"] = mse
        row[f"{climate}_Correlation"] = correlation

df = pd.DataFrame.from_dict(results, orient='index')
df

The MSE is: 0.7685186575515238, the correlation coefficient is: 0.5576319569807041
The MSE is: 1.0720295071050858, the correlation coefficient is: 0.29256033709349244
The MSE is: 1.0225860036766463, the correlation coefficient is: 0.3317605698849864
The MSE is: 1.0811960873326003, the correlation coefficient is: 0.24169871131900927
The MSE is: 0.6188414358500125, the correlation coefficient is: 0.5116074345460325
The MSE is: 0.933976414372388, the correlation coefficient is: 0.1860753636146827
The MSE is: 0.8888395153664072, the correlation coefficient is: 0.11673256933340792
The MSE is: 0.9372704012488485, the correlation coefficient is: 0.08758251611052338


Unnamed: 0,Modelname,CESM_MSE,CESM_Correlation,FOCI_MSE,FOCI_Correlation
Lead_time: 0,XGBoost,0.768519,0.557632,0.618841,0.511607
Lead_time: 1,XGBoost,1.07203,0.29256,0.933976,0.186075
Lead_time: 3,XGBoost,1.022586,0.331761,0.88884,0.116733
Lead_time: 6,XGBoost,1.081196,0.241699,0.93727,0.087583


In [12]:
# Persist the results table as a semicolon-separated CSV named after the model.
df.to_csv(path_or_buf=f"Results_{model_name}.csv", sep=';')