<div class="alert alert-info" role="alert" 
     style="font-size: 1.6em; font-weight: bold; padding: 10px; margin: 10px 0; text-align: center;">
    
    XGBoost model deployment -- Round 1
</div>

In [None]:
# Data Wrangling
import glob
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.metrics import r2_score, mean_squared_error
import xgboost as xgb
import shap

# Data Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

<div class="alert alert-info" role="alert" 
     style="font-size: 1.8em; font-weight: bold; padding: 15px; margin: 10px 0; text-align: center; background-color: #d9edf7; border-color: #bce8f1; color: #31708f; border-radius: 8px;">
    Data Preprocessing
</div>

In [None]:
def _read_cmems_csv(filename):
    """Load one CSV export into a DataFrame, skipping every line that starts with '#'."""
    return pd.read_csv(filename, comment='#')


# One DataFrame per variable file; the columns read from each frame further down
# are 'time'/'zos' (SSH), 'thetao' (SST), 'so' (SSS), 'uo'/'vo' (VEL) and 'mlotst' (MLD).
SSH = _read_cmems_csv("cmems_mod_glo_phy_zos_0.083deg_P1D-m.csv")
SST = _read_cmems_csv("cmems_mod_glo_phy-sst_0.083deg_P1D-m.csv")
SSS = _read_cmems_csv("cmems_mod_glo_phy-sss_0.083deg_P1D-m.csv")
VEL = _read_cmems_csv("cmems_mod_glo_phy-vel_0.083deg_P1D-m.csv")
MLD = _read_cmems_csv("cmems_mod_glo_phy_mld_0.083deg_P1D-m.csv")

In [None]:
# Turn the raw 'time' strings of SSH into a plain datetime column called 'dates'.
#
# The guard makes this cell safe to re-run: on the first run 'time' is replaced by
# 'dates', so on later runs there is nothing left to convert.
if 'time' in SSH.columns:
    ssh_dates = (
        # Timestamps are UTC strings matching the pattern below
        pd.to_datetime(SSH['time'], format='%Y-%m-%dT%H:%M:%S.%fZ', utc=True)
        .dt.tz_localize(None)  # drop the timezone marker, keep the UTC wall-clock time
        .dt.floor('min')       # truncate to whole minutes (seconds carry no information)
    )
    # Keep only the columns used from here on: the parsed dates and 'zos' (SSH values)
    SSH = SSH.assign(dates=ssh_dates)[['dates', 'zos']]

# The variables are combined row by row, so every input must have the same number
# of rows; otherwise pandas would silently pad the shorter ones with NaN.
row_counts = {'SSH': len(SSH), 'SST': len(SST), 'SSS': len(SSS), 'VEL': len(VEL), 'MLD': len(MLD)}
assert len(set(row_counts.values())) == 1, \
    "Input files have different row counts: {}".format(row_counts)

# Combine vars in new df:
new_df = pd.DataFrame({'Date':SSH['dates'], 'SSH':SSH['zos'], 'SST':SST['thetao'],
                       'SSS':SSS['so'], 'Vuo':VEL['uo'], 'Vvo':VEL['vo'], 'MLD':MLD['mlotst']})
print(new_df.head(3))

In [None]:
# Columns fed to the model as inputs; SST is the quantity being predicted
predictors = ['SSH', 'SSS', 'Vuo', 'Vvo', 'MLD']

# Pull plain NumPy arrays out of the combined frame
X_new = new_df.loc[:, predictors].to_numpy()   # feature matrix, one column per predictor
y_new = new_df.loc[:, 'SST'].to_numpy()        # observed SST values

# Sanity check: expect (rows, number of predictors) and (rows,)
print(X_new.shape, y_new.shape)

<div class="alert alert-info" role="alert" 
     style="font-size: 1.5em; font-weight: bold; padding: 15px; margin: 10px 0; text-align: center; background-color: #d9edf7; border-color: #bce8f1; color: #31708f; border-radius: 8px;">
    
    Importing our saved XGBoost model
</div>

In [None]:
# Start from an empty regressor, then restore the trained state from the JSON file on disk
model_file = "xgb_sst_model1.json"
sst_model = xgb.XGBRegressor()
sst_model.load_model(model_file)

In [None]:
# Run the restored model on the new predictor matrix
sst_pred = sst_model.predict(X_new)

# Score the predictions against the observed SST values
new_r2 = r2_score(y_new, sst_pred)
new_mse = mean_squared_error(y_new, sst_pred)
new_rmse = np.sqrt(new_mse)   # RMSE is the square root of the MSE

# Report both scores rounded to two decimals
for template, score in (("New R²:   {:.2f}", new_r2), ("New RMSE: {:.2f}", new_rmse)):
    print(template.format(score))

In [None]:
# Create figure
fig1, ax = plt.subplots(1, 1, figsize=(15, 5))

# Plot True and Predicted SSTs against the same date axis
ax.plot(new_df['Date'], new_df['SST'], linestyle = '-', c = 'blue', linewidth = 1.0, label = 'True SSTs')
ax.plot(new_df['Date'], sst_pred,linestyle = '-.', c = 'orangered', linewidth = 1.5, label = 'Predicted')

# Formatting
ax.grid(True, color = 'silver', linestyle = ':', linewidth = 0.5)
# Clamp the x-axis to the data range; Series.min()/max() ignore missing dates (NaT)
ax.set_xlim([new_df['Date'].min(), new_df['Date'].max()])
ax.set_xlabel('Date', fontsize = 10, weight = 'bold')
ax.set_ylabel('SST °C', fontsize = 10, weight = 'bold')
ax.set_title('True vs. predicted SST', fontsize = 12, weight = 'bold')
ax.legend()

# Set the x-axis major locator to every month
ax.xaxis.set_major_locator(mdates.MonthLocator(interval = 1))
# Format the x-axis ticks as 'mm-yy'
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%y'))
# Rotate tick labels for better readability
plt.setp(ax.get_xticklabels(), rotation = 40, ha = 'center')

plt.show()

<div class="alert alert-info" role="alert" 
     style="font-size: 1.1em; padding: 15px; margin: 10px 0; text-align: center; background-color: #d9edf7; border-color: #bce8f1; color: #31708f; border-radius: 8px;">

    
    Well, that's pretty disappointing. It looks like our SST model is not capturing the seasonal cycle very well. 
    To be fair though, we didn't train the model on anything date-related. Back to the drawing board for XGBoost training!
    
    
</div>