In [None]:
import glob
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import xarray as xr
from keras.layers import Dense
from keras.models import Sequential
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
model_df = pd.read_csv("../data/processed/model_inputs.csv")

In [None]:
list(model_df)

In [None]:
input_df = model_df[
    [
        "lat_2",
        "lon_2",
        "time",
        "aet",
        "def",
        "pdsi",
        "pet",
        "pr",
        "srad",
        "ro",
        "soil",
        "swe",
        "precip",
        "lwe_thickness",
    ]
]
input_df["precip"] = input_df["precip"].fillna(0)

In [None]:
input_df = input_df.dropna().reset_index(drop=True)

In [None]:
input_df

In [None]:
X_df = input_df.iloc[:, 0:13]

In [None]:
y_df = input_df.iloc[:, -1]

In [None]:
time_dict = dict(zip(X_df["time"].unique().tolist(), range(0, X_df.time.nunique())))

In [None]:
X_df["time"] = X_df.time.map(time_dict)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X_df.to_numpy(), y_df.to_numpy(), test_size=0.2
)

In [None]:
scaler = StandardScaler()

In [None]:
X_train_scaled = scaler.fit_transform(X_train)

In [None]:
X_test_scaled = scaler.transform(X_test)

In [None]:
n_features = X_train.shape[1]

In [None]:
# Setting up error metrics
rmse = tf.keras.metrics.RootMeanSquaredError(name="root_mean_squared_error", dtype=None)

In [None]:
from sklearn.neural_network import MLPRegressor

In [None]:
model = MLPRegressor(hidden_layer_sizes=(24, 18, 12, 6,4,), batch_size=len(X_df[X_df.time == 0]), solver="lbfgs", alpha=0.001)

In [None]:
model.fit(X_train_scaled, y_train)

In [None]:
error = model.evaluate(X_train_scaled, y_train, verbose=0)

In [None]:
error

In [None]:
X_df[X_df.time == 1]

In [None]:
y_pred = model.predict(X_test_scaled)

In [None]:
from sklearn import metrics

print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

In [None]:
metrics.r2_score(y_test, y_pred)

In [None]:
import pickle
filename = '../models/mlp_model.sav'

In [None]:
pickle.dump(model, open(filename, 'wb')) 

In [None]:
model = pickle.load(open(filename, "rb"))

In [None]:
time_dict

In [None]:
def predict_ts(ts_file):
    """Predict `lwe_thickness` for one parquet file and return it on a grid.

    Uses the notebook-level `time_dict`, `scaler` and `model` objects.

    Steps: read the parquet file, fill missing `precip` with 0, encode the
    `time` column with `time_dict` (labels formatted as ``%Y-%m-%d``), drop
    rows with any missing value, scale all remaining columns with `scaler`,
    and predict with `model`.

    NOTE(review): every column of the file is passed to the scaler in file
    order, so the parquet schema presumably matches the training feature
    order - confirm against the files in ``target_grid``.

    Parameters
    ----------
    ts_file : path-like
        Parquet file to read.

    Returns
    -------
    xarray.Dataset or None
        Mean predicted `lwe_thickness` indexed by (`lat`, `lon`, `time`), or
        ``None`` when no complete rows remain after dropping missing values.
    """
    frame = pd.read_parquet(ts_file)
    frame["precip"] = frame["precip"].fillna(0)
    frame["time"] = frame.time.dt.strftime("%Y-%m-%d").map(time_dict)
    frame = frame.dropna().reset_index(drop=True)

    # Nothing left to predict on.
    if len(frame) == 0:
        return None

    scaled_features = scaler.transform(frame.to_numpy())
    predictions = pd.DataFrame(
        model.predict(scaled_features), columns=["lwe_thickness"]
    )
    result = pd.concat([frame[["lat", "lon", "time"]], predictions], axis=1)

    # Turn the integer time codes back into their original labels, then into
    # datetimes.
    code_to_label = {code: label for label, code in time_dict.items()}
    result["time"] = result["time"].map(code_to_label)
    result["time"] = pd.to_datetime(result.time)

    return result.groupby(["lat", "lon", "time"]).mean().to_xarray()

In [None]:
xr.concat(
    [
        predict_ts(_file)
        for _file in sorted(glob.glob("../data/processed/target_grid/*")) if Path(_file).name in time_dict
    ],
    dim="time",
).to_netcdf("../data/processed/mlp_output.nc")

In [None]:
predicted_grace = xr.open_dataset("../data/processed/mlp_output.nc")

In [None]:
actual_grace = xr.open_dataset("../data/processed/grace.nc")

In [None]:
actual_grace

In [None]:
predicted_grace

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 2, figsize=(15, 10))
predicted_grace.lwe_thickness.sel(time="2008-01-01").plot(ax=ax[1])
actual_grace.lwe_thickness.sel(time="2008-01-01").plot(ax=ax[0])
# plt.savefig("../reports/figures/actual_vs_dl.png")

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 2, figsize=(15, 10))
predicted_grace.lwe_thickness.sel(time="2008-01-01").plot(ax=ax[1])
actual_grace.lwe_thickness.sel(time="2008-01-01").plot(ax=ax[0])
plt.savefig("../reports/figures/actual_vs_mlp.png")

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(25, 10))
predicted_grace.lwe_thickness.mean(["lat", "lon"]).plot(ax=ax, label="Predicted")
actual_grace.lwe_thickness.mean(["lat", "lon"]).plot(ax=ax, label="Original")
plt.legend()
# plt.savefig("mean.png")