In [None]:
import os
import pandas as pd
import numpy as np
import pvlib
import re
import subprocess
import sys

import plotly.express as px
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score
from sklearn.preprocessing import MinMaxScaler

# Point at the sibling "Python Scripts" folder by rewriting every occurrence of
# "Notebooks" in the current working directory, then make it importable.
working_dir = os.getcwd()
module_path = working_dir.replace('Notebooks', 'Python Scripts')
sys.path.append(module_path)
from performance_helper import *
from ml_helper import *

# from warnings import simplefilter
# from sklearn.exceptions import ConvergenceWarning
# simplefilter("ignore", category=ConvergenceWarning)

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV

from sklego.preprocessing import RepeatingBasisFunction
import matplotlib.pyplot as plt

import neptune
import neptune.integrations.sklearn as npt_utils

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping

from tensorflow.keras import activations

import neptune
from neptune.integrations.tensorflow_keras import NeptuneCallback
from keras.callbacks import TensorBoard

from keras.models import model_from_json

from sklearn.utils import shuffle

from matplotlib import rcParams

In [None]:
# == All Data == #
# Data folder: the current working directory with "Notebooks" / "Python Scripts"
# rewritten to "Support Files".
datapath = re.sub(r'Notebooks|Python Scripts','Support Files',os.getcwd())


def _read_timestamped_csv(csv_path):
    """Read a CSV using its first column as the index and parse that index as datetimes."""
    frame = pd.read_csv(csv_path, index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame


def _exclude_2022_and_july(frame):
    """Keep only rows whose timestamp is neither in year 2022 nor in month 7 (July)."""
    stamps = frame.index
    # NOTE(review): this removes ALL of 2022 and EVERY July. If the intent was to
    # remove only July 2022, the mask should be ~((year == 2022) & (month == 7)) -- confirm.
    keep_mask = (stamps.year != 2022) & (stamps.month != 7)
    return frame[keep_mask]


# == Load Irradiance Data == #
target_df = _exclude_2022_and_july(_read_timestamped_csv(datapath + '/Irradiance.csv'))

# == Load ML Data == #
training_df = _exclude_2022_and_july(_read_timestamped_csv(datapath + '/meteo_data_cleaned.csv'))

# Keep only the ML rows whose timestamp also exists in the irradiance data.
training_df = training_df[training_df.index.isin(target_df.index)]

In [None]:
# Print both frames (features first, then targets) for a quick visual check.
for frame in (training_df, target_df):
    print(frame)

# Fix TensorFlow's global random seed.
tf.random.set_seed(42)

In [None]:
starttime = pd.Timestamp.now()
# ======= Any Model ======= #

# Targets: only rows without missing values.
df = target_df.dropna()

# Features restricted to timestamps that still have a complete target row.
test_ml_df = training_df[training_df.index.isin(df.index)].copy()

# Column-wise join on the shared index. join='inner' keeps only timestamps present
# in both frames, so a target row without matching features cannot introduce
# all-NaN feature rows (the default outer join would).
test_ml_df = pd.concat([test_ml_df,df],axis=1,join='inner',ignore_index=False)

# Seeded so the row order is reproducible between runs.
test_ml_df = shuffle(test_ml_df, random_state=42)

# X = every column that is not a target; y = every column that is not a feature.
X = test_ml_df.drop(df.columns, axis = 1).to_numpy()
y = test_ml_df.drop(training_df.columns, axis = 1).to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split ONLY and reuse those min/max values for the
# test split. Re-fitting on X_test (as before) scaled the two splits differently
# and leaked test-set statistics into the evaluation.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# == Activation Function == #
# Candidates from tensorflow.keras.activations, kept for reference:
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign

# == Optimization Parameters == #
hidden_layer_sizes = (300, 200, 150)
# The values below are only referenced by the commented-out Adam/AdamW
# constructors further down; the active RMSprop solver is built without them.
learning_rate_, alpha = 0.0001, 0.0001
beta_1_, beta_2_ = 0.9, 0.999
epsilon_ = 1e-7
# == Optimization Parameters == #

# == Solver == #
# Alternatives that were tried:
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_,name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
solver = tf.keras.optimizers.RMSprop()

# x_shape = number of rows, y_shape = number of columns of the feature frame.
x_shape = training_df.shape[0]
y_shape = training_df.shape[1]
# == Solver == #

# == Model == #
# Fully connected regressor: one ReLU Dense layer (He-normal initialisation) per
# entry of `hidden_layer_sizes`, followed by an output Dense layer with one unit
# per target column.
#
# The hidden layers are generated from the tuple so each layer gets its own size.
# Previously the third layer reused hidden_layer_sizes[1], so the configured
# third size was silently ignored.
n_features = X_train.shape[1]  # input width taken from the data actually fed to fit()

hidden_layers = [
    keras.layers.Dense(units,
                       kernel_initializer=tf.keras.initializers.HeNormal(),
                       activation = 'relu',
                       name = f'Hidden-Layer-{position}')
    for position, units in enumerate(hidden_layer_sizes, start=1)
]

multi_model = keras.models.Sequential([
    keras.layers.Input(shape = (n_features,), name='MultiOutput-MLP'),
    *hidden_layers,
    keras.layers.Dense(len(df.columns), name = "Output-Layer")
])

# MAE is used both as the training loss and as the reported metric.
multi_model.compile(optimizer=solver,
              loss='mean_absolute_error',
              metrics=['mean_absolute_error'])
# == Model == #

# == callbacks == #
# Early stopping watches the TRAINING loss ('loss') with a patience of 20 epochs.
# NOTE(review): despite the variable name, training loss does not reveal
# over-fitting; a held-out metric would be needed for that -- confirm intent.
overfitting = EarlyStopping(patience = 20, min_delta = 0, monitor = 'loss')


def _lr_for_epoch(epoch):
    """Learning rate for a given epoch: 1e-3 multiplied by 10 every 20 epochs."""
    return 1e-3 * 10 ** (epoch / 20)


# NOTE: this scheduler is created but is not in the callbacks list passed to fit().
learning_rate = tf.keras.callbacks.LearningRateScheduler(_lr_for_epoch)

# One TensorBoard log directory per run, keyed by the current timestamp.
run_stamp = pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
logdir = "logs/fit/" + run_stamp
tb = TensorBoard(histogram_freq = 1, log_dir = logdir)
# == callbacks == #

# == Local Fit == #
# Offline alternative without Neptune logging:
# multi_history = multi_model.fit(X_train,
#                     y_train,
#                     epochs = 10,
# #                     batch_size = 3000,
#                     validation_data = (X_test, y_test),
#                     callbacks = [overfitting,
#                                  tb]
#                    )

# == Neptune Fit == #
# SECURITY: the Neptune API token used to be hard-coded in this cell. It is now
# read from the NEPTUNE_API_TOKEN environment variable so the secret never lives
# in the notebook. The token that was previously committed should be revoked.
run = neptune.init_run(
    project="ethanmasters/PV-Solar-MLP",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
    name="MLP-DiffuseIR",
    tags=["MLPRegressor", "regression", "MultiOutput"],
    )

# NOTE: the "validation" data here is the held-out test split, so the val_* curves
# are measured on the same rows that are later passed to evaluate().
multi_history = multi_model.fit(X_train,
                    y_train,
                    epochs = 100,
#                         batch_size = 50,
                    validation_data = (X_test, y_test),
                    callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
                                overfitting,
                                tb]
                   )
# == Neptune Fit == #

# == serialize model == #
# Write the fitted network to the path "multivariate_mlp_model".
multi_model.save("multivariate_mlp_model")

# Layer-by-layer overview of the network.
multi_model.summary()

# Elapsed wall-clock time since `starttime` was recorded.
endtime = pd.Timestamp.now()
runtime = endtime - starttime
print("Run Time:", runtime)

In [None]:
history = multi_history.history
# Was `multi_model1`, a name no visible cell defines (NameError on a fresh kernel);
# the trained network is `multi_model`.
model = multi_model

# Loss and metric on the held-out split.
score = model.evaluate(X_test, y_test, verbose = 1)
print(score)

# One row per epoch, one column per logged quantity.
model_df = pd.DataFrame(history)

# NOTE: despite its name, mse_df holds mean ABSOLUTE error (the compiled metric);
# the name is kept so later cells that reference it keep working.
mse_df = model_df[['mean_absolute_error','val_mean_absolute_error']]
loss_df = model_df[['loss','val_loss']]

# Same plot recipe for both tables: training vs. validation curve per epoch.
for curves, y_label in ((mse_df, 'mean absolute error'), (loss_df, 'loss')):
    ax = curves.plot(figsize=(12,6))
    ax.set_xlabel('epoch')
    ax.set_ylabel(y_label)
    ax.grid(True)
    # ax.set_ylim(0,1) # set the vertical range to [0-1]
    plt.show()


In [None]:
import dalex as dx

# Recover the feature names the same way X was built (every column except the
# targets). The previous `test_ml_df.columns` also contained the target columns,
# so it did not match the number of columns in X_test.
feature_names = test_ml_df.drop(df.columns, axis = 1).columns

# NOTE: X and y are rebound here to the scaled test split used by the explainer.
X, y = pd.DataFrame(X_test, columns = feature_names), y_test

# `col` was not defined in any visible cell; a fixed label (the Neptune run name)
# is used instead.
exp = dx.Explainer(model, X, y, label="MLP-DiffuseIR")

In [None]:
# Compute the explainer's performance summary and show it as the cell output.
performance_summary = exp.model_performance()
performance_summary

In [None]:
# Compute the variable-importance explanation ONCE and reuse it. Calling
# exp.model_parts() twice repeated the computation, and the figure uploaded to
# Neptune was built from a different result object than the one displayed.
model_parts = exp.model_parts()
model_parts.plot()
run["model/performance/model_parts"].upload(model_parts.plot(show=False))

In [None]:
# Compute the profile explanation ONCE so the displayed figure and the figure
# uploaded to Neptune come from the same result (and the work is not repeated).
model_profile = exp.model_profile()
model_profile.plot()
run["model/performance/model_profile"].upload(model_profile.plot(show=False))

In [None]:
# Compute the diagnostics ONCE and reuse the result for both the inline plot and
# the Neptune upload instead of recomputing them for each call.
model_diagnostics = exp.model_diagnostics()
model_diagnostics.plot()
run["model/performance/model_diagnostics"].upload(model_diagnostics.plot(show=False))

In [None]:
# Show the table behind the diagnostics explanation as the cell output.
residual_diagnostics = exp.model_diagnostics()
residual_diagnostics.result

In [None]:
# Build a surrogate of the explained model with dalex's default arguments and
# display its `performance` attribute as the cell output.
# Variant with explicit limits (not active):
# surrogate_model = exp.model_surrogate(max_vars=4, max_depth=3)
surrogate_model = exp.model_surrogate()
surrogate_model.performance

In [None]:
# Draw the surrogate built in the previous cell.
surrogate_model.plot()
# Upload to Neptune is currently disabled:
# run["model/performance/surrogate_model"].upload(surrogate_model.plot(show=False))

In [None]:
# Close the Neptune run opened in the training cell; run this only after all uploads above.
run.stop()