In [None]:
import os
import pandas as pd
import numpy as np
import pvlib
import re
import subprocess
import sys

import plotly.express as px
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score
from sklearn.preprocessing import MinMaxScaler

module_path = re.sub(r'Notebooks','Python Scripts',os.getcwd())
sys.path.append(module_path)
from performance_helper import *
from ml_helper import *

# from warnings import simplefilter
# from sklearn.exceptions import ConvergenceWarning
# simplefilter("ignore", category=ConvergenceWarning)

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV

from sklego.preprocessing import RepeatingBasisFunction
import matplotlib.pyplot as plt

import neptune
import neptune.integrations.sklearn as npt_utils

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping

from tensorflow.keras import activations

import neptune
from neptune.integrations.tensorflow_keras import NeptuneCallback
from keras.callbacks import TensorBoard

from keras.models import model_from_json

from sklearn.utils import shuffle

from matplotlib import rcParams

In [None]:
datapath = re.sub("Notebooks","Support Files/",os.getcwd())
ml_df = pd.read_csv(datapath + 'ml_features.csv',index_col=0)
ml_df = reshape_ml(test_df, ml_df)

# = if reshaping = #
ml_df = resample_ml(test_df, ml_df, freq = '20s')
ml_df = interpolate(ml_df)
ml_df['Seconds'] = [(time - time.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds() for time in ml_df.index]
ml_df['Day'] = [d.day for d in ml_df.index]

imputation_dict = {}

In [None]:
for col in df.columns:

    if col != 'Temperature': continue

    test_ml_df = ml_df.copy()

    test_ml_df[col] = df[df.index.isin(ml_df.index)][col]

    X = test_ml_df.drop([col], axis = 1).to_numpy()
    y = test_ml_df[col].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.fit_transform(X_test)

    lasso = Lasso(alpha=0.1) # range of lasso (0.001 to 10)
    lasso.fit(X_train, y_train)

    selected_indices = np.where(lasso.coef_ != 0)[0]

    if not selected_indices.any(): 
        (print(f'No features meet current criteria for: {col}'))
        continue

    print("Coefficients:", list(zip(lasso.coef_,test_ml_df.drop([col], axis = 1).columns)))

    X_train_selected = X_train[:, selected_indices]
    X_test_selected = X_test[:, selected_indices]

    parameters = {
        "hidden_layer_sizes": (100,100),
        "activation": "tanh",
        "solver": "adam",
        "learning_rate_init": 0.001,
        "max_iter": 300,
        "alpha": 0.0001,
        "beta_1": 0.9,
        "beta_2": 0.999,
        "epsilon": 1e-8
    }

    # == Model == #

    mlp = MLPRegressor(**parameters)

    mlp.fit(X_train_selected, y_train)

    # == Model == #
    
    # == Neptune == #

#     run = neptune.init_run(
#         project="ethanmasters/PV-Solar-MLP",
#         api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
#         name="MLP-DiffuseIR",
#         tags=["MLPRegressor", "regression", "Temperature"],
#     )

#     run["parameters"] = parameters

#     run["mlp_summary"] = npt_utils.create_regressor_summary(mlp, X_train_selected, X_test_selected, y_train, y_test)

#     run.stop()

    # == Model == #

    print("Selected features: ", list(test_ml_df.drop([col], axis = 1).columns))
    print("Target: ", col)
    print("Number of layers: ", mlp.n_layers_)
    print("Number of outputs: ", mlp.n_outputs_)
    print("Output activation: ", mlp.out_activation_)
    print("Number of iterations:", mlp.n_iter_)
    print("Best loss: ", mlp.best_loss_)
    print("Current loss: ", mlp.loss_)
    print("Number of training samples seen: ", mlp.t_)

    print("\nLoss Curve: ")

    px.line(x = range(mlp.n_iter_), y = mlp.loss_curve_, title = "Loss Curve").show()

    score = mlp.score(X_test_selected, y_test)
    print("\nTest score:", score)

    if test_gaps:
        imputation_values = scaler.fit_transform(test_ml_df.drop([col], axis = 1).iloc[test_gaps[col],:].to_numpy()[:, selected_indices])
        prediction = mlp.predict(imputation_values)

        print(prediction)

        df.iloc[test_gaps[col],:][col] = prediction

    col_indx += 1

    print("\nPerumation Importance: ")

    feature_selection(test_ml_df.drop([col], axis = 1).iloc[:, selected_indices], mlp, X_train_selected, y_train)

    multilinear_feature_selection(test_ml_df.drop([col], axis = 1).iloc[:, selected_indices], X_test_selected)


In [None]:
# == All Data == #
datapath = re.sub(r'Notebooks|Python Scripts','Support Files',os.getcwd())

# == Load Irradiance Data == #
target_df = pd.read_csv(datapath + '/Irradiance.csv',index_col=0)
target_df.index = pd.to_datetime(target_df.index)

# == Load ML Data == #
training_df = pd.read_csv(datapath + '/meteo_data_cleaned.csv',index_col=0)
training_df.index = pd.to_datetime(training_df.index)

target_df = target_df[(target_df.index.year != 2022) & (target_df.index.month != 7)]
training_df = training_df[(training_df.index.year != 2022) & (training_df.index.month != 7)]
training_df = training_df[training_df.index.isin(target_df.index)]

In [None]:
print(training_df)
print(target_df)
tf.random.set_seed(42)

In [None]:
for col in target_df.columns:
    
    if col != 'DirectIR':
        continue
    
    df = target_df[[col]].dropna()

    test_ml_df = training_df[training_df.index.isin(df.index)].copy()
    
    test_ml_df[col] = df[df.index.isin(test_ml_df.index)][col]
    
    test_ml_df = shuffle(test_ml_df)
    
    test_ml_df = test_ml_df.drop(['Wind Direction (°) (10 m)'],axis=1)

    X = test_ml_df.drop([col], axis = 1).to_numpy()
    y = test_ml_df[col].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.fit_transform(X_test)

    # == Activation Function == #
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign
    # == Activation Function == #
    
    # == Optomization Parameters = #
    hidden_layer_sizes = (20,10)
    learning_rate_ = 0.001
    alpha = 0.0001
    beta_1_ = 0.9
    beta_2_ = 0.999
    epsilon_ = 1e-7
    # == Optomization Parameters = #
    
    # == Solver == #
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
    solver = tf.keras.optimizers.RMSprop()
    # == Solver == #
    
    # == Model == #
    DirectIR_model = keras.models.Sequential([
        keras.layers.Input(shape = [len(test_ml_df.drop([col], axis = 1).columns),],
                           name='DirectIR-MLP'),
        
#         keras.optimizers.Nadam(),
#         keras.layers.ReLU(name = 'Activation-Layer-1'),
        
        keras.layers.Dense(hidden_layer_sizes[0],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-1'),
        
#         keras.layers.ReLU(name = 'Activation-Layer-2'),
#         keras.optimizers.Nadam(),
            
        keras.layers.Dense(hidden_layer_sizes[1],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-2'),
        
        keras.layers.Dense(1, activation='linear', name = "Output-Layer")
    ])
    DirectIR_model.compile(optimizer=solver,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    # == Model == #
    
    # == callbacks == #
    overfitting = EarlyStopping(monitor = 'loss', min_delta = 0, patience = 5, restore_best_weights=True)
    learning_rate = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))
    logdir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
#     tf.debugging.experimental.enable_dump_debug_info(logdir, tensor_debug_mode="FULL_HEALTH", circular_buffer_size=-1)
    tb = TensorBoard(log_dir = logdir, histogram_freq = 1)
    # == callbacks == #
    
    # == Fit == #
    DirectIR_history = DirectIR_model.fit(X_train,
                        y_train,
                        epochs = 25,
#                         batch_size = 850,
                        validation_data = (X_test, y_test),
                        callbacks = [overfitting,
                                     tb]
                       )
    # == Fit == #
    
    # == Neptune Run == #
#     run = neptune.init_run(
#         project="ethanmasters/PV-Solar-MLP",
#         api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
#         name="MLP-DiffuseIR",
#         tags=["MLPRegressor", "regression", "DirectIR"],
#         )

#     DirectIR_history = DirectIR_model.fit(X_train, 
#                         y_train, 
#                         epochs = 50, 
# #                         batch_size = 50,
#                         validation_data = (X_test, y_test),
#                         callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
#                                     overfitting,
#                                     tb]
#                        )
    # == Neptune Run == #
    
    # == serialize model == #
    DirectIR_model.save(f"{col}_model")


    DirectIR_model.summary()

In [None]:

tf.random.set_seed(42)

for col in target_df.columns:
    
    if col != 'Temperature':
        continue
    
    df = target_df[[col]].dropna()

    test_ml_df = training_df[training_df.index.isin(df.index)].copy()
    
    test_ml_df[col] = df[df.index.isin(test_ml_df.index)][col]
    
    test_ml_df = shuffle(test_ml_df)
    
    test_ml_df = test_ml_df.drop(['Wind Speed (km/h) (10 m)'],axis=1)

    X = test_ml_df.drop([col], axis = 1).to_numpy()
    y = test_ml_df[col].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.fit_transform(X_test)

    # == Activation Function == #
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign
    # == Activation Function == #
    
    # == Optomization Parameters = #
    hidden_layer_sizes = (50,25,25)
    learning_rate_ = 0.001
    alpha = 0.0001
    beta_1_ = 0.9
    beta_2_ = 0.999
    epsilon_ = 1e-7
    # == Optomization Parameters = #
    
    # == Solver == #
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
    solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
#     solver = tf.keras.optimizers.RMSprop()
    # == Solver == #
    
    # == Model == #
    Temperature_model = keras.models.Sequential([
        keras.layers.Input(shape = [len(test_ml_df.drop([col], axis = 1).columns),],
                           name='Temperature-MLP'),
        
#         keras.optimizers.Nadam(),
#         keras.layers.ReLU(name = 'Activation-Layer-1'),
        
        keras.layers.Dense(hidden_layer_sizes[0],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-1'),
        
#         keras.layers.ReLU(name = 'Activation-Layer-2'),
#         keras.optimizers.Nadam(),
            
        keras.layers.Dense(hidden_layer_sizes[1],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-2'),
        
        keras.layers.Dense(hidden_layer_sizes[1],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-3'),
        
        keras.layers.Dense(1, activation='linear', name = "Output-Layer")
    ])
    Temperature_model.compile(optimizer=solver,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    # == Model == #
    
    # == callbacks == #
    overfitting = EarlyStopping(monitor = 'loss', min_delta = 0, patience = 5, restore_best_weights=True)
    learning_rate = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))
    logdir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
    tb = TensorBoard(log_dir = logdir, histogram_freq = 1)
    # == callbacks == #
    
    # == Fit == #
    Temperature_history = Temperature_model.fit(X_train,
                        y_train,
                        epochs = 25,
#                         batch_size = 850,
                        validation_data = (X_test, y_test),
                        callbacks = [overfitting,
                                     tb]
                       )
    # == Fit == #
    
    # == Neptune Fit == #
#     run = neptune.init_run(
#         project="ethanmasters/PV-Solar-MLP",
#         api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
#         name="MLP-DiffuseIR",
#         tags=["MLPRegressor", "regression", "Temperature"],
#         )
#     Temperature_history = Temperature_model.fit(X_train, 
#                         y_train, 
#                         epochs = 25, 
# #                         batch_size = 50,
#                         validation_data = (X_test, y_test),
#                         callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
#                                     overfitting,
#                                     tb]
#                        )
    # == Neptune Fit == #
    
    # == serialize model == #
    Temperature_model.save(f"{col}_model")

    Temperature_model.summary()

In [None]:

# == WindSpeed == #

for col in target_df.columns:
    
    if col != 'WindSpeed':
        continue
    
    df = target_df[[col]].dropna()

    test_ml_df = training_df[training_df.index.isin(df.index)].copy()
    
    test_ml_df[col] = df[df.index.isin(test_ml_df.index)][col]
    
    test_ml_df = shuffle(test_ml_df)
    
    test_ml_df = test_ml_df.drop(['Temperature (°C) (2 m elevation corrected)', 'Wind Direction (°) (10 m)',
       'Cloud Cover Total (%) (sfc)'],axis=1)

    X = test_ml_df.drop([col], axis = 1).to_numpy()
    y = test_ml_df[col].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.fit_transform(X_test)

    # == Activation Function == #
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign
# == Activation Function == #
    
    # == Optomization Parameters = #
    hidden_layer_sizes = (20,10)
    learning_rate_ = 0.001
    alpha = 0.0001
    beta_1_ = 0.9
    beta_2_ = 0.999
    epsilon_ = 1e-7
    # == Optomization Parameters = #
    
    
    # == Solver == #
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
    solver = tf.keras.optimizers.RMSprop()
    # == Solver == #
    
    # == Model == #
    model = keras.models.Sequential([
        keras.layers.Input(shape = [len(test_ml_df.drop([col], axis = 1).columns),],
                           name='WindSpeed-MLP'),
        
#         keras.optimizers.Nadam(),
#         keras.layers.ReLU(name = 'Activation-Layer-1'),
        
        keras.layers.Dense(hidden_layer_sizes[0],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-1'),
        
#         keras.layers.ReLU(name = 'Activation-Layer-2'),
#         keras.optimizers.Nadam(),
            
        keras.layers.Dense(hidden_layer_sizes[1],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-2'),
        
        keras.layers.Dense(1, activation='linear', name = "Output-Layer")
    ])

    model.compile(optimizer=solver,
                      loss='mean_absolute_error',
                      metrics=['mean_absolute_error'])
    # == Model == #
    
    # == callbacks == #
    overfitting = EarlyStopping(monitor = 'loss', min_delta = 0, patience = 5, restore_best_weights=True)
    learning_rate = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))
    logdir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
    tb = TensorBoard(log_dir = logdir, histogram_freq = 1)
    # == callbacks == #
    
    # == Local Run == #
    history = model.fit(X_train,
                        y_train,
                        epochs = 25,
#                         batch_size = 850,
                        validation_data = (X_test, y_test),
                        callbacks = [overfitting,
                                     tb]
                       )
    # == Local Run == #
    
    # == Neptune Run == # 
#     run = neptune.init_run(
#         project="ethanmasters/PV-Solar-MLP",
#         api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
#         name="MLP-DiffuseIR",
#         tags=["MLPRegressor", "regression", "WindSpeed"],
#         )

#     history = model.fit(X_train, 
#                         y_train, 
#                         epochs = 10, 
# #                         batch_size = 50,
#                         validation_data = (X_test, y_test),
#                         callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
#                                     overfitting,
#                                     tb]
#                        )
    # == Neptune Run == # 
    
    # == serialize model == #
    model.save(f"{col}_model")
    # == serialize model == #

    model.summary()

In [None]:

# ======= Diffuse Model ======= #

for col in target_df.columns:
    
    if col != 'DiffuseIR':
        continue
    
    df = target_df[[col]].dropna()

    test_ml_df = training_df[training_df.index.isin(df.index)].copy()
    
    test_ml_df[col] = df[df.index.isin(test_ml_df.index)][col]
    
    test_ml_df = shuffle(test_ml_df)
    
    test_ml_df = test_ml_df.drop(['Wind Direction (°) (10 m)'],axis=1)

    X = test_ml_df.drop([col], axis = 1).to_numpy()
    y = test_ml_df[col].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.fit_transform(X_test)

    # == Activation Function == #
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign
    # == Activation Function == #
    
    # == Optomization Parameters = #
    hidden_layer_sizes = (20,10)
    learning_rate_ = 0.001
    alpha = 0.0001
    beta_1_ = 0.9
    beta_2_ = 0.999
    epsilon_ = 1e-7
    # == Optomization Parameters = #
    
    # == Solver == #
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
    solver = tf.keras.optimizers.RMSprop()
    # == Solver == #
    
    # == Model == #
    DiffuseIR_model = keras.models.Sequential([
        keras.layers.Input(shape = [len(test_ml_df.drop([col], axis = 1).columns),],
                           name='DiffuseIR-MLP'),
        
#         keras.optimizers.Nadam(),
#         keras.layers.ReLU(name = 'Activation-Layer-1'),
        
        keras.layers.Dense(hidden_layer_sizes[0],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-1'),
        
#         keras.layers.ReLU(name = 'Activation-Layer-2'),
#         keras.optimizers.Nadam(),
            
        keras.layers.Dense(hidden_layer_sizes[1],
                           kernel_initializer=tf.keras.initializers.HeNormal(),
                           activation = 'relu',
                           name = 'Hidden-Layer-2'),
        
        keras.layers.Dense(1, activation='linear', name = "Output-Layer")
    ])

    DiffuseIR_model.compile(optimizer=solver,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    # == Model == #
    
    # == callbacks == #
    overfitting = EarlyStopping(monitor = 'loss', min_delta = 0, patience = 5, restore_best_weights=True)
    learning_rate = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))
    logdir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
    tb = TensorBoard(log_dir = logdir, histogram_freq = 1)
    # == callbacks == #
    
    # == Local Run == #
    DiffuseIR_history = DiffuseIR_model.fit(X_train,
                        y_train,
                        epochs = 25,
#                         batch_size = 850,
                        validation_data = (X_test, y_test),
                        callbacks = [overfitting,
                                     tb]
                       )
    # == Local Run == #
    
    # == Neptune Run == # 
#     run = neptune.init_run(
#         project="ethanmasters/PV-Solar-MLP",
#         api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
#         name="MLP-DiffuseIR",
#         tags=["MLPRegressor", "regression", "DiffuseIR"],
#         )

#     DiffuseIR_history = DiffuseIR_model.fit(X_train, 
#                         y_train, 
#                         epochs = 10, 
# #                         batch_size = 50,
#                         validation_data = (X_test, y_test),
#                         callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
#                                     overfitting,
#                                     tb]
#                        )
    # == Neptune Run == # 
    
    # == serialize model == #
    DiffuseIR_model.save(f"{col}_model")
    # == serialize model == #

    DiffuseIR_model.summary()

In [None]:
starttime = pd.Timestamp.now()
# ======= Any Model ======= #

df = target_df.dropna()

test_ml_df = training_df[training_df.index.isin(df.index)].copy()

test_ml_df = pd.concat([test_ml_df,df],axis=1,ignore_index=False)

test_ml_df = shuffle(test_ml_df)

X = test_ml_df.drop(df.columns, axis = 1).to_numpy()
y = test_ml_df.drop(training_df.columns, axis = 1).to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)


# == Activation Function == #
#     activation = activations.relu
#     activation = activations.tanh
#     activation = activations.selu
#     activation = activations.elu
#     activation = activations.sigmoid
#     activation = activations.softmax
#     activation = activations.softplus
#     activation = activations.softsign

# == Optomization Parameters = #
hidden_layer_sizes = (300,200,150)
learning_rate_ = 0.0001
alpha = 0.0001
beta_1_ = 0.9
beta_2_ = 0.999
epsilon_ = 1e-7
# == Optomization Parameters = #

# == Solver == #
#     solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_, beta_1 = beta_1_, beta_2= beta_2_,
#                                                     epsilon = epsilon_, amsgrad = True, name = "Adam")
solver = tf.keras.optimizers.Adam(learning_rate=learning_rate_,name = "Adam")
#     solver = tf.keras.optimizers.experimental.AdamW(learning_rate=learning_rate_, name = 'AdamW')
#     solver = tf.keras.optimizers.Adamax()
#     solver = tf.keras.optimizers.SGD() # horrible
# solver = tf.keras.optimizers.RMSprop()

x_shape, y_shape = training_df.shape
# == Solver == #

# == Model == #
multi_model = keras.models.Sequential([
    keras.layers.Input(shape = (y_shape,), name='Temperature-MLP'),

#         keras.layers.ReLU(name = 'Activation-Layer-1'),

    keras.layers.Dense(hidden_layer_sizes[0],
                       kernel_initializer=tf.keras.initializers.HeNormal(),
                       activation = 'relu',
                       name = 'Hidden-Layer-1'),

#         keras.layers.ReLU(name = 'Activation-Layer-2'),
#         keras.optimizers.Nadam(),

    keras.layers.Dense(hidden_layer_sizes[1],
                       kernel_initializer=tf.keras.initializers.HeNormal(),
                       activation = 'relu',
                       name = 'Hidden-Layer-2'),
    
    keras.layers.Dense(hidden_layer_sizes[1],
                       kernel_initializer=tf.keras.initializers.HeNormal(),
                       activation = 'relu',
                       name = 'Hidden-Layer-3'),

    keras.layers.Dense(len(df.columns), name = "Output-Layer")
])

multi_model.compile(optimizer=solver,
              loss='mean_absolute_error',
              metrics=['mean_absolute_error'])
# == Model == #

# == callbacks == #
overfitting = EarlyStopping(monitor = 'loss', min_delta = 0, patience = 20)
learning_rate = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3 * 10 ** (epoch / 20))
logdir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
tb = TensorBoard(log_dir = logdir, histogram_freq = 1)
# == callbacks == #

# == Local Fit == #
# multi_history = multi_model.fit(X_train,
#                     y_train,
#                     epochs = 10,
# #                     batch_size = 3000,
#                     validation_data = (X_test, y_test),
#                     callbacks = [overfitting,
#                                  tb]
#                    )

# == Neptune Fit == #
run = neptune.init_run(
    project="ethanmasters/PV-Solar-MLP",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIyMWZhYmFiYi0zYWEzLTQ3NTMtYmMyOS1jZjAzYjY0N2EwYjgifQ==",
    name="MLP-DiffuseIR",
    tags=["MLPRegressor", "regression", "MultiOutput"],
    )
multi_history = multi_model.fit(X_train, 
                    y_train, 
                    epochs = 100, 
#                         batch_size = 50,
                    validation_data = (X_test, y_test),
                    callbacks = [NeptuneCallback(run = run, log_model_diagram = True),
                                overfitting,
                                tb]
                   )
# == Neptune Fit == #

# == serialize model == #
multi_model.save(f"multivariate_mlp_model")

multi_model.summary()

endtime = pd.Timestamp.now()
runtime = endtime - starttime
print("Run Time:", runtime)

In [None]:
history = multi_history.history
model = multi_model1
# col = "Temperature"

score = model.evaluate(X_test, y_test, verbose = 1)
print(score)

model_df = pd.DataFrame(history)
mse_df = model_df[['mean_absolute_error','val_mean_absolute_error']]
mse_df.plot(figsize=(12,6))
plt.grid(True)
# plt.gca().set_ylim(0,1) # set the vertical range to [0-1]
plt.show()


loss_df = model_df[['loss','val_loss']]
loss_df.plot(figsize=(12,6))
plt.grid(True)
# plt.gca().set_ylim(0,1) # set the vertical range to [0-1]
plt.show()


In [None]:
rcParams['figure.figsize'] = (18, 8)
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False 

plt.plot(
    np.arange(1, 26), 
    history['loss'], 
    label='Loss', lw=3
)
plt.plot(
    np.arange(1, 11), 
    history['val_loss'], 
    label='Value Loss', lw=3
)
# plt.plot(
#     np.arange(1, 21), 
#     history.history['lr'], 
#     label='Learning rate', color='#000', lw=3, linestyle='--'
# )
plt.title('Evaluation metrics', size=20)
plt.xlabel('Epoch', size=14)
plt.legend();
plt.show()


plt.plot(
    np.arange(1, 11), 
    history['mean_squared_error'], 
    label='MSE', lw=3
)
plt.plot(
    np.arange(1, 11), 
    history['val_mean_squared_error'], 
    label='Value MSE', lw=3
)
plt.title('Evaluation metrics', size=20)
plt.xlabel('Epoch', size=14)
plt.legend();

In [None]:
import dalex as dx
X, y = pd.DataFrame(X_test, columns = test_ml_df.columns), y_test
exp = dx.Explainer(model, X, y, label=col)

In [None]:
exp.model_performance()

In [None]:
exp.model_parts().plot()
run["model/performance/model_parts"].upload(exp.model_parts().plot(show=False))

In [None]:
exp.model_profile().plot()
run["model/performance/model_profile"].upload(exp.model_profile().plot(show=False))

In [None]:
exp.model_diagnostics().plot()
run["model/performance/model_diagnostics"].upload(exp.model_diagnostics().plot(show=False))

In [None]:
exp.model_diagnostics().result

In [None]:
# surrogate_model = explainer.model_surrogate(max_vars=4, max_depth=3)
surrogate_model = exp.model_surrogate()
surrogate_model.performance

In [None]:
surrogate_model.plot()
# run["model/performance/surrogate_model"].upload(surrogate_model.plot(show=False))

In [1]:
run.stop()

NameError: name 'run' is not defined