In [1]:
# Module Importations
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

# Report the versions of the core libraries so a run can be reproduced later
for lib_label, lib_module in (
    ('keras', keras),
    ('numpy', np),
    ('sklearn', sklearn),
    ('tensorflow', tf),
):
    print(lib_label + ' version =', lib_module.__version__)

keras version = 2.4.0
numpy version = 1.19.4
sklearn version = 0.22.2.post1
tensorflow version = 2.4.1


In [2]:
# Custom Module Imports
from Source.data import load_data
from Source.data import split_data
from Source.models import model_evaluation
from Source.models import keras_helpers
from Source.models import tensorboard_helpers

[keras_helpers]Tensorflow version: 2.4.1
[keras_helpers]keras version = 2.4.0


In [3]:
# Constants
# TRAIN_MODELS gates the training cells further down: when True the models are
# (re)trained and checkpointed; when False the evaluation cells fall back to a
# hard-coded path of a previously saved model instead.
TRAIN_MODELS = True

In [4]:
# Load the feature-engineered frames for both plants (plant 1 first, then plant 2)
# NOTE(review): load_pickled_data presumably unpickles these files - only load
# pickles from a trusted source.
df_plant1, df_plant2 = (
    load_data.load_pickled_data(pickle_name)
    for pickle_name in ("df_plant1_feat_eng.pkl", "df_plant2_feat_eng.pkl")
)

Loading pickled dataframe started ...
Loading pickled dataframe complete ...
Loading pickled dataframe started ...
Loading pickled dataframe complete ...


In [5]:
# Inspect the plant 1 frame (column names, non-null counts and dtypes) before pruning columns
df_plant1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71808 entries, 0 to 71806
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   DATE_TIME        71808 non-null  datetime64[ns]
 1   PLANT_ID         71808 non-null  object        
 2   SOURCE_KEY       71808 non-null  object        
 3   DC_POWER         71808 non-null  float64       
 4   DAILY_YIELD      71808 non-null  float64       
 5   AMB_TEMP         71808 non-null  float64       
 6   MOD_TEMP         71808 non-null  float64       
 7   IRRADIATION      71808 non-null  float64       
 8   DATE             71808 non-null  object        
 9   TIME_OF_DAY      71808 non-null  object        
 10  HOUR             71808 non-null  int64         
 11  DAY              71808 non-null  int64         
 12  WEEKDAY          71808 non-null  object        
 13  MONTH            71808 non-null  int64         
 14  YEAR             71808 non-null  int64

In [6]:
# Drop unrequired data columns

# Columns retained for modelling; every other column is removed.
# A name listed here that is absent from a frame is simply ignored.
cols_to_keep = ['DC_POWER', 'AMB_TEMP', 'MOD_TEMP', 'IRRADIATION', 'TIME_FLOAT']

for df in [df_plant1, df_plant2]:
    # Build the drop list from THIS frame's own columns. Deriving it from
    # df_plant1 alone would raise a KeyError (or leave stray columns behind)
    # whenever the two plants' frames do not share an identical schema.
    cols_to_drop = [col for col in df.columns if col not in cols_to_keep]
    df.drop(columns = cols_to_drop, inplace = True)

# Confirm which columns remain
df_plant2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71808 entries, 0 to 71807
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   DC_POWER     71808 non-null  float64
 1   AMB_TEMP     71808 non-null  float64
 2   MOD_TEMP     71808 non-null  float64
 3   IRRADIATION  71808 non-null  float64
dtypes: float64(4)
memory usage: 2.7 MB


In [7]:
# Partition each plant's frame into a training set and a held-back evaluation set
eval_fraction = 0.2  # passed to split_train_eval; the printed counts show ~20% going to evaluation

training_set_plant1, evaluation_set_plant1 = split_data.split_train_eval(
    df_plant1, eval_fraction
)
training_set_plant2, evaluation_set_plant2 = split_data.split_train_eval(
    df_plant2, eval_fraction
)

Original Data Items: 71808
Training Data Items: 57447
Evaluation Data Items: 14361
Original Data Items: 71808
Training Data Items: 57447
Evaluation Data Items: 14361


In [8]:
# Separate features from the DC power target for every train / evaluation set
target_col = 'DC_POWER'

# Training sets: features are everything except the target, labels are a copy of the target
dc_power_training_data_pt1 = training_set_plant1.drop(columns = [target_col])
dc_label_data_pt1 = training_set_plant1[target_col].copy()

dc_power_training_data_pt2 = training_set_plant2.drop(columns = [target_col])
dc_label_data_pt2 = training_set_plant2[target_col].copy()

# Evaluation sets: same separation
dc_evaluation_data_pt1 = evaluation_set_plant1.drop(columns = [target_col])
dc_eval_label_data_pt1 = evaluation_set_plant1[target_col].copy()

dc_evaluation_data_pt2 = evaluation_set_plant2.drop(columns = [target_col])
dc_eval_label_data_pt2 = evaluation_set_plant2[target_col].copy()

In [9]:
# Sanity-check that features and labels line up for each plant
shape_row_labels = ('Fit Train:', 'Fit Label:', 'Eval Train:', 'Eval Label:')

datasets_by_plant = (
    ('Plant 1:', (dc_power_training_data_pt1, dc_label_data_pt1,
                  dc_evaluation_data_pt1, dc_eval_label_data_pt1)),
    ('Plant 2:', (dc_power_training_data_pt2, dc_label_data_pt2,
                  dc_evaluation_data_pt2, dc_eval_label_data_pt2)),
)

for plant_heading, plant_datasets in datasets_by_plant:
    print(plant_heading)
    for row_label, dataset in zip(shape_row_labels, plant_datasets):
        print(row_label, dataset.shape)

Plant 1:
Fit Train: (57447, 3)
Fit Label: (57447,)
Eval Train: (14361, 3)
Eval Label: (14361,)
Plant 2:
Fit Train: (57447, 3)
Fit Label: (57447,)
Eval Train: (14361, 3)
Eval Label: (14361,)


In [10]:
# Normalise inputs / outputs (optional min-max scaling)
#
# Each scaler is fitted on TRAINING data only and then applied to the matching
# evaluation data with transform(). Re-fitting on the evaluation set would give
# it its own min/max, so the model would see evaluation features on a different
# scale from the one it was trained on.
normalise_inputs = False
normalise_outputs = False

# Kept (unfitted) so that any other cell referring to `normaliser` still resolves the name.
normaliser = MinMaxScaler()

if normalise_inputs == True:

    # One scaler per plant: re-fitting a single shared instance would discard
    # the previous plant's min/max.
    input_scaler_pt1 = MinMaxScaler()
    input_scaler_pt2 = MinMaxScaler()

    # Fit on, and transform, the training sets
    dc_power_training_data_pt1 = input_scaler_pt1.fit_transform(dc_power_training_data_pt1)
    dc_power_training_data_pt2 = input_scaler_pt2.fit_transform(dc_power_training_data_pt2)

    # Apply the training-set scaling to the evaluation sets
    dc_evaluation_data_pt1 = input_scaler_pt1.transform(dc_evaluation_data_pt1)
    dc_evaluation_data_pt2 = input_scaler_pt2.transform(dc_evaluation_data_pt2)

if normalise_outputs == True:

    # Separate scalers for the labels; keep them around so predictions can be
    # mapped back to original units with inverse_transform().
    label_scaler_pt1 = MinMaxScaler()
    label_scaler_pt2 = MinMaxScaler()

    # MinMaxScaler requires 2-D input, so each 1-D label vector is reshaped to
    # a single column for scaling and flattened back afterwards.
    dc_label_data_pt1 = label_scaler_pt1.fit_transform(
        np.asarray(dc_label_data_pt1).reshape(-1, 1)).ravel()
    dc_label_data_pt2 = label_scaler_pt2.fit_transform(
        np.asarray(dc_label_data_pt2).reshape(-1, 1)).ravel()

    # Apply the training-label scaling to the evaluation labels
    dc_eval_label_data_pt1 = label_scaler_pt1.transform(
        np.asarray(dc_eval_label_data_pt1).reshape(-1, 1)).ravel()
    dc_eval_label_data_pt2 = label_scaler_pt2.transform(
        np.asarray(dc_eval_label_data_pt2).reshape(-1, 1)).ravel()

In [11]:
# Check values after normalisation
# (while both normalise_* flags are False this displays the raw, unscaled features)
dc_power_training_data_pt1

Unnamed: 0,AMB_TEMP,MOD_TEMP,IRRADIATION
14143,30.709979,37.024858,0.301570
57612,21.928052,21.149878,0.024557
2193,21.912934,20.559299,0.000000
45323,27.553448,47.849731,0.684174
52973,21.649394,19.301268,0.000000
...,...,...,...
28390,29.480094,54.250505,0.818236
66023,21.658166,20.840604,0.033310
58579,26.332082,27.795502,0.097186
18920,25.133763,22.681325,0.000000


In [12]:
# Carve a validation subset out of the plant 1 training data (seeded for repeatability)
X_train_pt1, X_test_pt1, y_train_pt1, y_test_pt1 = train_test_split(
    dc_power_training_data_pt1,
    dc_label_data_pt1,
    test_size = 0.2,
    random_state = 0,
)

# Show the resulting training features followed by the training labels
for split_part in (X_train_pt1, y_train_pt1):
    print(split_part)

        AMB_TEMP   MOD_TEMP  IRRADIATION
16030  31.882248  54.657647     0.760547
4052   25.053473  22.286041     0.000000
71148  24.390767  29.473142     0.118630
71233  24.196167  26.664971     0.104762
24592  32.395532  42.827372     0.319141
...          ...        ...          ...
14020  31.933858  46.442515     0.355913
50198  26.388254  25.753438     0.021628
65767  22.101636  21.011483     0.000000
50797  22.663851  20.767582     0.000000
12524  24.236110  21.519643     0.000000

[45957 rows x 3 columns]
16030    936.842857
4052       0.000000
71148    178.350000
71233    154.514286
24592    414.100000
            ...    
14020    560.000000
50198     31.471429
65767      0.000000
50797      0.000000
12524      0.000000
Name: DC_POWER, Length: 45957, dtype: float64


In [13]:
# Baseline MLP (Target - DC Power, Plant 1)

if TRAIN_MODELS == True:
    # Start from a clean Keras session
    keras.backend.clear_session()

    # Build the network with the helper's default settings
    model = keras_helpers.build_multilayer_perceptron()

    # Derive the model identifier and the path the checkpoint is written to
    model_type = "MLP_DC_Plant1"
    model_id = keras_helpers.name_model(model_type)
    filepath_full = keras_helpers.make_save_string(model_id)

    # Callbacks: early stopping with a patience of 5 epochs, plus a checkpoint
    # that only keeps the best model seen so far
    earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint(
        filepath = filepath_full,
        save_best_only = True,
    )
    training_callbacks = [checkpoint_cb, earlystop_cb]

    # Fit on the training split, validating against the held-out split
    validation_split = (X_test_pt1, y_test_pt1)
    model.fit(
        X_train_pt1,
        y_train_pt1,
        epochs = 20,
        validation_data = validation_split,
        callbacks = training_callbacks,
    )

Building Model ...
Hidden Layers: 2, Neurons: 6, LR: 0.001
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Evaluate the baseline MLP (DC Power, Plant 1) on the held-back evaluation set

# When training was skipped, fall back to a previously saved model.
# NOTE(review): absolute, machine-specific path - will not resolve on another machine.
if TRAIN_MODELS == False:
    filepath_full = r'C:\Developer\solar-power-generation-project\Models\WJ_MLP_DC_2021_01_13-16_10_50.h5'

# Reload the model from disk (the checkpoint path, or the fallback above)
model = keras.models.load_model(filepath_full)

# Print the architecture
model.summary()

# Predict DC power for the plant 1 evaluation features
dc_pred_eval = model.predict(dc_evaluation_data_pt1)

# Report the evaluation metrics, then keep them for later use
model_name = "MLP_DC"
model_evaluation.evaluate_model(model_name, dc_eval_label_data_pt1, dc_pred_eval)
eval_stats = model_evaluation.return_model_evaluation_stats(dc_eval_label_data_pt1, dc_pred_eval)
rmse, mae, r2 = eval_stats

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 6)                 24        
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 42        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 73
Trainable params: 73
Non-trainable params: 0
_________________________________________________________________
MLP_DC rmse (Eval): 76.74447129784127
MLP_DC mae (Eval): 38.42543503639796
MLP_DC r2 (Eval): 0.9630481143508067


In [15]:
# Setup tensorboard for logging
# Ask the helper for a run log directory; the value is only printed here, and
# the training cell below requests its own directory for the TensorBoard callback.
x = tensorboard_helpers.get_run_logdir()

# Print tensorboard directory
print(x)

c:\Developer\solar-power-generation-project\Models\TensorBoard\run_2021_06_08-10_58_32


In [16]:
# Optimised MLP (Target - DC Power, Plant 1)
# Randomised hyper-parameter search over layer width and learning rate.

if TRAIN_MODELS == True:

    # Clear existing models
    keras.backend.clear_session()

    # Establish parameter distribution for tuning
    param_distribs = {
        "n_hidden":[12],
        "n_neurons": np.arange(1, 100),
        "learning_rate": [1e-1, 1e-2, 1e-3, 1e-4],
    }

    # Build model
    wrapped_model = keras_helpers.wrap_model()

    # Initialise random search.
    # random_state pins which parameter combinations are sampled, so a re-run
    # explores the same candidates (same seed as the train/test split above).
    rnd_search_cv = RandomizedSearchCV(wrapped_model, param_distribs, n_iter = 10, cv = 3, random_state = 0)

    # Name model
    model_type = "MLP_Opt_DC_Pt1"
    model_id = keras_helpers.name_model(model_type)
    filepath_full = keras_helpers.make_save_string(model_id)

    # Set save and earlystop callbacks
    earlystop_cb = keras.callbacks.EarlyStopping(patience = 3)
    checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

    # Set TensorBoard callback for logging
    tb_logdir = tensorboard_helpers.get_run_logdir()
    tensorboard_cb = keras.callbacks.TensorBoard(tb_logdir)

    # Train model (the extra keyword arguments are passed on to each underlying fit call)
    rnd_search_cv.fit(X_train_pt1, y_train_pt1, epochs = 100, validation_data = (X_test_pt1, y_test_pt1), callbacks = [checkpoint_cb, earlystop_cb, tensorboard_cb])

NameError: name 'X_train' is not defined

In [15]:
# Evaluate optimised MLP Model (DC Power, Plant 1)

# When training was skipped, fall back to a previously saved model.
# NOTE(review): absolute, machine-specific path - will not resolve on another machine.
if TRAIN_MODELS == False:
    filepath_full = r'C:\Developer\solar-power-generation-project\Models\WJ_MLP_Opt_DC_2021_01_13-15_30_27.h5'

# Load the saved model from disk
wrapped_model = keras.models.load_model(filepath_full)

# Summarise model
wrapped_model.summary()

# Make predictions on the plant 1 evaluation features.
# (The un-suffixed name `dc_evaluation_data` is never defined in this notebook
# and raises a NameError on a fresh kernel.)
dc_pred_eval = wrapped_model.predict(dc_evaluation_data_pt1)

# Determine model prediction stats against the plant 1 evaluation labels
model_name = "MLP_Opt_DC_pt1"
model_evaluation.evaluate_model(model_name, dc_eval_label_data_pt1, dc_pred_eval)
rmse, mae, r2 = model_evaluation.return_model_evaluation_stats(dc_eval_label_data_pt1, dc_pred_eval)

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_169 (Dense)            (None, 40)                280       
_________________________________________________________________
dense_170 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_171 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_172 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_173 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_174 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_175 (Dense)            (None, 1)               