In [13]:
# Module Importations
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import keras
import numpy as np

# Print versioning information
print(sklearn.__version__, np.__version__) 
print('keras.__version__=', keras.__version__)

0.22.2.post1 1.19.4
keras.__version__= 2.3.1


In [14]:
# Custom Module Imports
from Source.data import load_data
from Source.data import split_data
from Source.models import model_evaluation
from Source.models import keras_helpers

In [15]:
# Load dataset
original_dataset_df = load_data.load_pickled_data('full_data_df.pkl')

Loaded pickled dataframe ...


In [16]:
# Data Munging - Convert time of day to float

def convert_time_to_float(time):
    return time.hour / 24.0 + time.minute / (24.0*60.0) + time.second / (24.0*60.0*60.0) + time.microsecond / (24.0*60.0*60.0*1000000.0)

original_dataset_df['TIME_OF_DAY'] = original_dataset_df.apply(lambda row: convert_time_to_float(row['DATE_TIME']), axis = 1)

print(original_dataset_df)

                 DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  \
0      2020-05-15 00:00:00   4135001  1BY6WEcLGh8j5v7       0.0       0.0   
1      2020-05-15 00:00:00   4135001  1IF53ai7Xc0U56Y       0.0       0.0   
2      2020-05-15 00:00:00   4135001  3PZuoBAID5Wc2HD       0.0       0.0   
3      2020-05-15 00:00:00   4135001  7JYdWkrLSPkdwr4       0.0       0.0   
4      2020-05-15 00:00:00   4135001  McdE0feGgRqW7Ca       0.0       0.0   
...                    ...       ...              ...       ...       ...   
137551 2020-06-17 23:45:00   4135001  uHbuxQJl8lW7ozc       0.0       0.0   
137552 2020-06-17 23:45:00   4135001  wCURE6d3bPkepu2       0.0       0.0   
137553 2020-06-17 23:45:00   4135001  z9Y9gH1T5YWrNuG       0.0       0.0   
137554 2020-06-17 23:45:00   4135001  zBIq5rxdHJRwDNY       0.0       0.0   
137555 2020-06-17 23:45:00   4135001  zVJPv84UY57bAof       0.0       0.0   

        DAILY_YIELD  TOTAL_YIELD CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP

In [17]:
# Data Munging - Convert plant to int

def convert_plant_to_int(plant):
    
    if plant == "plant1":
        return 1
    else:
        return 2    

original_dataset_df['PLANT'] = original_dataset_df.apply(lambda row: convert_plant_to_int(row['PLANT']), axis = 1)

print(original_dataset_df)

                 DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  \
0      2020-05-15 00:00:00   4135001  1BY6WEcLGh8j5v7       0.0       0.0   
1      2020-05-15 00:00:00   4135001  1IF53ai7Xc0U56Y       0.0       0.0   
2      2020-05-15 00:00:00   4135001  3PZuoBAID5Wc2HD       0.0       0.0   
3      2020-05-15 00:00:00   4135001  7JYdWkrLSPkdwr4       0.0       0.0   
4      2020-05-15 00:00:00   4135001  McdE0feGgRqW7Ca       0.0       0.0   
...                    ...       ...              ...       ...       ...   
137551 2020-06-17 23:45:00   4135001  uHbuxQJl8lW7ozc       0.0       0.0   
137552 2020-06-17 23:45:00   4135001  wCURE6d3bPkepu2       0.0       0.0   
137553 2020-06-17 23:45:00   4135001  z9Y9gH1T5YWrNuG       0.0       0.0   
137554 2020-06-17 23:45:00   4135001  zBIq5rxdHJRwDNY       0.0       0.0   
137555 2020-06-17 23:45:00   4135001  zVJPv84UY57bAof       0.0       0.0   

        DAILY_YIELD  TOTAL_YIELD CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP

In [18]:
# Split into training / evaluation sets
training_set, evaluation_set = split_data.split_train_eval(original_dataset_df, 0.2)

Original Data Items: 137556
Training Data Items: 110045
Evaluation Data Items: 27511


In [19]:
# Drop unrequired data columns

# Identify columns to drop 
columns_to_drop = ['DATE_TIME', 'PLANT_ID', 'SOURCE_KEY', 'AC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD']

training_set = training_set.drop(columns_to_drop, axis = 1)
evaluation_set = evaluation_set.drop(columns_to_drop, axis = 1)

print(evaluation_set.head(5))

           DC_POWER CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP  IRRADIATION  \
23464      0.000000      05     0.041667  23.478941  22.007802     0.000000   
82416      0.000000      12     0.229167  23.216699  21.191993     0.000000   
131200     0.000000      03     0.968750  24.652915  23.913763     0.000000   
120917     0.000000      15     0.093750  24.696277  23.876865     0.000000   
98459   3486.857143      17     0.364583  25.788373  28.674120     0.215449   

        PLANT  
23464       1  
82416       2  
131200      2  
120917      2  
98459       2  


In [20]:
# Create DC Power Target datasets

# Modify training set
dc_power_training_data = training_set.drop('DC_POWER', axis = 1)
dc_label_data = training_set['DC_POWER'].copy()

# Modify evaluation set
dc_evaluation_data = evaluation_set.drop('DC_POWER', axis = 1)
dc_eval_label_data = evaluation_set['DC_POWER'].copy()

In [21]:
# Create train and test arrays
X_train, X_test, y_train, y_test = train_test_split(dc_power_training_data, dc_label_data, test_size = 0.2, random_state = 0)

In [22]:
# Initial MLP (Target - DC Power)

# Clear existing models
keras.backend.clear_session()

# Build model
model = keras_helpers.build_multilayer_perceptron()

# Name model
model_type = "MLP_DC"
model_id = keras_helpers.name_model(model_type)
filepath_full = keras_helpers.make_save_string(model_id)

# Set save and earlystop callbacks
earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

# Train model
model.fit(X_train, y_train, epochs = 100, validation_data =(X_test, y_test), callbacks =[checkpoint_cb, earlystop_cb])

Train on 88036 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


<keras.callbacks.callbacks.History at 0x23b26554588>

In [23]:
# Evaluate MLP Model

# Make predictions
dc_pred_eval = model.predict(dc_evaluation_data)

model_name = "MLP_DC"
model_evaluation.evaluate_model(model_name, dc_eval_label_data, dc_pred_eval)

MLP_DC rmse (Eval): 4039.649323340588
MLP_DC mae (Eval): 3518.525014834632
MLP_DC r2 (Eval): -2.6381909228323508e-05


In [24]:
# Optimised MLP (Target - DC Power)

# Clear existing models
keras.backend.clear_session()

# Establish parameter distribution for tuning
param_distribs = {
    "n_hidden":[1, 4, 8],
    "n_neurons": np.arange(1, 50),
    "learning_rate": (3e-2, 3e-4)
}

# Build model
wrapped_model = keras_helpers.wrap_model()

# Initialise random search
rnd_search_cv = RandomizedSearchCV(wrapped_model, param_distribs, n_iter = 10, cv = 3)

# Name model
model_type = "MLP_Opt_DC"
model_id = keras_helpers.name_model(model_type)
filepath_full = keras_helpers.make_save_string(model_id)

# Set save and earlystop callbacks
earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

# Train model
rnd_search_cv.fit(X_train, y_train, epochs = 100, validation_data =(X_test, y_test), callbacks =[checkpoint_cb, earlystop_cb])

Train on 58690 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58690 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58690 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 58691 samples, validate on 22009 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 5

RuntimeError: Cannot clone object <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023B20508B00>, as the constructor either does not set or modifies parameter n_neurons

In [None]:
# Evaluate MLP Model

# Make predictions
dc_pred_eval = wrapped_model.predict(dc_evaluation_data)

model_name = "MLP_OPT_DC"
model_evaluation.evaluate_model(model_name, dc_eval_label_data, dc_pred_eval)