In [1]:
# Module Importations
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

# Report the version of every core library, one "<name> version = <version>" line each
print('\n'.join(' '.join(label_and_version) for label_and_version in (
    ('keras version =', keras.__version__),
    ('numpy version =', np.__version__),
    ('sklearn version =', sklearn.__version__),
    ('tensorflow version =', tf.__version__),
)))

keras version = 2.4.0
numpy version = 1.19.4
sklearn version = 0.22.2.post1
tensorflow version = 2.4.1


In [2]:
# Custom Module Imports
from Source.data import load_data
from Source.data import split_data
from Source.models import model_evaluation
from Source.models import keras_helpers
from Source.models import tensorboard_helpers

[keras_helpers]Tensorflow version: 2.4.1
[keras_helpers]keras version = 2.4.0


In [3]:
# Constants
# Divisor the evaluation cells apply to the RMSE when printing the indicative "% Acc" figure.
# NOTE(review): presumably the approximate span of DC_POWER values - confirm.
DC_Power_Range = 13000
# Switch read by the training / evaluation cells: the guarded training cells only fit models
# when this is True, and the evaluation cells load a previously saved model file when it is False.
TRAIN_MODELS = False

In [4]:
# Load dataset
# Reads 'full_data_df.pkl' through the project's load_data helper into original_dataset_df.
# NOTE(review): if the helper unpickles this file, only load it from a trusted source - confirm.
original_dataset_df = load_data.load_pickled_data('full_data_df.pkl')

Loading pickled dataframe started ...
Loading pickled dataframe complete ...


In [5]:
# Data Munging - Convert time of day to float

def convert_time_to_float(time):
    """Express a time of day as the fraction of a 24-hour day that has elapsed.

    Parameters:
        time: object exposing hour, minute, second and microsecond attributes.

    Returns:
        float: 0.0 at midnight, 0.5 at noon.
    """
    minutes_per_day = 24.0*60.0
    seconds_per_day = 24.0*60.0*60.0
    microseconds_per_day = 24.0*60.0*60.0*1000000.0

    # Accumulate from the coarsest unit to the finest
    day_fraction = time.hour / 24.0
    day_fraction = day_fraction + time.minute / minutes_per_day
    day_fraction = day_fraction + time.second / seconds_per_day
    day_fraction = day_fraction + time.microsecond / microseconds_per_day
    return day_fraction

# Derive TIME_OF_DAY (fraction of the day elapsed) from every DATE_TIME value.
# The converter is applied to the DATE_TIME column itself; the previous row-wise
# DataFrame.apply(axis = 1) built a full row object for each record just to read one field.
original_dataset_df['TIME_OF_DAY'] = original_dataset_df['DATE_TIME'].apply(convert_time_to_float)

print(original_dataset_df)

                 DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  \
0      2020-05-15 00:00:00   4135001  1BY6WEcLGh8j5v7       0.0       0.0   
1      2020-05-15 00:00:00   4135001  1IF53ai7Xc0U56Y       0.0       0.0   
2      2020-05-15 00:00:00   4135001  3PZuoBAID5Wc2HD       0.0       0.0   
3      2020-05-15 00:00:00   4135001  7JYdWkrLSPkdwr4       0.0       0.0   
4      2020-05-15 00:00:00   4135001  McdE0feGgRqW7Ca       0.0       0.0   
...                    ...       ...              ...       ...       ...   
137551 2020-06-17 23:45:00   4135001  uHbuxQJl8lW7ozc       0.0       0.0   
137552 2020-06-17 23:45:00   4135001  wCURE6d3bPkepu2       0.0       0.0   
137553 2020-06-17 23:45:00   4135001  z9Y9gH1T5YWrNuG       0.0       0.0   
137554 2020-06-17 23:45:00   4135001  zBIq5rxdHJRwDNY       0.0       0.0   
137555 2020-06-17 23:45:00   4135001  zVJPv84UY57bAof       0.0       0.0   

        DAILY_YIELD  TOTAL_YIELD CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP

In [6]:
# Data Munging - Convert plant to int

def convert_plant_to_int(plant):
    """Map a plant label to its integer code.

    The label "plant1" maps to 1 and every other label maps to 2. A value that is
    already the integer code 1 is returned as 1, so running the conversion a second
    time on an already-converted column no longer turns every plant into 2.

    Parameters:
        plant: plant label (e.g. "plant1") or an already-converted integer code.

    Returns:
        int: 1 or 2.
    """
    if isinstance(plant, str):
        return 1 if plant == "plant1" else 2

    # Already-converted input: keep code 1 as 1; anything else falls back to 2 as before
    return 1 if plant == 1 else 2

# Replace the PLANT labels with their integer codes. The converter is applied to the
# PLANT column directly instead of row by row over the whole frame.
original_dataset_df['PLANT'] = original_dataset_df['PLANT'].apply(convert_plant_to_int)

# DataFrame.info() writes its report itself and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
original_dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137556 entries, 0 to 137555
Data columns (total 13 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   DATE_TIME    137556 non-null  datetime64[ns]
 1   PLANT_ID     137556 non-null  int64         
 2   SOURCE_KEY   137556 non-null  object        
 3   DC_POWER     137556 non-null  float64       
 4   AC_POWER     137556 non-null  float64       
 5   DAILY_YIELD  137556 non-null  float64       
 6   TOTAL_YIELD  137556 non-null  float64       
 7   CELL_NO      137556 non-null  object        
 8   TIME_OF_DAY  137556 non-null  float64       
 9   AMB_TEMP     137556 non-null  float64       
 10  MOD_TEMP     137556 non-null  float64       
 11  IRRADIATION  137556 non-null  float64       
 12  PLANT        137556 non-null  int64         
dtypes: datetime64[ns](1), float64(8), int64(2), object(2)
memory usage: 13.6+ MB
None


In [8]:
# Data Munging - Convert cell to int

def convert_cellno_to_int(cell_no):
    """Return cell_no converted with the built-in int().

    Parameters:
        cell_no: value accepted by int(), e.g. a numeric string.

    Returns:
        int: the converted cell number.
    """
    return int(cell_no)

# Convert the CELL_NO values to integers. The converter is applied to the CELL_NO
# column directly instead of row by row over the whole frame.
original_dataset_df['CELL_NO'] = original_dataset_df['CELL_NO'].apply(convert_cellno_to_int)

# DataFrame.info() writes its report itself and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
original_dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137556 entries, 0 to 137555
Data columns (total 13 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   DATE_TIME    137556 non-null  datetime64[ns]
 1   PLANT_ID     137556 non-null  int64         
 2   SOURCE_KEY   137556 non-null  object        
 3   DC_POWER     137556 non-null  float64       
 4   AC_POWER     137556 non-null  float64       
 5   DAILY_YIELD  137556 non-null  float64       
 6   TOTAL_YIELD  137556 non-null  float64       
 7   CELL_NO      137556 non-null  int64         
 8   TIME_OF_DAY  137556 non-null  float64       
 9   AMB_TEMP     137556 non-null  float64       
 10  MOD_TEMP     137556 non-null  float64       
 11  IRRADIATION  137556 non-null  float64       
 12  PLANT        137556 non-null  int64         
dtypes: datetime64[ns](1), float64(8), int64(3), object(1)
memory usage: 13.6+ MB
None


In [9]:
# Split into training / evaluation sets
# 0.2 is the split fraction handed to the project's split helper; the recorded run
# reports 27511 of the 137556 rows going to the evaluation set.
training_set, evaluation_set = split_data.split_train_eval(original_dataset_df, 0.2)

Original Data Items: 137556
Training Data Items: 110045
Evaluation Data Items: 27511


In [10]:
# Remove the columns that the later cells use neither as features nor as the DC_POWER label

# Columns excluded from both the training and the evaluation set
columns_to_drop = [
    'DATE_TIME',
    'PLANT_ID',
    'SOURCE_KEY',
    'AC_POWER',
    'DAILY_YIELD',
    'TOTAL_YIELD',
]

training_set = training_set.drop(columns = columns_to_drop)
evaluation_set = evaluation_set.drop(columns = columns_to_drop)

# Preview the first rows of the reduced evaluation set
print(evaluation_set.head(5))

           DC_POWER  CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP  IRRADIATION  \
23464      0.000000        5     0.041667  23.478941  22.007802     0.000000   
82416      0.000000       12     0.229167  23.216699  21.191993     0.000000   
131200     0.000000        3     0.968750  24.652915  23.913763     0.000000   
120917     0.000000       15     0.093750  24.696277  23.876865     0.000000   
98459   3486.857143       17     0.364583  25.788373  28.674120     0.215449   

        PLANT  
23464       1  
82416       2  
131200      2  
120917      2  
98459       2  


In [11]:
# Create DC Power Target datasets

# Training set: the features are every column except DC_POWER; DC_POWER is the label
dc_power_training_data = training_set.drop('DC_POWER', axis = 1)
dc_label_data = training_set['DC_POWER'].copy()

# DataFrame.info() writes its report itself and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
dc_power_training_data.info()
print(dc_label_data)

# Evaluation set: same feature / label separation
dc_evaluation_data = evaluation_set.drop('DC_POWER', axis = 1)
dc_eval_label_data = evaluation_set['DC_POWER'].copy()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 110045 entries, 99361 to 121958
Data columns (total 6 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   CELL_NO      110045 non-null  int64  
 1   TIME_OF_DAY  110045 non-null  float64
 2   AMB_TEMP     110045 non-null  float64
 3   MOD_TEMP     110045 non-null  float64
 4   IRRADIATION  110045 non-null  float64
 5   PLANT        110045 non-null  int64  
dtypes: float64(4), int64(2)
memory usage: 5.9 MB
None
99361        0.000000
113108    3576.750000
53656        0.000000
46387     9456.625000
116927       0.000000
             ...     
110268       0.000000
119879    7230.250000
103694       0.000000
131932    1900.857143
121958    7665.750000
Name: DC_POWER, Length: 110045, dtype: float64


In [12]:
# Load to dataset
# Builds a tf.data.Dataset of (feature row, label) pairs from the training arrays.
# NOTE(review): the result is not bound to a name here, so it is only shown as this
# cell's output (the dataset repr) - confirm whether it was meant to be kept for training.
tf.data.Dataset.from_tensor_slices((dc_power_training_data.values, dc_label_data.values))

<TensorSliceDataset shapes: ((6,), ()), types: (tf.float64, tf.float64)>

In [13]:
# Hold out 20% of the training data, with a fixed seed, as the X_test / y_test split
X_train, X_test, y_train, y_test = train_test_split(
    dc_power_training_data,
    dc_label_data,
    test_size = 0.2,
    random_state = 0,
)

# Show the training features followed by the training labels
print(X_train, y_train, sep = '\n')

        CELL_NO  TIME_OF_DAY   AMB_TEMP   MOD_TEMP  IRRADIATION  PLANT
26727         6     0.614583  32.965332  46.834515     0.537781      1
131564       14     0.135417  24.157297  23.330549     0.000000      2
39111        13     0.854167  23.664010  21.192350     0.000000      1
85370        14     0.729167  37.208696  39.745883     0.128514      2
52462         8     0.250000  21.767090  19.326091     0.009079      1
...         ...          ...        ...        ...          ...    ...
129940       18     0.364583  27.568193  35.298639     0.336816      2
49470         8     0.833333  24.800704  21.858350     0.000000      1
44627        11     0.510417  29.854372  53.016915     0.688047      1
90334        14     0.114583  25.172858  24.225251     0.000000      2
115964       12     0.750000  31.154428  31.562370     0.068103      2

[88036 rows x 6 columns]
26727     8246.714286
131564       0.000000
39111        0.000000
85370     2074.500000
52462      102.285714
            

In [14]:
# Initial MLP (Target - DC Power)
# Fits a fresh multilayer perceptron on the DC power training split.
# Skipped entirely unless TRAIN_MODELS is set, in which case filepath_full is left
# pointing at the checkpoint file written during this run.

if TRAIN_MODELS:
    # Clear existing models
    keras.backend.clear_session()

    # Build model (called without arguments, so the helper's own defaults apply)
    model = keras_helpers.build_multilayer_perceptron()

    # Name model
    model_type = "MLP_DC"
    model_id = keras_helpers.name_model(model_type)
    filepath_full = keras_helpers.make_save_string(model_id)

    # Set save and earlystop callbacks
    # NOTE(review): with epochs = 5 below, patience = 5 appears unable to stop training
    # before the final epoch - confirm whether a smaller patience or more epochs is intended.
    earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

    # Train model, validating on the held-out X_test / y_test split
    model.fit(X_train, y_train, epochs = 5, validation_data =(X_test, y_test), callbacks =[checkpoint_cb, earlystop_cb])

Building Model ...
Hidden Layers: 2, Neurons: 6, LR: 0.001
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Evaluate MLP Model
# Loads a saved model, predicts DC power for the evaluation set and reports its metrics.

# When no training is requested, evaluate a previously saved model file. Otherwise
# filepath_full must already have been set by the training cell.
# NOTE(review): this is an absolute, machine-specific path - consider making it configurable.
if not TRAIN_MODELS:
    filepath_full = r'C:\Developer\solar-power-generation-project\Models\WJ_MLP_DC_2021_01_13-16_10_50.h5'

# Load model
model = keras.models.load_model(filepath_full)

# Summarise model
model.summary()

# Make predictions
dc_pred_eval = model.predict(dc_evaluation_data)

# Determine model prediction stats
model_name = "MLP_DC"
model_evaluation.evaluate_model(model_name, dc_eval_label_data, dc_pred_eval)

# Calculate indicative accuracy: the RMSE expressed as a share of DC_Power_Range,
# subtracted from 1 and shown as a percentage
rmse, mae, r2 = model_evaluation.return_model_evaluation_stats(dc_eval_label_data, dc_pred_eval)

print(model_name, "% Acc:", ((1-(rmse/DC_Power_Range))*100))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 6)                 42        
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 42        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________
MLP_DC rmse (Eval): 1407.753559548739
MLP_DC mae (Eval): 791.5627798654845
MLP_DC r2 (Eval): 0.8785556280612263
MLP_DC % Acc: 89.17112646500969


In [16]:
# Optimised MLP (Target - DC Power)
# Randomised hyper-parameter search over the wrapped MLP.
# Skipped entirely unless TRAIN_MODELS is set.

if TRAIN_MODELS:
    # Clear existing models
    keras.backend.clear_session()

    # Establish parameter distribution for tuning:
    # hidden layers fixed at 12, neurons drawn from 1..99, four candidate learning rates
    param_distribs = {
        "n_hidden":[12],
        "n_neurons": np.arange(1, 100),
        "learning_rate": [1e-1, 1e-2, 1e-3, 1e-4],
    }

    # Build model
    # NOTE(review): presumably returns a scikit-learn compatible wrapper around the Keras model - confirm.
    wrapped_model = keras_helpers.wrap_model()

    # Initialise random search: 10 sampled parameter settings, 3-fold cross-validation each
    rnd_search_cv = RandomizedSearchCV(wrapped_model, param_distribs, n_iter = 10, cv = 3)

    # Name model
    model_type = "MLP_Opt_DC"
    model_id = keras_helpers.name_model(model_type)
    filepath_full = keras_helpers.make_save_string(model_id)

    # Set save and earlystop callbacks
    # NOTE(review): the same checkpoint callback, and therefore the same file path, is
    # handed to every fit run by the search - confirm that this is intended.
    earlystop_cb = keras.callbacks.EarlyStopping(patience = 3)
    checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

    # Train model; epochs, validation data and callbacks are passed through as fit parameters
    rnd_search_cv.fit(X_train, y_train, epochs = 5, validation_data =(X_test, y_test), callbacks = [checkpoint_cb, earlystop_cb])

Building Model ...
Hidden Layers: 12, Neurons: 19, LR: 0.1
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Building Model ...
Hidden Layers: 12, Neurons: 19, LR: 0.1
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Building Model ...
Hidden Layers: 12, Neurons: 19, LR: 0.1
Epoch 1/5

KeyboardInterrupt: 

In [15]:
# Evaluate MLP Model
# Loads the saved optimised model, predicts DC power for the evaluation set and reports its metrics.
# NOTE(review): the recorded run of this cell ended in "ValueError: Failed to convert a NumPy
# array to a Tensor (Unsupported object type float)"; presumably the evaluation data still held
# object-dtype columns in that session - confirm by re-running from a fresh kernel.

# When no training is requested, evaluate a previously saved model file. Otherwise
# filepath_full must already have been set by the training cell.
# NOTE(review): this is an absolute, machine-specific path - consider making it configurable.
if not TRAIN_MODELS:
    filepath_full = r'C:\Developer\solar-power-generation-project\Models\WJ_MLP_Opt_DC_2021_01_13-15_30_27.h5'

# Load model (the name is reused from the search cell, but this holds the model loaded from file)
wrapped_model = keras.models.load_model(filepath_full)

# Summarise model
wrapped_model.summary()

# Make predictions
dc_pred_eval = wrapped_model.predict(dc_evaluation_data)

# Determine model prediction stats
model_name = "MLP_Opt_DC"
model_evaluation.evaluate_model(model_name, dc_eval_label_data, dc_pred_eval)

# Calculate indicative accuracy: the RMSE expressed as a share of DC_Power_Range,
# subtracted from 1 and shown as a percentage
rmse, mae, r2 = model_evaluation.return_model_evaluation_stats(dc_eval_label_data, dc_pred_eval)

print(model_name, "% Acc:", ((1-(rmse/DC_Power_Range))*100))

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_169 (Dense)            (None, 40)                280       
_________________________________________________________________
dense_170 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_171 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_172 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_173 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_174 (Dense)            (None, 40)                1640      
_________________________________________________________________
dense_175 (Dense)            (None, 1)               

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [15]:
# Ask the TensorBoard helper for a run log directory and print it.
# The recorded output is a timestamped "run_..." folder under Models\TensorBoard.
x = tensorboard_helpers.get_run_logdir()

print(x)

c:\Developer\solar-power-generation-project\Models\TensorBoard\run_2021_01_21-10_38_37


In [16]:
# Train the initial MLP (Target - DC Power).
# NOTE(review): this cell repeats the guarded "Initial MLP" training steps without the
# TRAIN_MODELS check, so it trains whenever it is run - confirm whether it should be guarded or removed.
# NOTE(review): the recorded run ended in "ValueError: Failed to convert a NumPy array to a
# Tensor (Unsupported object type float)"; presumably X_train still held object-dtype columns
# in that session - confirm by re-running from a fresh kernel.

# Clear existing models
keras.backend.clear_session()

# Build model
model = keras_helpers.build_multilayer_perceptron()

# Name model
model_type = "MLP_DC"
model_id = keras_helpers.name_model(model_type)
filepath_full = keras_helpers.make_save_string(model_id)

# Set save and earlystop callbacks
earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

# Train model, validating on the held-out X_test / y_test split
model.fit(X_train, y_train, epochs = 5, validation_data =(X_test, y_test), callbacks =[checkpoint_cb, earlystop_cb])

Building Model ...
Hidden Layers: 2, Neurons: 6, LR: 0.001


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [13]:
# Train the initial MLP (Target - DC Power) with TensorBoard logging added.
# NOTE(review): like the other unguarded training cell, this runs regardless of TRAIN_MODELS.
# NOTE(review): the recorded run ended in "AttributeError: 'Sequential' object has no attribute
# '_get_distribution_strategy'"; presumably the model built by keras_helpers comes from a different
# Keras implementation than the tensorflow.keras TensorBoard callback used here - confirm in keras_helpers.

# Clear existing models
keras.backend.clear_session()

# Build model
model = keras_helpers.build_multilayer_perceptron()

# Name model
model_type = "MLP_DC"
model_id = keras_helpers.name_model(model_type)
filepath_full = keras_helpers.make_save_string(model_id)

# Set save and earlystop callbacks
earlystop_cb = keras.callbacks.EarlyStopping(patience = 5)
checkpoint_cb = keras.callbacks.ModelCheckpoint(filepath = filepath_full, save_best_only = True)

# Set TensorBoard callback for logging (writes to the run directory supplied by the helper)
tb_logdir = tensorboard_helpers.get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(tb_logdir)

# Train model, validating on the held-out X_test / y_test split
model.fit(X_train, y_train, epochs = 5, validation_data =(X_test, y_test), callbacks =[checkpoint_cb, earlystop_cb, tensorboard_cb])

Building Model ...
Hidden Layers: 2, Neurons: 6, LR: 0.001
Train on 88036 samples, validate on 22009 samples


AttributeError: 'Sequential' object has no attribute '_get_distribution_strategy'