In [None]:
#from https://thinkingneuron.com/using-artificial-neural-networks-for-regression-in-python/

import pandas as pd
import seaborn as sns
from sklearn.neural_network import MLPRegressor
import itertools
from sklearn.model_selection import cross_validate, RandomizedSearchCV, train_test_split
from sklearn.metrics import balanced_accuracy_score, confusion_matrix
import pickle
df = pd.read_csv("../PreProcessamentoDados/cleanedData.csv")

In [None]:
df[['Hour','Minute','Second']] = df.DateTime.str.split(":",expand=True)

In [None]:
df.head()

In [None]:
df.dtypes

In [None]:
# define a dictionary of column names and their corresponding data types
dtypes_dict = {'Hour': float, 'Minute': float}

# convert the columns to their corresponding data types
df = df.astype(dtypes_dict)

In [None]:
df.head()

In [None]:
# Name of the column to predict and of the columns used as model inputs
TargetVariable = ["Generated power"]
Predictors = [
    "Hour",
    "Minute",
    "TemperatureC",
    "DewpointC",
    "PressurehPa",
    "WindDirectionDegrees",
    "WindSpeedKMH",
    "WindSpeedGustKMH",
    "Humidity",
    "HourlyPrecipMM",
    "dailyrainMM",
    "SolarRadiationWatts_m2",
]

# Plain NumPy arrays for the scalers and the network. y is selected with a
# list of columns, so it is a 2-D array with one column.
X = df[Predictors].to_numpy()
y = df[TargetVariable].to_numpy()

In [None]:
# Report whether the raw target array contains any value below zero
print(
    'The array has negative values.'
    if (y < 0).any()
    else 'The array does not have negative values.'
)

In [None]:
# y = df['generated_power'].values
# X = df.drop(columns=['generated_power']).values

In [None]:
# y = y.reshape(-1,1)


In [None]:
### Standardization of data ###
# One StandardScaler for the predictor matrix X and one for the target y.
# NOTE(review): both scalers are fit on the full dataset, before the
# train/test split performed later in the notebook, so statistics of the test
# rows influence the scaling used for training (data leakage). Consider
# fitting the scalers on the training portion only.
from sklearn.preprocessing import StandardScaler
PredictorScaler=StandardScaler()
TargetVarScaler=StandardScaler()
 
# Storing the fit object for later reference
# (used further down for transform() and inverse_transform()).
PredictorScalerFit=PredictorScaler.fit(X)
TargetVarScalerFit=TargetVarScaler.fit(y)


In [None]:
# Generating the standardized values of X and y
# NOTE: X and y are overwritten with their scaled versions, so this cell is
# not idempotent: running it a second time without re-running the cells that
# create X and y scales the already-scaled data again.
X=PredictorScalerFit.transform(X)
y=TargetVarScalerFit.transform(y)
 

In [None]:
# Same check as before, now on the standardized target. After centering on
# the mean, negative values are expected unless the target is constant.
print(
    'The array has negative values.'
    if (y < 0).any()
    else 'The array does not have negative values.'
)

In [None]:
# Split the data into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Quick sanity check with the shapes of Training and testing datasets
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Baseline ANN: two hidden layers of 5 units each and one linear output unit.
model = Sequential()

# Input layer and FIRST hidden layer (they are defined together in Keras).
# The input width is read from the training matrix instead of being
# hard-coded, so it stays correct if the Predictors list changes; this also
# matches how FunctionFindBestParams sizes its input layer.
model.add(Dense(units=5, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))

# Second hidden layer; Keras infers its input size from the previous layer.
# NOTE(review): this layer uses 'tanh' while the model built inside
# FunctionFindBestParams uses 'relu' for the same layer, so the tuned
# hyperparameters are found on a slightly different architecture - confirm
# which activation is intended.
model.add(Dense(units=5, kernel_initializer='normal', activation='tanh'))

# Single fully connected output neuron with no activation, because the model
# predicts one continuous number.
model.add(Dense(1, kernel_initializer='normal'))

In [None]:

# Compiling the model
model.compile(loss='mean_squared_error', optimizer='adam',metrics=['mean_squared_error','mean_absolute_error'])

In [None]:
# Fitting the ANN to the Training set
model.fit(X_train, y_train ,batch_size = 20, epochs = 50, verbose=1)

# Hyperparameter tuning of ANN


# Finding best set of parameters using manual grid search


In [None]:
import numpy as np

# Defining a function to find the best parameters for ANN
def _build_default_ann(n_features):
    """Build and compile the small network evaluated by the grid search.

    Two hidden ReLU layers of 5 units each followed by a single linear output
    unit, compiled with mean-squared-error loss and the Adam optimizer.
    """
    model = Sequential()
    model.add(Dense(units=5, input_dim=n_features, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=5, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


def _mean_abs_pct_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are flattened first so that an (n, 1) prediction array and an
    (n,) target array are compared element by element instead of being
    broadcast to (n, n). The denominator is the absolute value of the target
    (a signed denominator would let negative targets produce negative
    "errors"), floored at machine epsilon to avoid division by zero.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    denominator = np.maximum(np.abs(y_true), np.finfo(float).eps)
    return float(np.mean(100 * np.abs(y_true - y_pred) / denominator))


def FunctionFindBestParams(X_train, y_train, X_test, y_test,
                           batch_size_list=None, epoch_list=None, model_builder=None):
    """Manual grid search over batch size and number of epochs.

    For every (batch_size, epochs) combination a fresh model is built, trained
    on the training data and scored on the test data with
    ``Accuracy = 100 - MAPE``.

    Parameters
    ----------
    X_train, y_train : array-like
        Training predictors and targets.
    X_test, y_test : array-like
        Held-out predictors and targets used for scoring.
    batch_size_list : list of int, optional
        Batch sizes to try. Defaults to ``[5, 10, 15, 20]``.
    epoch_list : list of int, optional
        Epoch counts to try. Defaults to ``[5, 10, 50, 100]``.
    model_builder : callable, optional
        ``model_builder(n_features)`` must return an object exposing
        ``fit(X, y, batch_size=..., epochs=..., verbose=...)`` and
        ``predict(X)``. Defaults to the two-hidden-layer Keras network.

    Returns
    -------
    pandas.DataFrame
        One row per trial with columns ``TrialNumber``, ``Parameters``
        (``"<batch_size>-<epochs>"``) and ``Accuracy``.

    Notes
    -----
    MAPE is computed on whatever scale ``y_test`` is given in. Targets at or
    near zero (as happens with standardized targets) inflate the percentage
    error heavily, so the score is most meaningful for targets in their
    original, non-zero units.
    """
    if batch_size_list is None:
        batch_size_list = [5, 10, 15, 20]
    if epoch_list is None:
        epoch_list = [5, 10, 50, 100]
    if model_builder is None:
        model_builder = _build_default_ann

    result_columns = ['TrialNumber', 'Parameters', 'Accuracy']
    # Rows are collected in a list and turned into a DataFrame once at the
    # end; DataFrame.append was removed in pandas 2.0 and was quadratic.
    records = []

    trial_number = 0
    for batch_size_trial in batch_size_list:
        for epochs_trial in epoch_list:
            trial_number += 1

            # A new model per trial so that trials do not share weights
            model = model_builder(X_train.shape[1])
            model.fit(X_train, y_train, batch_size=batch_size_trial, epochs=epochs_trial, verbose=0)

            MAPE = _mean_abs_pct_error(y_test, model.predict(X_test))
            accuracy = 100 - MAPE

            # printing the results of the current iteration
            print(trial_number, 'Parameters:', 'batch_size:', batch_size_trial, '-', 'epochs:', epochs_trial, 'MAPE:', MAPE, 'Accuracy:', accuracy)

            records.append([trial_number, str(batch_size_trial) + '-' + str(epochs_trial), accuracy])

    return pd.DataFrame(records, columns=result_columns)


######################################################
# Calling the function
ResultsData=FunctionFindBestParams(X_train, y_train, X_test, y_test)

In [None]:
%matplotlib inline
ResultsData.plot(x='Parameters', y='Accuracy', figsize=(15,4), kind='line')

In [None]:
ResultsData

In [None]:
# Work on a copy: plain assignment would only create a second name for the
# same DataFrame, so the in-place changes made to the "Accuracy" column of
# ResultsDataPlot would silently alter ResultsData as well.
ResultsDataPlot = ResultsData.copy()


In [None]:
ResultsDataPlot

In [None]:
ResultsDataPlot["Accuracy"] = ResultsDataPlot["Accuracy"].add(100)

In [None]:
ResultsDataPlot["Accuracy"] = ResultsDataPlot["Accuracy"].multiply(-1)


In [None]:
ResultsDataPlot

In [None]:
%matplotlib inline
ResultsDataPlot.plot(x='Parameters', y='Accuracy', figsize=(15,4), kind='line')

# Training the ANN model with the best parameters


In [None]:
# Fitting the ANN to the Training set
# NOTE(review): `model` is the network that was built and already trained in
# the earlier cells; the models created inside FunctionFindBestParams are
# local to that function. This call therefore continues training the existing
# weights for 5 more epochs instead of training a fresh network with the
# selected hyperparameters - confirm this is intended.
# validation_split holds out 33% of the training data for validation, which
# is what produces the 'val_loss' entries in `history`.
history=model.fit(
    X_train, 
    y_train ,
    batch_size = 15, 
    epochs = 5,     
    verbose=1,
    validation_split = 0.33)

In [None]:
# Generating Predictions on testing data (still on the standardized scale)
Predictions=model.predict(X_test)

# Scaling the predicted generated power back to its original scale
Predictions=TargetVarScalerFit.inverse_transform(Predictions)

# Scaling the actual y_test generated power back to its original scale
y_test_orig=TargetVarScalerFit.inverse_transform(y_test)

# Scaling the test predictors back to their original scale
Test_Data=PredictorScalerFit.inverse_transform(X_test)

# One row per test sample: original predictors, actual generation and the
# model's predicted generation side by side
TestingData=pd.DataFrame(data=Test_Data, columns=Predictors)
TestingData['Generation']=y_test_orig
TestingData['PredictedGeneration']=Predictions
TestingData.head()

In [None]:
# Keep only the rows where the predicted generation equals the actual
# generation exactly.
# NOTE(review): both columns hold floating-point values, so exact equality
# will presumably almost never hold and this is likely to print an empty
# frame; a tolerance-based comparison may be what is wanted - confirm.
filtered_data = TestingData.loc[TestingData['Generation'] == TestingData['PredictedGeneration']]

print(filtered_data)

In [None]:
TestingData

# Save and Evaluation

In [None]:
filename = './DNN_finalized_model'


In [None]:
# save the model to disk
model.save(filename)

In [None]:
import tensorflow as tf

# load the model from disk
model = tf.keras.models.load_model(filename)

In [None]:
print(type(model))

In [None]:
# Fitting the ANN to the Training set
# NOTE(review): `model` is the network just reloaded from disk, so this call
# trains it for 5 additional epochs on top of the saved weights and replaces
# the previous `history` object. The model on disk is not updated by this
# cell.
history=model.fit(
    X_train,
    y_train, 
    batch_size = 15,
    epochs = 5,
    verbose=1,
    validation_split = 0.33)


In [None]:

# Generating Predictions on testing data (still on the standardized scale)
Predictions=model.predict(X_test)

# Scaling the predicted generated power back to its original scale
Predictions=TargetVarScalerFit.inverse_transform(Predictions)

# Scaling the actual y_test generated power back to its original scale
y_test_orig=TargetVarScalerFit.inverse_transform(y_test)

# Scaling the test predictors back to their original scale
Test_Data=PredictorScalerFit.inverse_transform(X_test)

# One row per test sample: original predictors, actual generation and the
# model's predicted generation side by side. This rebuilds (and replaces) the
# TestingData frame created earlier in the notebook.
TestingData=pd.DataFrame(data=Test_Data, columns=Predictors)
TestingData['Generation']=y_test_orig
TestingData['PredictedGeneration']=Predictions
TestingData.head()

In [None]:
model.summary()

# Explainable AI with SHAP

In [None]:
import shap
# Build a SHAP DeepExplainer for the trained network from the training
# predictors, then compute SHAP values for those same training rows.
# NOTE(review): the next cell rebuilds `explainer` and `shap_values` from
# X_test, overwriting the results computed here; this cell may be redundant.
explainer = shap.DeepExplainer(model,X_train)

shap_values = explainer.shap_values(X_train)

In [23]:
import shap
explainer = shap.DeepExplainer(model,X_test)

shap_values = explainer.shap_values(X_test)

In [24]:
shap.summary_plot(shap_values,X_test)

[array([[-0.15718889, -0.03398376,  0.10363226, ...,  0.        ,
          0.        ,  1.4603172 ],
        [-0.30350978, -0.01413901, -0.00470937, ...,  0.        ,
          0.        , -0.31737816],
        [-0.29546018,  0.02198594, -0.00832522, ...,  0.        ,
          0.        , -0.30592685],
        ...,
        [-0.0824921 , -0.01265377, -0.02428068, ...,  0.        ,
          0.        , -0.45956556],
        [-0.08375002,  0.00612907, -0.02767705, ...,  0.        ,
          0.        , -0.3009693 ],
        [-0.3312422 ,  0.00578892,  0.00187964, ...,  0.        ,
          0.        , -0.31492582]])]

In [None]:
import matplotlib.pyplot as plt

def plot_loss(history, ax=None, ylim=(0, 10)):
    """Plot the training loss (and validation loss, if recorded) per epoch.

    Parameters
    ----------
    history : object
        Object with a ``history`` dict that contains a ``'loss'`` list and,
        optionally, a ``'val_loss'`` list (for example the value returned by
        ``model.fit``).
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current pyplot axes, which matches
        the previous behaviour.
    ylim : tuple or None, optional
        Limits for the y axis; ``None`` leaves matplotlib's autoscaling on.
        Defaults to ``(0, 10)``.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    ax.plot(history.history['loss'], label='loss')
    # 'val_loss' only exists when the model was fit with validation data
    if 'val_loss' in history.history:
        ax.plot(history.history['val_loss'], label='val_loss')

    if ylim is not None:
        ax.set_ylim(ylim)
    ax.set_xlabel('Epoch')
    # The model in this notebook is compiled with a mean-squared-error loss
    ax.set_ylabel('Error [MSE]')
    ax.legend()
    ax.grid(True)

In [None]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [None]:
plot_loss(history)