In [1]:
#Importing packages
import numpy as np
import pandas as pd
import warnings
import io
warnings.filterwarnings("ignore")
from google.colab import files

In [3]:
#Uploading files in colab
uploaded = files.upload()

Saving electricity_demand.csv to electricity_demand.csv


In [4]:
#Electricity demand data set
electricity_demand = pd.read_csv(io.BytesIO(uploaded['electricity_demand.csv']))

In [5]:
#Display data
electricity_demand.head()

Unnamed: 0,date,demand,RRP,min_temperature,max_temperature,solar_exposure,rainfall,school_day,holiday
0,01-01-2015,99635.03,25.633696,13.3,26.9,23.6,0.0,N,Y
1,01-02-2015,129606.01,33.138988,15.4,38.8,26.8,0.0,N,N
2,01-03-2015,142300.54,34.564855,20.0,38.2,26.5,0.0,N,N
3,01-04-2015,104330.715,25.00556,16.3,21.4,25.2,4.2,N,N
4,01-05-2015,118132.2,26.724176,15.0,22.0,30.7,0.0,N,N


In [6]:
#Obtain the independent variables for predicting RRP attribute
data = electricity_demand.drop(columns=['date','RRP'])
data['school_day'] = data['school_day'].replace(to_replace ="N", value = 0)
data['school_day'] = data['school_day'].replace(to_replace ="Y", value = 1)
data['holiday'] = data['holiday'].replace(to_replace ="N", value = 0)
data['holiday'] = data['holiday'].replace(to_replace ="Y", value = 1)

In [7]:
#Identify the missing values and fill it with 0
data.isnull().sum()
data = data.fillna(0)
data.isnull().sum()

demand             0
min_temperature    0
max_temperature    0
solar_exposure     0
rainfall           0
school_day         0
holiday            0
dtype: int64

In [8]:
#Display data
data.head()

Unnamed: 0,demand,min_temperature,max_temperature,solar_exposure,rainfall,school_day,holiday
0,99635.03,13.3,26.9,23.6,0.0,0,1
1,129606.01,15.4,38.8,26.8,0.0,0,0
2,142300.54,20.0,38.2,26.5,0.0,0,0
3,104330.715,16.3,21.4,25.2,4.2,0,0
4,118132.2,15.0,22.0,30.7,0.0,0,0


In [9]:
#Obtain the RRP attribute to form target data set
target = electricity_demand['RRP']

In [10]:
#Import statement
from sklearn.model_selection import train_test_split

#Splitting data to form training and testing set
training_data, testing_data, training_target, testing_target = train_test_split(
   data, target, test_size = 0.2, random_state = 1
)

In [10]:
training_data.head()

Unnamed: 0,demand,min_temperature,max_temperature,solar_exposure,rainfall,school_day,holiday
372,111879.54,16.2,20.6,25.4,0.0,0,0
137,127524.8,7.2,20.4,10.2,0.0,1,0
1940,106565.295,15.1,22.3,12.7,0.4,0,0
1503,107697.205,18.4,21.5,12.4,0.0,1,0
325,98621.62,13.5,20.0,27.3,0.0,1,0


In [25]:
training_target.head()

372     36.888156
137     39.333713
1940    26.641180
1503    57.338212
325     32.345901
Name: RRP, dtype: float64

In [26]:
testing_data.head()

Unnamed: 0,demand,min_temperature,max_temperature,solar_exposure,rainfall,school_day,holiday
804,139672.835,20.8,33.2,18.9,0.0,1,0
1291,119464.315,6.0,13.8,9.8,0.0,0,0
1287,143461.125,6.6,11.6,6.0,0.0,0,0
1846,112128.195,14.8,23.5,28.4,44.0,0,0
242,137724.895,8.1,13.0,11.5,0.0,1,0


In [27]:
testing_target.head(10)

804     102.012218
1291     67.790910
1287     81.952519
1846     53.564870
242      50.273287
1787     49.794293
169      38.550714
1714     83.052148
111      36.695284
1138     93.173936
Name: RRP, dtype: float64

In [35]:
#Import statement
from sklearn import tree
from sklearn.metrics import mean_squared_error, mean_absolute_error 

#Train the decision tree regressor and predict the output
clf = tree.DecisionTreeRegressor()
print("Decision Tree Regressor Structure: \n",clf)
clf = clf.fit(training_data, training_target)
pred = clf.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

Decision Tree Regressor Structure: 
 DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')
Prediction values:
 [ 79.50264397  50.7877126   67.9384962   24.06507765 175.8546299
  21.09536038 102.3388107   68.07737227  89.23620112  31.32939962]

Root Mean Squared Error:  58.14914440571473

Mean Absolute Error:  38.678830079445675


In [25]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
gsc = GridSearchCV(
        estimator=SVR(kernel='rbf'),
        param_grid={
            'C': [0.1, 1, 100, 1000],
            'epsilon': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
            'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5]
        },
        cv=3, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
grid_result = gsc.fit(training_data, training_target)
grid_result.best_params_

{'C': 1000, 'epsilon': 10, 'gamma': 0.1}

In [34]:
#Train the model and predict the RRP attribute
regressor = SVR(C= 1000, epsilon= 10, gamma= 0.1)
print("Support Vector Regressor:\n",regressor)
regressor.fit(training_data, training_target)
pred = regressor.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

Support Vector Regressor:
 SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=10, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
Prediction values:
 [72.60340823 72.60340823 72.60340823 72.60340823 72.68618056 72.60340823
 72.60340823 72.60340837 72.60340823 72.60340823]

Root Mean Squared Error:  48.688402284155295

Mean Absolute Error:  32.61254926086508


In [33]:
#Import statement
from sklearn.svm import SVR

#Train the model and predict the RRP attribute
regressor = SVR()
print("Support Vector Regressor:\n",regressor)
regressor.fit(training_data, training_target)
pred = regressor.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

Support Vector Regressor:
 SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
Prediction values:
 [70.50883076 62.65754586 71.85863445 59.72507029 69.7940261  54.07210638
 70.54822206 63.31383929 66.298582   61.35210631]

Root Mean Squared Error:  47.33908027561177

Mean Absolute Error:  30.79351594633852


In [32]:
#Import statement
from sklearn.neural_network import MLPRegressor

#Train the model and predict the RRP attribute
regressor = MLPRegressor()
print("MLP Regressor:\n",regressor)
regressor.fit(training_data, training_target)
pred = regressor.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

MLP Regressor:
 MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=None, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)
Prediction values:
 [84.1660714  67.8758393  80.53735548 72.91726455 77.82321015 58.09498528
 78.69821649 69.68475413 74.15233477 69.22761369]

Root Mean Squared Error:  45.96455973553287

Mean Absolute Error:  31.039808318130504


In [18]:
from sklearn.model_selection import GridSearchCV
# Create the parameter grid
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}
# Create a based model
rf = RandomForestRegressor()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)
# Fit the grid search to the data
grid_search.fit(training_data, training_target)
grid_search.best_params_

Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   27.5s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:  8.5min
[Parallel(n_jobs=-1)]: Done 864 out of 864 | elapsed: 11.5min finished


{'bootstrap': True,
 'max_depth': 100,
 'max_features': 3,
 'min_samples_leaf': 3,
 'min_samples_split': 12,
 'n_estimators': 100}

In [31]:
#Import statement
from sklearn.ensemble import RandomForestRegressor

#Train the model and predict the RRP attribute
regressor = RandomForestRegressor(bootstrap=True, max_depth=100, max_features=3, min_samples_leaf= 3, min_samples_split= 12, n_estimators= 100)
print("Random Forest Regressor:\n",regressor)
regressor.fit(training_data, training_target)
pred = regressor.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

Random Forest Regressor:
 RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=100, max_features=3, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=12, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)
Prediction values:
 [94.65518374 67.21027166 61.62489638 52.26338301 89.33966913 54.74629765
 89.29290143 75.67504559 63.58779327 66.42720873]

Root Mean Squared Error:  80.16162149271159

Mean Absolute Error:  34.92117108148677


In [30]:
#Import statement
from sklearn.neighbors import KNeighborsRegressor

#Train the model and predict the RRP attribute
neigh = KNeighborsRegressor(n_neighbors=8)
print("KNeighbors Regressor:\n",neigh)
neigh.fit(training_data, training_target)
pred = neigh.predict(testing_data)

print("Prediction values:\n",pred[:10])

rmse = np.sqrt(mean_squared_error(testing_target,pred))
print("\nRoot Mean Squared Error: ",rmse)

mae = mean_absolute_error(testing_target,pred)
print("\nMean Absolute Error: ",mae)

KNeighbors Regressor:
 KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=8, p=2,
                    weights='uniform')
Prediction values:
 [ 92.85568309  56.12039665  65.8763137   33.71248726 113.79700035
  67.991639    91.88662715  70.71198923  87.51138743  55.4343802 ]

Root Mean Squared Error:  88.99468241730065

Mean Absolute Error:  38.69819912282317
