In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras import optimizers
from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

%matplotlib inline
warnings.filterwarnings("ignore")
init_notebook_mode(connected=True)

# Set seeds to make the experiment more reproducible.
from tensorflow import set_random_seed
from numpy.random import seed
set_random_seed(1)
seed(1)

ModuleNotFoundError: No module named 'keras.layers.convolutional'

In [2]:
consumption = pd.read_csv('trainData/energy consumption.csv')
solar = pd.read_csv('trainData/solar.csv')
site = pd.read_csv('trainData/site.csv')

In [3]:
solar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14400 entries, 0 to 14399
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Site                14400 non-null  object 
 1   Day                 14400 non-null  int64  
 2   Hour                14400 non-null  int64  
 3   Solar Zenith Angle  14400 non-null  float64
 4   Clearsky DHI        14400 non-null  float64
 5   Clearsky DNI        14400 non-null  float64
 6   Clearsky GHI        14400 non-null  float64
 7   Relative Humidity   14400 non-null  float64
 8   DHI                 14400 non-null  float64
 9   DNI                 14400 non-null  float64
 10  GHI                 14400 non-null  float64
 11  Output              14400 non-null  float64
dtypes: float64(9), int64(2), object(1)
memory usage: 1.3+ MB


In [4]:
base_date = pd.to_datetime("2024-01-01")
solar['datetime'] = base_date + pd.to_timedelta(solar['Day'] - 1, unit='d') + pd.to_timedelta(solar['Hour'], unit='h')

In [5]:
solar.drop(["Day", "Hour"], axis=1, inplace=True)

In [6]:
#solar.set_index("datetime", inplace=True)

In [7]:
daily_sites_sc = go.Scatter(x=solar['datetime'], y=solar['Output'])
layout = go.Layout(title='Hourly Output', xaxis=dict(title='Date'), yaxis=dict(title='Output'))
fig = go.Figure(data=[daily_sites_sc], layout=layout)
iplot(fig)

In [8]:
by_site = []
for site in solar['Site'].unique():
    current_store_daily_sales = solar[(solar['Site'] == site)]
    by_site.append(go.Scatter(x=current_store_daily_sales['datetime'], y=current_store_daily_sales['Output'], name=('Site %s' % site)))

layout = go.Layout(title='Site hourly output', xaxis=dict(title='Date'), yaxis=dict(title='Output'))
fig = go.Figure(data=by_site, layout=layout)
iplot(fig)

In [9]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    # Input sequence (t-n, ... t-1)
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    # Current timestep (t=0)
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    # Target timestep (t=lag)
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [10]:
window = 719
lag = 168
names = solar.columns

In [11]:
series = series_to_supervised(solar, window, lag)
series.head()

Unnamed: 0,Site(t-719),Solar Zenith Angle(t-719),Clearsky DHI(t-719),Clearsky DNI(t-719),Clearsky GHI(t-719),Relative Humidity(t-719),DHI(t-719),DNI(t-719),GHI(t-719),Output(t-719),...,Solar Zenith Angle(t+168),Clearsky DHI(t+168),Clearsky DNI(t+168),Clearsky GHI(t+168),Relative Humidity(t+168),DHI(t+168),DNI(t+168),GHI(t+168),Output(t+168),datetime(t+168)
719,site1,138.12,0.0,0.0,0.0,59.05,0.0,0.0,0.0,0.0,...,160.01,0.0,0.0,0.0,76.64,0.0,0.0,0.0,0.0,2024-02-06 23:00:00
720,site1,125.23,0.0,0.0,0.0,62.04,0.0,0.0,0.0,0.0,...,146.2,0.0,0.0,0.0,81.95,0.0,0.0,0.0,0.0,2024-02-07 00:00:00
721,site1,116.75,0.0,0.0,0.0,65.29,0.0,0.0,0.0,0.0,...,135.29,0.0,0.0,0.0,91.53,0.0,0.0,0.0,0.0,2024-02-07 01:00:00
722,site1,97.86,0.0,0.0,0.0,63.05,0.0,0.0,0.0,0.0,...,115.25,0.0,0.0,0.0,99.46,0.0,0.0,0.0,0.0,2024-02-07 02:00:00
723,site1,85.22,30.97,204.94,48.05,57.81,32.22,0.0,33.7,0.0,...,104.88,0.0,0.0,0.0,96.23,0.0,0.0,0.0,0.0,2024-02-07 03:00:00


In [12]:
last_site = 'Site(t-%d)' % window
series = series[(series['Site(t)'] == series[last_site])]
series.head()

Unnamed: 0,Site(t-719),Solar Zenith Angle(t-719),Clearsky DHI(t-719),Clearsky DNI(t-719),Clearsky GHI(t-719),Relative Humidity(t-719),DHI(t-719),DNI(t-719),GHI(t-719),Output(t-719),...,Solar Zenith Angle(t+168),Clearsky DHI(t+168),Clearsky DNI(t+168),Clearsky GHI(t+168),Relative Humidity(t+168),DHI(t+168),DNI(t+168),GHI(t+168),Output(t+168),datetime(t+168)
719,site1,138.12,0.0,0.0,0.0,59.05,0.0,0.0,0.0,0.0,...,160.01,0.0,0.0,0.0,76.64,0.0,0.0,0.0,0.0,2024-02-06 23:00:00
720,site1,125.23,0.0,0.0,0.0,62.04,0.0,0.0,0.0,0.0,...,146.2,0.0,0.0,0.0,81.95,0.0,0.0,0.0,0.0,2024-02-07 00:00:00
721,site1,116.75,0.0,0.0,0.0,65.29,0.0,0.0,0.0,0.0,...,135.29,0.0,0.0,0.0,91.53,0.0,0.0,0.0,0.0,2024-02-07 01:00:00
722,site1,97.86,0.0,0.0,0.0,63.05,0.0,0.0,0.0,0.0,...,115.25,0.0,0.0,0.0,99.46,0.0,0.0,0.0,0.0,2024-02-07 02:00:00
723,site1,85.22,30.97,204.94,48.05,57.81,32.22,0.0,33.7,0.0,...,104.88,0.0,0.0,0.0,96.23,0.0,0.0,0.0,0.0,2024-02-07 03:00:00


In [13]:
columns_to_drop = ['Site(t+%d)' % lag]
for i in range(window, 0, -1):
    columns_to_drop.append('Site(t-%d)' % i)
columns_to_drop.append('Site(t)')
series.drop(columns_to_drop, axis=1, inplace=True)
series.head()

Unnamed: 0,Solar Zenith Angle(t-719),Clearsky DHI(t-719),Clearsky DNI(t-719),Clearsky GHI(t-719),Relative Humidity(t-719),DHI(t-719),DNI(t-719),GHI(t-719),Output(t-719),datetime(t-719),...,Solar Zenith Angle(t+168),Clearsky DHI(t+168),Clearsky DNI(t+168),Clearsky GHI(t+168),Relative Humidity(t+168),DHI(t+168),DNI(t+168),GHI(t+168),Output(t+168),datetime(t+168)
719,138.12,0.0,0.0,0.0,59.05,0.0,0.0,0.0,0.0,2024-01-01 00:00:00,...,160.01,0.0,0.0,0.0,76.64,0.0,0.0,0.0,0.0,2024-02-06 23:00:00
720,125.23,0.0,0.0,0.0,62.04,0.0,0.0,0.0,0.0,2024-01-01 01:00:00,...,146.2,0.0,0.0,0.0,81.95,0.0,0.0,0.0,0.0,2024-02-07 00:00:00
721,116.75,0.0,0.0,0.0,65.29,0.0,0.0,0.0,0.0,2024-01-01 02:00:00,...,135.29,0.0,0.0,0.0,91.53,0.0,0.0,0.0,0.0,2024-02-07 01:00:00
722,97.86,0.0,0.0,0.0,63.05,0.0,0.0,0.0,0.0,2024-01-01 03:00:00,...,115.25,0.0,0.0,0.0,99.46,0.0,0.0,0.0,0.0,2024-02-07 02:00:00
723,85.22,30.97,204.94,48.05,57.81,32.22,0.0,33.7,0.0,2024-01-01 04:00:00,...,104.88,0.0,0.0,0.0,96.23,0.0,0.0,0.0,0.0,2024-02-07 03:00:00


In [14]:
# Label
labels_col = 'Output(t+%d)' % lag
labels = series[labels_col]
series = series.drop(labels_col, axis=1)

X_train, X_valid, Y_train, Y_valid = train_test_split(series, labels.values, test_size=0.2, random_state=0)
print('Train set shape', X_train.shape)
print('Validation set shape', X_valid.shape)
X_train.head()

Train set shape (5633, 7209)
Validation set shape (1409, 7209)


Unnamed: 0,Solar Zenith Angle(t-719),Clearsky DHI(t-719),Clearsky DNI(t-719),Clearsky GHI(t-719),Relative Humidity(t-719),DHI(t-719),DNI(t-719),GHI(t-719),Output(t-719),datetime(t-719),...,datetime(t),Solar Zenith Angle(t+168),Clearsky DHI(t+168),Clearsky DNI(t+168),Clearsky GHI(t+168),Relative Humidity(t+168),DHI(t+168),DNI(t+168),GHI(t+168),datetime(t+168)
11072,23.4,121.51,919.14,991.72,42.83,127.57,916.21,954.64,2.32,2024-01-12 09:00:00,...,2024-02-11 08:00:00,42.89,94.18,919.8,756.3,45.97,89.42,916.65,783.53,2024-02-18 08:00:00
9401,115.32,0.0,0.0,0.0,30.66,0.0,0.0,0.0,0.405,2024-01-02 18:00:00,...,2024-02-01 17:00:00,109.97,0.0,0.0,0.0,83.0,0.0,0.0,0.0,2024-02-08 17:00:00
12569,116.25,0.0,0.0,0.0,88.25,0.0,0.0,0.0,0.36,2024-01-14 18:00:00,...,2024-02-13 17:00:00,109.72,0.0,0.0,0.0,65.87,0.0,0.0,0.0,2024-02-20 17:00:00
11112,132.53,0.0,0.0,0.0,85.9,0.0,0.0,0.0,0.0,2024-01-14 01:00:00,...,2024-02-13 00:00:00,150.74,0.0,0.0,0.0,88.97,0.0,0.0,0.0,2024-02-20 00:00:00
11463,87.29,0.0,0.0,0.0,56.02,0.0,0.0,0.0,3.54,2024-01-28 16:00:00,...,2024-02-27 15:00:00,72.93,115.04,397.86,239.45,65.38,135.19,21.7,138.51,2024-01-05 15:00:00


In [None]:
epochs = 40
batch = 256
lr = 0.0003
adam = optimizers.Adam(lr)

In [None]:
model_mlp = Sequential()
model_mlp.add(Dense(100, activation='relu', input_dim=X_train.shape[1]))
model_mlp.add(Dense(1))
model_mlp.compile(loss='mse', optimizer=adam)
model_mlp.summary()

In [None]:
mlp_history = model_mlp.fit(X_train.values, Y_train, validation_data=(X_valid.values, Y_valid), epochs=epochs, verbose=2)