### Checking Python Environment Path

In [46]:
import sys
print(sys.executable)

/opt/conda/bin/python3.10


### Importing Libraries

In [47]:
import csv
import os
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence  import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf

mpl.rcParams['figure.figsize'] = (10,8)
mpl.rcParams['axes.grid'] = False

# Data Analyzing and Cleaning

### Importing CSV files and merging them all together

In [48]:
data = pd.concat(
    map(pd.read_csv, [
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor1.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor2.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor3.csv',
                      '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor4.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor5.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor6.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2018Floor7.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor1.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor2.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor3.csv',
                      '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor4.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor5.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor6.csv',
#                       '/kaggle/input/cubems-smart-building-energy-and-iaq-data/2019Floor7.csv',
                      
                      
                     ]),
    ignore_index=True)

data.head(5)

Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
0,2018-07-01 00:00:00,0.0,0.0,0.0,8.74,0.0,0.22,28.28,62.5,0.0,...,0.07,,,,0.0,0.0,0.15,27.73,62.86,0.0
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.07,,,,0.0,0.0,0.14,27.73,62.87,0.0
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.07,,,,0.0,0.0,0.14,27.72,62.88,0.0


In [49]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 790560 entries, 0 to 790559
Data columns (total 30 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Date          790560 non-null  object 
 1   z1_AC1(kW)    621829 non-null  float64
 2   z1_AC2(kW)    621829 non-null  float64
 3   z1_AC3(kW)    621829 non-null  float64
 4   z1_AC4(kW)    751579 non-null  float64
 5   z1_Light(kW)  707605 non-null  float64
 6   z1_Plug(kW)   756532 non-null  float64
 7   z1_S1(degC)   526125 non-null  float64
 8   z1_S1(RH%)    526128 non-null  float64
 9   z1_S1(lux)    526127 non-null  float64
 10  z2_AC1(kW)    751576 non-null  float64
 11  z2_Light(kW)  707605 non-null  float64
 12  z2_Plug(kW)   707605 non-null  float64
 13  z2_S1(degC)   519026 non-null  float64
 14  z2_S1(RH%)    519026 non-null  float64
 15  z2_S1(lux)    519027 non-null  float64
 16  z3_Light(kW)  756457 non-null  float64
 17  z3_Plug(kW)   756490 non-null  float64
 18  z4_A

### Sorting Dataframe values in order of Date & Time

In [50]:
# df['Date'] = pd.to_datetime(df['Date'])
data = data.sort_values(by='Date')

In [51]:
data.head(5)

Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
0,2018-07-01 00:00:00,0.0,0.0,0.0,8.74,0.0,0.22,28.28,62.5,0.0,...,0.07,,,,0.0,0.0,0.15,27.73,62.86,0.0
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.07,,,,0.0,0.0,0.14,27.73,62.87,0.0
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.07,,,,0.0,0.0,0.14,27.72,62.88,0.0


In [52]:
data = data[1:]
data.head(5)

Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.07,,,,0.0,0.0,0.14,27.73,62.87,0.0
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.07,,,,0.0,0.0,0.14,27.72,62.88,0.0
5,2018-07-01 00:05:00,0.0,0.0,0.0,8.68,0.0,0.23,28.29,62.51,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0


### Converting 'Date' column into DateTime Format

In [53]:
data['Date'] = pd.to_datetime(data['Date'], infer_datetime_format = True)
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 790559 entries, 1 to 790559
Data columns (total 30 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   Date          790559 non-null  datetime64[ns]
 1   z1_AC1(kW)    621828 non-null  float64       
 2   z1_AC2(kW)    621828 non-null  float64       
 3   z1_AC3(kW)    621828 non-null  float64       
 4   z1_AC4(kW)    751578 non-null  float64       
 5   z1_Light(kW)  707604 non-null  float64       
 6   z1_Plug(kW)   756531 non-null  float64       
 7   z1_S1(degC)   526124 non-null  float64       
 8   z1_S1(RH%)    526127 non-null  float64       
 9   z1_S1(lux)    526126 non-null  float64       
 10  z2_AC1(kW)    751575 non-null  float64       
 11  z2_Light(kW)  707604 non-null  float64       
 12  z2_Plug(kW)   707604 non-null  float64       
 13  z2_S1(degC)   519025 non-null  float64       
 14  z2_S1(RH%)    519025 non-null  float64       
 15  z2_S1(lux)    519

### Graphs for different Devices and Zones

In [54]:

# data.set_index('Date')[['z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_AC2(kW)', 'z2_AC3(kW)', 'z2_AC4(kW)', 'z2_Light(kW)',
#            'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_Light(kW)', 'z1_AC1(kW)', 'z2_AC5(kW)', 'z2_AC6(kW)',
#            'z2_AC7(kW)', 'z2_AC8(kW)', 'z2_AC9(kW)', 'z2_AC10(kW)', 'z2_AC11(kW)', 'z2_AC12(kW)', 'z2_AC13(kW)',
#            'z2_AC14(kW)', 'z4_AC1(kW)', 'z4_Plug(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)', 'z5_AC1(kW)','z5_Light(kW)',
#             'z5_Plug(kW)', 'z4_AC2(kW)', 'z4_AC3(kW)', 'z4_AC4(kW)', 'z1_S1(degC)', 'z2_S1(degC)', 'z3_S1(degC)', 'z4_S1(degC)',
#             'z5_S1(degC)', 'z1_S1(RH%)', 'z2_S1(RH%)', 'z3_S1(RH%)', 'z4_S1(RH%)', 'z5_S1(RH%)', 'z1_S1(lux)', 'z2_S1(lux)',
#             'z3_S1(lux)', 'z4_S1(lux)', 'z5_S1(lux)']].plot(subplots=True, figsize=(15,15))


# plt.figure(figsize=(14,5))
# sns.set_style("ticks")
# sns.lineplot(x=data['Date'],y=data['z1_Light(kW)'],color='blue')
# sns.despine()
# plt.title("Zone 1 Light Power Consumption(kW)",size='x-large',color='blue')

data.head()




Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.07,,,,0.0,0.0,0.14,27.73,62.87,0.0
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.07,,,,0.0,0.0,0.14,27.72,62.88,0.0
5,2018-07-01 00:05:00,0.0,0.0,0.0,8.68,0.0,0.23,28.29,62.51,0.0,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0


### Missing Values Heatmap

In [55]:
# num_ticks=19 #tick number for Y-axis
# plt.figure(figsize = (10,5))
# ax = sns.heatmap(data.isnull(), cbar=False)
# ax.set_yticks(np.linspace(0,100,num_ticks,dtype=np.integer))
# ax.set_title('Missing Values')
# plt.show()

In [56]:
data.reset_index()

Unnamed: 0,index,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
0,1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.00,0.23,28.29,62.50,...,0.07,,,,0.0,0.0,0.14,27.73,62.87,0.0
1,2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.00,0.22,28.29,62.50,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
2,3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.50,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
3,4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.00,0.22,28.29,62.51,...,0.07,,,,0.0,0.0,0.14,27.72,62.88,0.0
4,5,2018-07-01 00:05:00,0.0,0.0,0.0,8.68,0.00,0.23,28.29,62.51,...,0.08,,,,0.0,0.0,0.14,27.72,62.88,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
790554,790555,2019-12-31 23:55:00,0.0,0.0,0.0,0.00,0.00,0.23,27.62,63.19,...,0.05,26.45,66.64,0.0,0.0,0.0,0.19,27.80,67.67,0.0
790555,790556,2019-12-31 23:56:00,0.0,0.0,0.0,0.00,0.01,0.23,27.63,63.17,...,0.03,26.45,66.68,0.0,0.0,0.0,0.14,27.80,67.70,0.0
790556,790557,2019-12-31 23:57:00,0.0,0.0,0.0,0.00,0.00,0.23,27.63,63.17,...,0.05,26.45,66.68,0.0,0.0,0.0,0.18,27.70,67.70,0.0
790557,790558,2019-12-31 23:58:00,0.0,0.0,0.0,0.00,0.00,0.23,27.62,63.18,...,0.03,26.45,66.65,0.0,0.0,0.0,0.14,27.80,67.70,0.0


### Solving Blank Cell Problem assuming Consumption Value '0'

In [57]:
data = data.fillna(0)

In [58]:
data.describe()

Unnamed: 0,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),z2_AC1(kW),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
count,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,...,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0,790559.0
mean,0.000309,0.009051,0.000966,4.514454,1.417981,0.963996,17.086992,41.557118,15.470694,3.220222,...,0.389162,13.61943,35.0807,13.975827,1.656883,0.469562,0.224868,16.189863,44.723494,12.2735
std,0.030544,0.133802,0.041527,9.664746,1.88148,1.066666,12.297406,29.713895,25.936497,7.003122,...,0.45111,12.448004,31.773315,25.024414,3.898456,0.7252,0.214992,11.720073,32.352634,25.061398
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,...,0.08,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.38,24.82,59.31,0.0,0.0,...,0.1,18.98,56.45,0.0,0.0,0.0,0.16,23.58,63.11,0.0
75%,0.0,0.0,0.0,0.0,3.27,1.4,26.68,63.79,31.0,0.0,...,0.77,25.34,63.99,8.0,0.0,1.01,0.31,25.19,69.71,1.0
max,22.29,20.28,9.91,51.69,63.11,87.11,31.9,78.8,87.0,58.4,...,9.52,30.99,85.3,111.0,56.46,58.61,3.14,30.9,84.81,87.0


In [59]:
data.head(5)

Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z4_Plug(kW),z4_S1(degC),z4_S1(RH%),z4_S1(lux),z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux)
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.07,0.0,0.0,0.0,0.0,0.0,0.14,27.73,62.87,0.0
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.08,0.0,0.0,0.0,0.0,0.0,0.14,27.72,62.88,0.0
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.08,0.0,0.0,0.0,0.0,0.0,0.14,27.72,62.88,0.0
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.07,0.0,0.0,0.0,0.0,0.0,0.14,27.72,62.88,0.0
5,2018-07-01 00:05:00,0.0,0.0,0.0,8.68,0.0,0.23,28.29,62.51,0.0,...,0.08,0.0,0.0,0.0,0.0,0.0,0.14,27.72,62.88,0.0


### Defining Target Variable

 Here We have converted Lux, DegC and Rh% into kW units for Data simplicity 
(due to Consideration of GPU power and time)

kW  = Lux/100

kW  = [Temperature(degC)*2] + [Humidity(Rh%)/2]

In [62]:
# load_kw = ['z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_AC2(kW)', 'z2_AC3(kW)', 'z2_AC4(kW)', 'z2_Light(kW)',
#            'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_Light(kW)', 'z1_AC1(kW)', 'z2_AC5(kW)', 'z2_AC6(kW)',
#            'z2_AC7(kW)', 'z2_AC8(kW)', 'z2_AC9(kW)', 'z2_AC10(kW)', 'z2_AC11(kW)', 'z2_AC12(kW)', 'z2_AC13(kW)',
#            'z2_AC14(kW)', 'z4_AC1(kW)', 'z4_Plug(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)', 'z5_AC1(kW)',
#            'z5_Light(kW)', 'z5_Plug(kW)', 'z4_AC2(kW)', 'z4_AC3(kW)', 'z4_AC4(kW)' ]

# load_degC = ['z1_S1(degC)', 'z2_S1(degC)', 'z3_S1(degC)', 'z4_S1(degC)', 'z5_S1(degC)']

# load_rh = ['z1_S1(RH%)', 'z2_S1(RH%)', 'z3_S1(RH%)', 'z4_S1(RH%)', 'z5_S1(RH%)' ]

# load_lux = ['z1_S1(lux)', 'z2_S1(lux)', 'z3_S1(lux)', 'z4_S1(lux)', 'z5_S1(lux)' ]

load_kw = ['z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_Light(kW)',
           'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_Light(kW)', 'z4_AC1(kW)', 'z4_Plug(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)', 'z5_AC1(kW)',
           'z5_Light(kW)', 'z5_Plug(kW)' ]

load_degC = ['z1_S1(degC)', 'z2_S1(degC)', 'z4_S1(degC)', 'z5_S1(degC)']

load_rh = ['z1_S1(RH%)', 'z2_S1(RH%)', 'z4_S1(RH%)', 'z5_S1(RH%)' ]

load_lux = ['z1_S1(lux)', 'z2_S1(lux)', 'z4_S1(lux)', 'z5_S1(lux)' ]

data['total_kw'] = data.loc[ :, load_kw].sum(axis = 1)

data['total_lux'] = (data.loc[ :, load_lux].sum(axis=1))/100

data['total_rh'] = (data.loc[ :, load_rh].sum(axis=1))/2

data['total_degC'] = (data.loc[ :, load_degC].sum(axis=1))*2


data.head(5)





Unnamed: 0,Date,z1_AC1(kW),z1_AC2(kW),z1_AC3(kW),z1_AC4(kW),z1_Light(kW),z1_Plug(kW),z1_S1(degC),z1_S1(RH%),z1_S1(lux),...,z5_AC1(kW),z5_Light(kW),z5_Plug(kW),z5_S1(degC),z5_S1(RH%),z5_S1(lux),total_kw,total_lux,total_rh,total_degC
1,2018-07-01 00:01:00,0.0,0.0,0.0,8.74,0.0,0.23,28.29,62.5,0.0,...,0.0,0.0,0.14,27.73,62.87,0.0,9.74,0.0,95.555,166.66
2,2018-07-01 00:02:00,0.0,0.0,0.0,8.61,0.0,0.22,28.29,62.5,0.0,...,0.0,0.0,0.14,27.72,62.88,0.0,9.61,0.0,95.56,166.64
3,2018-07-01 00:03:00,0.0,0.0,0.0,8.81,0.01,0.23,28.29,62.5,0.0,...,0.0,0.0,0.14,27.72,62.88,0.0,9.83,0.0,95.555,166.64
4,2018-07-01 00:04:00,0.0,0.0,0.0,8.89,0.0,0.22,28.29,62.51,0.0,...,0.0,0.0,0.14,27.72,62.88,0.0,9.88,0.0,95.56,166.62
5,2018-07-01 00:05:00,0.0,0.0,0.0,8.68,0.0,0.23,28.29,62.51,0.0,...,0.0,0.0,0.14,27.72,62.88,0.0,9.69,0.0,95.56,166.62


# Target Variable Dataframe and its Statistical Data

In [66]:
new_data = pd.DataFrame()
new_data = data.loc[:, ['total_kw', 'total_lux', 'total_rh', 'total_degC']]

new_data.head(5)

Unnamed: 0,total_kw,total_lux,total_rh,total_degC
1,9.74,0.0,95.555,166.66
2,9.61,0.0,95.56,166.64
3,9.83,0.0,95.555,166.64
4,9.88,0.0,95.56,166.62
5,9.69,0.0,95.56,166.62


In [67]:
new_data.describe()

Unnamed: 0,total_kw,total_lux,total_rh,total_degC
count,790559.0,790559.0,790559.0,790559.0
mean,20.095084,0.530925,82.484251,126.139832
std,33.410005,0.870776,58.149937,89.779749
min,0.0,0.0,0.0,0.0
25%,1.08,0.0,0.0,0.0
50%,2.17,0.0,109.505,160.6
75%,14.21,0.92,129.81,201.78
max,234.8,3.04,156.75,245.28


# Data Preprocessing

### Data Normalization using MinMaxScaler

In [68]:
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(new_data)

In [70]:
data_scaled

array([[0.04148211, 0.        , 0.60960128, 0.67946836],
       [0.04092845, 0.        , 0.60963317, 0.67938682],
       [0.04186542, 0.        , 0.60960128, 0.67938682],
       ...,
       [0.00413118, 0.        , 0.83177033, 0.88837247],
       [0.00370528, 0.        , 0.83180223, 0.88902479],
       [0.00396082, 0.        , 0.83180223, 0.88902479]])

In [71]:
features = data_scaled
target = data_scaled[:,0]

# Training

In [72]:
TimeseriesGenerator(features, target, length=2, sampling_rate=1, batch_size=1)[0]

(array([[[0.04148211, 0.        , 0.60960128, 0.67946836],
         [0.04092845, 0.        , 0.60963317, 0.67938682]]]),
 array([0.04186542]))

### Split Data into Train and Test (80-20)

In [73]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.20, random_state=123, shuffle = False)

In [74]:
x_train.shape

(632447, 4)

In [75]:
x_test.shape

(158112, 4)

In [88]:
win_length= 720
batch_size = 224
num_features=4

train_generator = TimeseriesGenerator(x_train, y_train, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator = TimeseriesGenerator(x_test, y_test, length=win_length, sampling_rate=1, batch_size=batch_size)


In [77]:
train_generator[0]

(array([[[0.04148211, 0.        , 0.60960128, 0.67946836],
         [0.04092845, 0.        , 0.60963317, 0.67938682],
         [0.04186542, 0.        , 0.60960128, 0.67938682],
         ...,
         [0.05144804, 0.04276316, 0.79100478, 0.93778539],
         [0.04808348, 0.04276316, 0.79097289, 0.93770385],
         [0.04914821, 0.04276316, 0.79097289, 0.93778539]],
 
        [[0.04092845, 0.        , 0.60963317, 0.67938682],
         [0.04186542, 0.        , 0.60960128, 0.67938682],
         [0.04207836, 0.        , 0.60963317, 0.67930528],
         ...,
         [0.04808348, 0.04276316, 0.79097289, 0.93770385],
         [0.04914821, 0.04276316, 0.79097289, 0.93778539],
         [0.0467632 , 0.04276316, 0.79090909, 0.93778539]],
 
        [[0.04186542, 0.        , 0.60960128, 0.67938682],
         [0.04207836, 0.        , 0.60963317, 0.67930528],
         [0.04126917, 0.        , 0.60963317, 0.67930528],
         ...,
         [0.04914821, 0.04276316, 0.79097289, 0.93778539],
        

### Neural Network Layers

In [89]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(32, input_shape = (win_length, num_features), return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.LSTM(32, return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.LSTM(16, return_sequences=False))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1))

In [90]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 720, 32)           4736      
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 720, 32)           0         
                                                                 
 lstm_10 (LSTM)              (None, 720, 32)           8320      
                                                                 
 leaky_re_lu_7 (LeakyReLU)   (None, 720, 32)           0         
                                                                 
 dropout_6 (Dropout)         (None, 720, 32)           0         
                                                                 
 lstm_11 (LSTM)              (None, 16)                3136      
                                                                 
 dropout_7 (Dropout)         (None, 16)               

### Early Stopping Criterias

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, mode=min)

model.compile(loss=tf.losses.MeanSquaredError(), optimizer=tf.optimizers.Adam(), metrics=[tf.metrics.MeanAbsoluteError()])

history = model.fit(train_generator, epochs=50, validation_data=test_generator, shuffle=False, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [None]:
model.evaluate_generator(test_generator, verbose=0)

In [None]:
predictions=model.predict_generator(test_generator)

In [None]:
predictions.shape[0]

In [None]:
predictions

In [None]:
y_test

In [None]:
x_test

In [None]:
x_test[:,1:][win_length:]

In [None]:
df_pred= pd.concat([pd.DataFrame(predictions), pd.DataFrame(x_test[:,1:][win_length:])],axis=1)

### Reverse Transformation of Scaled Data into Original Data

In [None]:
rev_trans = scaler.inverse_transform(df_pred)

In [None]:
rev_trans

## Output

In [None]:
df_final = data[predictions.shape[0]*-1:]

In [None]:
df_final.count()

In [None]:
df_final['App_Pred'] = rev_trans[:,0]

In [None]:
df_final.head(5)

In [None]:
df_final[['total_kw', 'App_Pred']].iplot()