In [7]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import sklearn.preprocessing
from sklearn.metrics import r2_score
from windows import WindowGenerator, MultiStepLastBaseline, compile_and_fit
from datetime import datetime
from keras.layers import Dense,Dropout,SimpleRNN,LSTM
from keras.models import Sequential

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

In [8]:
dir_path = '/home/hanlinn/00.projects/tensorflow-prepare/example-file/energy/PJME_hourly.csv'

In [16]:
df = pd.read_csv(dir_path)
df.head()

Unnamed: 0,Datetime,PJME_MW
0,2002-12-31 01:00:00,26498.0
1,2002-12-31 02:00:00,25147.0
2,2002-12-31 03:00:00,24574.0
3,2002-12-31 04:00:00,24393.0
4,2002-12-31 05:00:00,24860.0


In [17]:
#checking missing data
df.isna().sum()


Datetime    0
PJME_MW     0
dtype: int64

In [19]:
date_time = pd.to_datetime(df.pop('Datetime'), format='%Y-%m-%d %H:%M:%S')

In [20]:
timestamp_s = date_time.map(pd.Timestamp.timestamp)
day = 24*60*60
year = (365.2425)*day

df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

In [21]:
df.head()

Unnamed: 0,PJME_MW,Day sin,Day cos,Year sin,Year cos
0,26498.0,0.258819,0.965926,-0.016528,0.999863
1,25147.0,0.5,0.866025,-0.015812,0.999875
2,24574.0,0.707107,0.707107,-0.015095,0.999886
3,24393.0,0.866025,0.5,-0.014378,0.999897
4,24860.0,0.965926,0.258819,-0.013661,0.999907


In [22]:
#Check and normalize
print(f"The original dataset shape is :{df.shape}")
from sklearn.preprocessing import minmax_scale
scaled_df = minmax_scale(df)
print(f"The scaled dataset shape is : {scaled_df.shape}")

# check column name
column_names = list(df.columns.values)
scaled_df = pd.DataFrame(scaled_df.tolist(),columns=column_names)

#Train_test_split
n = len(scaled_df)
train_df = scaled_df[0:int(n*0.7)]
val_df = scaled_df[int(n*0.7):int(n*0.9)]
test_df = scaled_df[int(n*0.9):]

num_features = scaled_df.shape[1]
print(f"The train shape is : {train_df.shape} \n"
      f"The valid shape is : {val_df.shape} \n"
      f"The test shape is : {test_df.shape} \n"
      f"The number of feature: {num_features}")


OUT_STEPS = 24
multi_window = WindowGenerator(input_width=24,
                               label_width=OUT_STEPS,
                               shift=OUT_STEPS,
                               train_df= train_df,
                               val_df= val_df,
                               test_df= test_df)

#multi_window.plot()
print(multi_window)

The original dataset shape is :(145366, 5)
The scaled dataset shape is : (145366, 5)
The train shape is : (101756, 5) 
The valid shape is : (29073, 5) 
The test shape is : (14537, 5) 
The number of feature: 5
Total window size: 48
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
Label column name(s): None


In [23]:
last_baseline = MultiStepLastBaseline()
last_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['mae'])

val_performance = {}
performance = {}

val_performance['Last'] = last_baseline.evaluate(multi_window.val)
performance['Last'] = last_baseline.evaluate(multi_window.test, verbose=0)
#multi_window.plot(last_baseline)




In [24]:


lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, return_sequences=True),
    # Shape => [batch, time, features]
    tf.keras.layers.Dense(units=num_features)
])

history = compile_and_fit(lstm_model, multi_window)

IPython.display.clear_output()
val_performance['LSTM'] = lstm_model.evaluate( multi_window.val)
performance['LSTM'] = lstm_model.evaluate( multi_window.test, verbose=0)

print(performance)

{'Last': [0.10360661894083023, 0.1831669956445694], 'LSTM': [0.000937471108045429, 0.01060345396399498]}
