In [None]:
import tensorflow as tf
import statistics
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
from data import *
from plot import *


# Plot defaults: 8x6 figures, grid lines off.
mpl.rcParams.update({'figure.figsize': (8, 6), 'axes.grid': False})

# Seed TensorFlow's global random generator for repeatable runs.
tf.random.set_seed(13)

# Tunable constants used by the rest of the notebook.
TRAIN_SPLIT = 60000          # rows before this index are used for training statistics/windows
BATCH_SIZE = 512
BUFFER_SIZE = 60000          # shuffle buffer size for the training pipeline
EVALUATION_INTERVAL = 150    # passed to fit() as steps_per_epoch
EPOCHS = 2

# Load the input table from CSV (path is relative to the working directory).
df = pd.read_csv('resources/weather3.csv')

# Columns fed to the model; their order fixes the column order of `dataset`.
features_considered = ['temp', 'hum', 'airpr', 'solrad', 'windvel']

# Same selection as before, indexed by the 'date' column.
features = df.set_index('date')[features_considered]

# Standardise every column with statistics taken from the training rows only,
# so the validation rows do not influence the scaling.
raw_values = features.values
train_rows = raw_values[:TRAIN_SPLIT]
data_mean = np.mean(train_rows, axis=0)
data_std = np.std(train_rows, axis=0)
dataset = (raw_values - data_mean) / data_std

# Windowing parameters handed to multivariate_data (history size, target size,
# step). Presumably measured in rows of `dataset` -- TODO confirm against the helper.
past_history = 48
future_target = 12
STEP = 1

# NOTE(review): column 1 is 'hum' according to features_considered; confirm that
# humidity (and not 'temp' in column 0) is the intended prediction target.
target_series = dataset[:, 1]
window_args = (past_history, future_target, STEP)

# Training windows come from rows [0, TRAIN_SPLIT); validation windows from
# TRAIN_SPLIT up to the end bound the helper chooses when given None.
x_train_multi, y_train_multi = multivariate_data(
    dataset, target_series, 0, TRAIN_SPLIT, *window_args)
x_val_multi, y_val_multi = multivariate_data(
    dataset, target_series, TRAIN_SPLIT, None, *window_args)


# Training pipeline: cache the (input, label) pairs, shuffle, batch, and repeat
# without end -- fit() is therefore bounded by steps_per_epoch.
training_pairs = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data_multi = (
    training_pairs
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: same batching, no shuffling; also repeats without end.
validation_pairs = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = validation_pairs.batch(BATCH_SIZE).repeat()

# Two stacked LSTM layers followed by a dense output layer.
# The first LSTM is L2-regularised (kernel, recurrent and bias weights) and
# returns the full sequence so the second LSTM can consume it.
multi_step_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(
        32,
        kernel_regularizer=l2(0.005),
        recurrent_regularizer=l2(0.005),
        bias_regularizer=l2(0.005),
        return_sequences=True,
        # Per-sample input shape: the last two axes of the training windows.
        input_shape=x_train_multi.shape[-2:],
    ),
    tf.keras.layers.LSTM(16, activation='relu'),
    # Output width follows future_target instead of a hard-coded 12, so the
    # model stays consistent when the forecast horizon is changed.
    # Assumes each label window holds future_target values -- TODO confirm
    # against multivariate_data.
    tf.keras.layers.Dense(future_target),
])
multi_step_model.compile(optimizer='adam', loss='mse')

# Both datasets repeat indefinitely, so the epoch length and the number of
# validation batches are bounded explicitly.
multi_step_history = multi_step_model.fit(
    train_data_multi,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=val_data_multi,
    validation_steps=50,
)

# Experiment notes
# NOTE(review): the values below do not all match the constants currently set
# in this cell (e.g. EPOCHS = 2); presumably they record earlier runs.
# winddir disregarded
# 20 epochs of 100 evaluation intervals (steps) each
# Target = 24 hours
# ++++++++++++++++++++++++++++++++++++
# Not too many neurons, to avoid overfitting; not too few, to avoid underfitting.
# More than one hidden layer leads to exploding gradients.
# More hidden layers can decrease the chance of underfitting.
# Too low a regularizer (~0.001) tends to cause underfitting.
# winddir disregarded
# 30 epochs of 150 evaluation intervals (steps) each
# Target = 12 hours
# History = 48
# Added an l2 regularizer of 0.005 to avoid overfitting
# Batch size = 512
# Buffer size = 60000

In [None]:
# Hand the History object returned by fit() to the plotting helper
# (presumably defined in the star-imported `plot` module -- TODO confirm).
loss_plot_title = 'Multi-step training and validation loss'
plot_train_history(multi_step_history, loss_plot_title)

In [None]:
# For each of 50 validation batches, plot the first sample's input window,
# its true future values and the model's forecast.
for x, y in val_data_multi.take(50):
    # Only sample 0 is plotted, so run inference on a one-element batch
    # (x[:1] keeps the batch axis) instead of predicting the whole batch.
    first_prediction = multi_step_model.predict(x[:1])[0]
    multi_step_plot(x[0], y[0], first_prediction,
                    'Multi-step time series prediction', STEP)