In [None]:
import numpy as np
import csv
import matplotlib.pyplot as plt
%matplotlib inline

# Load the temperature series from CSV. Only the first row of the file is used;
# each of its fields is parsed as a float.
# newline="" is how the csv module documentation says files should be opened,
# so that the reader handles line endings itself.
with open("/content/tokyo_max_temp.csv", "r", newline="") as f:
    # Stop after the first row instead of materialising every row of the file.
    first_row = next(csv.reader(f), None)
if first_row is None:
    # Fail with an explicit message rather than an IndexError on an empty file.
    raise ValueError("/content/tokyo_max_temp.csv contains no rows")
series = [float(value) for value in first_row]
print("Total data points: {}".format(len(series)))

Total data points: 1728


In [None]:
# chronological hold-out: the leading 80% of points train, the remainder tests
train_size = 0.8
split_time = int(len(series) * train_size)

# cut the values at the split index
series_train, series_test = series[:split_time], series[split_time:]
print("Number of data points for training: {}, for testing: {}".format(len(series_train), len(series_test)))

# integer time steps 0 .. len(series) - 1, cut at the same index
time = np.arange(len(series))
time_train, time_test = time[:split_time], time[split_time:]

# settings consumed by the windowed-dataset helpers
window_size = 24            # past points used as network input per sample
batch_size = 64             # windows per batch
shuffle_buffer_size = 1000  # buffer size handed to the dataset shuffle

#from tensorflow.python.data import Dataset
import tensorflow as tf
#from tensorflow.data import Dataset
from tensorflow.python.data import Dataset

def window_dataset(series, window_size, batch_size, shuffle_buffer_size):
    '''
    Build a shuffled, batched dataset of (input window, next value) pairs
    from a time series.

        Parameters:
            series: time series data
            window_size: number of consecutive data points used as network input
            batch_size: number of (window, target) pairs per batch
            shuffle_buffer_size: buffer size passed to the dataset's shuffle method

        Returns:
            windowed dataset yielding (inputs, targets) batches; each input is
            the first window_size points of a window and each target is the
            single point that follows them
    '''
    # add a trailing axis so every time step is a length-1 vector
    values = tf.expand_dims(series, axis=-1)
    # tf.data.Dataset is the public entry point; the module-level `Dataset`
    # name is imported from the private tensorflow.python package
    ds = tf.data.Dataset.from_tensor_slices(values)
    # sliding windows of window_size inputs plus 1 target, advancing one step
    # at a time; incomplete windows at the end are dropped
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # each window is itself a dataset; batch it into a single tensor
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    # split every window into (all but last, last)
    ds = ds.map(lambda w: (w[:-1], w[-1:]))
    ds = ds.shuffle(shuffle_buffer_size)
    return ds.batch(batch_size).prefetch(1)

def forecast(model, series, window_size, batch_size=64):
    '''
    Run the model over every sliding window of a time series.

        Parameters:
            model: trained network model (must provide a predict method)
            series: time series data to be forecasted
            window_size: number of consecutive data points used as network input
            batch_size: number of windows per prediction batch; previously this
                was read implicitly from the notebook-level batch_size
                variable (64), which is kept as the default

        Returns:
            forecast: whatever model.predict returns for the batched windows
    '''
    # add a trailing axis so every time step is a length-1 vector
    values = tf.expand_dims(series, axis=-1)
    # tf.data.Dataset is the public entry point; the module-level `Dataset`
    # name is imported from the private tensorflow.python package
    ds = tf.data.Dataset.from_tensor_slices(values)
    # sliding windows of window_size points, advancing one step at a time;
    # incomplete windows at the end are dropped
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    # each window is itself a dataset; batch it into a single tensor
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.batch(batch_size).prefetch(1)
    return model.predict(ds)


# build the windowed (inputs, target) datasets for both splits with the same settings
train_set, test_set = (
    window_dataset(part, window_size, batch_size, shuffle_buffer_size)
    for part in (series_train, series_test)
)

Number of data points for training: 1382, for testing: 346


In [None]:
# Network: Input -> Lambda (insert axis 1) -> SimpleRNN -> Dropout -> Dense -> Dense(1)
model = tf.keras.models.Sequential()

# Keras documents `shape` as a tuple; a bare int is not accepted by newer
# Keras releases, so pass the one-element tuple explicitly.
model.add(tf.keras.layers.Input(shape=(window_size,)))
# Insert a length-1 axis at position 1, so the RNN receives a single step
# carrying window_size features.
# NOTE(review): window_dataset yields windows with a trailing singleton axis —
# confirm Keras reconciles that with this declared input shape as intended.
model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=1)))
model.add(
    tf.keras.layers.SimpleRNN(
        64,
        kernel_regularizer=tf.keras.regularizers.L1(0.01),
        activity_regularizer=tf.keras.regularizers.L2(0.01),
    )
)
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(128, activation='elu'))
# single output unit: one predicted value per window
model.add(tf.keras.layers.Dense(1))

model.compile(
    loss=tf.keras.losses.Huber(),
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9),
    metrics=["mae"],
)
# stop once val_loss has not improved for 3 consecutive epochs
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# test_set is used as the validation data that drives early stopping
hist = model.fit(train_set, epochs=100, validation_data=test_set, callbacks=[callbacks])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


In [None]:
# write the trained model to 'TokyoClimate.h5' (relative to the working directory)
model.save(filepath='TokyoClimate.h5')